rodrigomasini committed on
Commit
e70011d
·
verified ·
1 Parent(s): ad2e70e

Create main.py

Browse files
Files changed (1) hide show
  1. main.py +224 -0
main.py ADDED
@@ -0,0 +1,224 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ from nicegui import ui, run
3
+ import requests
4
+ import zipfile
5
+ import io
6
+ import ast
7
+ import json
8
+ import tiktoken
9
+
10
+
11
+ # --- Core Helper Functions (Preserved from original script) ---
12
+
13
def is_file_type(file_path: str, file_extension: str) -> bool:
    """Tell whether ``file_path`` ends with ``file_extension``.

    The check is a plain, case-sensitive suffix comparison, so the extension
    should be passed with its leading dot (for example ``".py"``).
    """
    has_extension = file_path.endswith(file_extension)
    return has_extension
16
+
17
+
18
def is_likely_useful_file(file_path: str, lang: str = "python") -> bool:
    """Decide whether a repository path looks like real source worth keeping.

    A path is rejected when any of the following holds:

    * one of its ``/``-separated segments starts with a dot (hidden entries),
    * the lower-cased path contains the substring ``"test"``,
    * it lives inside one of the skipped directories (docs, examples, ...),
    * it contains the name of a known utility/config or repo-housekeeping file.

    ``lang`` selects language-specific additions to those lists ("python" and
    "go" are recognised); any other value uses only the common lists.
    """
    skipped_dirs = ["docs", "examples", "tests", "test", "scripts", "utils", "benchmarks"]
    skipped_names = []
    housekeeping_markers = [".github", ".gitignore", "LICENSE"]

    # Language-specific extras on top of the shared lists.
    if lang == "python":
        skipped_dirs += ["__pycache__"]
        skipped_names += ["hubconf.py", "setup.py"]
        housekeeping_markers += ["stale.py", "gen-card-", "write_model_card"]
    elif lang == "go":
        skipped_dirs += ["vendor"]
        skipped_names += ["go.mod", "go.sum", "Makefile"]

    segments = file_path.split("/")
    if any(segment.startswith(".") for segment in segments):
        return False

    if "test" in file_path.lower():
        return False

    # A directory counts either as a leading component or anywhere mid-path.
    in_skipped_dir = any(
        f"/{directory}/" in file_path or file_path.startswith(directory + "/")
        for directory in skipped_dirs
    )
    if in_skipped_dir:
        return False

    # Both remaining lists are matched as plain substrings of the path.
    return not any(marker in file_path for marker in skipped_names + housekeeping_markers)
46
+
47
+
48
def is_test_file(file_content: str, lang: str) -> bool:
    """Report whether Python source imports a testing library.

    The source is parsed with ``ast`` and every absolute ``import`` /
    ``from ... import`` statement is inspected. A file counts as a test file
    when the top-level package of any imported module is ``unittest`` or
    ``pytest``, so ``import unittest.mock`` and
    ``from unittest.mock import patch`` are detected as well as the bare
    package imports.

    Args:
        file_content: Full text of the source file.
        lang: Language of the file. Only ``"python"`` is analysed; any other
            value yields ``False``.

    Returns:
        ``True`` if a testing-library import is found, ``False`` otherwise,
        including when the content cannot be parsed.
    """
    if lang != "python":
        return False

    python_test_packages = ("unittest", "pytest")

    try:
        module = ast.parse(file_content)
    except (SyntaxError, ValueError, RecursionError):
        # Unparseable content (invalid syntax, NUL bytes, pathological
        # nesting) is treated as "not a test file" instead of aborting the
        # caller's whole repository run.
        return False

    for node in ast.walk(module):
        if isinstance(node, ast.Import):
            imported_modules = [alias.name for alias in node.names]
        elif isinstance(node, ast.ImportFrom) and node.level == 0 and node.module:
            # level == 0 skips relative imports such as ``from .pytest import x``,
            # which refer to project-local modules rather than the library.
            imported_modules = [node.module]
        else:
            continue

        # Compare only the top-level package so dotted submodules match too.
        if any(name.split(".", 1)[0] in python_test_packages for name in imported_modules):
            return True

    return False
66
+
67
+
68
+ # --- New Token Calculation Function ---
69
def get_token_count(text: str) -> int:
    """Count the tokens in ``text`` using tiktoken's ``cl100k_base`` encoding.

    Args:
        text: The string to tokenize.

    Returns:
        The number of tokens, or ``0`` if the encoding cannot be loaded or
        the text cannot be encoded (the error is printed, not raised).
    """
    try:
        encoding = tiktoken.get_encoding("cl100k_base")
        # By default ``encode`` raises if the text contains special-token
        # literals such as "<|endoftext|>", which do occur in source code.
        # ``disallowed_special=()`` encodes them as ordinary text instead, so
        # the count does not silently fall back to 0 for such repositories.
        return len(encoding.encode(text, disallowed_special=()))
    except Exception as e:  # best-effort: a failed count must not break the UI
        print(f"Could not calculate tokens: {e}")
        return 0
78
+
79
+
80
+ # --- Core Processing Logic (Adapted for UI Integration) ---
81
+
82
def download_and_process_repo(repo_url: str, branch_or_tag: str, log: ui.log) -> str | None:
    """Download a GitHub repository archive and concatenate its Python sources.

    The archive is first requested as a branch (``refs/heads``); if GitHub
    answers 404 the same name is retried as a tag (``refs/tags``). Every
    ``.py`` file that passes ``is_likely_useful_file`` and is not detected as
    a test file by ``is_test_file`` is appended to the result, preceded by a
    ``# File: <path>`` header line.

    Args:
        repo_url: Repository URL. Surrounding whitespace, a trailing ``/``
            and a trailing ``.git`` are ignored.
        branch_or_tag: Name of the branch or tag to download.
        log: UI log element that receives progress messages via ``push``.

    Returns:
        The concatenated file contents, or ``None`` if the download failed
        or the response was not a valid zip archive.
    """
    lang = "python"

    # Normalise the URL so pasted clone URLs still build a valid archive link.
    base_url = (repo_url or "").strip().rstrip("/")
    if base_url.endswith(".git"):
        base_url = base_url[: -len(".git")]
    ref = (branch_or_tag or "").strip()

    response = None
    for ref_kind in ("heads", "tags"):
        download_url = f"{base_url}/archive/refs/{ref_kind}/{ref}.zip"
        log.push(f"Attempting to download from: {download_url}")
        try:
            candidate = requests.get(download_url, timeout=30)
            if ref_kind == "heads" and candidate.status_code == 404:
                # Not a branch: fall through to the tag URL before giving up.
                log.push(f"⚠️ No branch named '{ref}' found. Retrying as a tag...")
                continue
            candidate.raise_for_status()
        except requests.exceptions.RequestException as e:
            log.push(f"❌ Error: Failed to download the repository. {e}")
            return None
        response = candidate
        break

    if response is None:
        # Defensive: every loop path above either sets a response or returns.
        log.push("❌ Error: Failed to download the repository.")
        return None

    log.push("✅ Download successful. Processing files...")
    try:
        archive = zipfile.ZipFile(io.BytesIO(response.content))
    except zipfile.BadZipFile as e:
        log.push(f"❌ Error: Downloaded data is not a valid zip archive. {e}")
        return None

    # Collect pieces in a list and join once instead of repeated string +=.
    pieces = []
    with archive:
        all_files = archive.namelist()
        log.push(f"Found {len(all_files)} total files in the archive.")

        for file_path in all_files:
            # Drop the archive's top-level folder from the path.
            cleaned_path = file_path.partition("/")[2]
            if not cleaned_path or file_path.endswith("/"):
                continue

            if not is_file_type(cleaned_path, ".py") or not is_likely_useful_file(cleaned_path, lang):
                continue

            try:
                file_content = archive.read(file_path).decode("utf-8")
            except Exception as e:
                # Deliberately broad: one unreadable or undecodable member
                # should be skipped, not abort the whole repository.
                log.push(f"⚠️ Skipping (read/decode error): {cleaned_path} - {e}")
                continue

            if is_test_file(file_content, lang):
                log.push(f"Skipping (test file): {cleaned_path}")
                continue

            log.push(f"Appending: {cleaned_path}")
            pieces.append(f"# File: {cleaned_path}\n")
            pieces.append(file_content)
            pieces.append("\n\n")

    log.push("✅ Processing complete.")
    return "".join(pieces)
130
+
131
+
132
+ # --- NiceGUI User Interface Definition ---
133
+
134
@ui.page('/')
def main_page():
    """Build the single-page web interface.

    The page contains a repository URL / branch input row with a process
    button, a processing log, a token-count card and a read-only text area
    holding the concatenated output together with a copy-to-clipboard button.
    """

    async def process_repository():
        """Handle the process button: download, concatenate and show results."""
        # 1. Clear previous results
        log.clear()
        output_area.set_value('')
        token_count_label.set_text('0')

        repo_url = (repo_input.value or '').strip()
        branch = (branch_input.value or '').strip()

        if not repo_url:
            ui.notify('Repository URL cannot be empty.', type='negative')
            return

        # 2. Enter the loading state and run the blocking work off the event loop.
        process_button.set_visibility(False)
        spinner.set_visibility(True)
        content = None
        num_tokens = 0
        try:
            content = await run.io_bound(download_and_process_repo, repo_url, branch, log)
            if content is not None:
                # Tokenizing a large repository is slow; keep it off the event loop too.
                num_tokens = await run.io_bound(get_token_count, content)
        except Exception as e:
            # UI boundary: report the failure instead of leaving the page stuck.
            log.push(f"❌ Error: Unexpected failure while processing. {e}")
            content = None
        finally:
            # 3. Always restore the UI, even if the worker raised.
            process_button.set_visibility(True)
            spinner.set_visibility(False)

        if content is not None:
            output_area.set_value(content)
            token_count_label.set_text(f'{num_tokens or 0:,}')
            ui.notify('Repository processed successfully!', type='positive')
        else:
            ui.notify('Failed to process repository. Check log for details.', type='negative')

    def copy_to_clipboard(text: str):
        """Copy ``text`` to the user's clipboard, or warn when it is empty."""
        if not text:
            ui.notify('There is no content to copy.', type='warning')
            return
        # Fire-and-forget: the call is not awaited and no ``respond`` argument
        # is passed, because current NiceGUI releases reject ``respond=False``.
        # json.dumps produces a safely quoted JavaScript string literal.
        ui.run_javascript(f'navigator.clipboard.writeText({json.dumps(text)})')
        ui.notify('Output copied to clipboard!', type='positive')

    # --- UI Layout ---
    ui.query('body').style('background-color: #f4f4f5;')

    with ui.column().classes('w-full items-center gap-4 mx-auto p-4'):
        ui.label('Research MAGIC GitHub Repo to Single File').classes('text-3xl font-bold mt-4')
        ui.label('Concatenate repository Python and Go source files into a single file for Agentic Analysis.').classes(
            'text-lg text-gray-600')

        with ui.card().classes('w-full max-w-4xl shadow-lg'):
            with ui.row().classes('w-full items-center gap-4'):
                repo_input = ui.input(
                    label="GitHub Repository URL",
                    value="https://github.com/rodrigo-masini/github2file"
                ).props('outlined dense').classes('flex-grow')

                branch_input = ui.input(label="Branch or Tag", value="master").props('outlined dense').style(
                    'width: 150px;')

            process_button = ui.button('Process Repository', on_click=process_repository).props('icon=hub')
            # Hidden until a run starts; shown in place of the button while working.
            spinner = ui.spinner(size='lg').classes('absolute-center')
            spinner.set_visibility(False)

        with ui.card().classes('w-full max-w-4xl shadow-lg'):
            ui.label('Processing Log').classes('text-xl font-semibold')
            log = ui.log().classes('w-full h-48').props('bordered')

        # --- New Dynamic Output Information Card ---
        with ui.card().classes('w-full max-w-4xl shadow-lg'):
            ui.label('Output Information').classes('text-xl font-semibold')
            with ui.row().classes('w-full items-center'):
                ui.label('Total Tokens (cl100k_base):').classes('text-lg')
                token_count_label = ui.label('0').classes('text-lg font-mono font-bold ml-2')

        with ui.card().classes('w-full max-w-4xl shadow-lg'):
            with ui.row().classes('w-full justify-between items-center'):
                ui.label('Concatenated Repo Data').classes('text-xl font-semibold')
                # The lambda reads output_area lazily, at click time.
                ui.button(icon='content_copy', on_click=lambda: copy_to_clipboard(output_area.value)).props(
                    'flat dense round')

            output_area = ui.textarea().classes('w-full h-96 font-mono').props(
                'outlined readonly placeholder="Output will appear here..."')
221
+
222
+
223
# Run the NiceGUI application.
# The guard keeps the server from starting when this module is merely
# imported; "__mp_main__" is the module name seen in multiprocessing children.
if __name__ in {"__main__", "__mp_main__"}:
    ui.run()