Update app.py
Browse files
Use hf_hub_download to download the attachments from the GAIA dataset repo (gaia-benchmark/GAIA) on the Hugging Face Hub.
app.py
CHANGED
@@ -19,6 +19,7 @@ from custom_tools import (
|
|
19 |
WebpageStructureAnalyzerTool, SummarizeWebpageContentTool, ExtractTableFromWebpageTool, GetWikipediaSectionTool,
|
20 |
ImageContentDescriberTool, TranscribeAudioTool, CachedWebSearchTool, CachedWikiTool, PreloadedPythonTool
|
21 |
)
|
|
|
22 |
|
23 |
subprocess.run(["playwright", "install"], check=True)
|
24 |
|
@@ -329,45 +330,52 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
|
|
329 |
|
330 |
# Check for an associated filename and enhance the prompt
|
331 |
file_name = item.get("file_name")
|
|
|
332 |
if file_name:
|
333 |
-
print(f"Task {task_id} requires file: '{file_name}'.
|
334 |
-
|
335 |
-
headers = {"Authorization": f"Bearer {hf_token}"}
|
336 |
file_downloaded = False
|
337 |
-
#
|
338 |
-
|
339 |
-
|
340 |
-
|
341 |
-
|
|
|
342 |
|
343 |
-
for
|
344 |
try:
|
345 |
-
print(f"Attempting download from
|
346 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
347 |
|
348 |
-
|
349 |
-
|
350 |
-
|
351 |
-
|
352 |
-
|
353 |
-
|
354 |
-
|
355 |
-
|
356 |
-
|
357 |
-
print(f"File not found at
|
358 |
-
continue
|
359 |
else:
|
360 |
-
# For other errors (like
|
361 |
-
print(f"
|
362 |
break
|
363 |
-
|
364 |
-
|
365 |
-
print(f"
|
366 |
-
|
367 |
|
368 |
if not file_downloaded:
|
369 |
print(f"Failed to download '{file_name}' from all provided sources.")
|
370 |
-
question_text += f"\n\n[System Note: A file named '{file_name}' was required for this task, but it could not be downloaded
|
371 |
|
372 |
if not task_id or question_text is None:
|
373 |
print(f"Skipping item with missing task_id or question: {item}")
|
|
|
19 |
WebpageStructureAnalyzerTool, SummarizeWebpageContentTool, ExtractTableFromWebpageTool, GetWikipediaSectionTool,
|
20 |
ImageContentDescriberTool, TranscribeAudioTool, CachedWebSearchTool, CachedWikiTool, PreloadedPythonTool
|
21 |
)
|
22 |
+
from huggingface_hub import hf_hub_download
|
23 |
|
24 |
subprocess.run(["playwright", "install"], check=True)
|
25 |
|
|
|
330 |
|
331 |
# Check for an associated filename and enhance the prompt
|
332 |
file_name = item.get("file_name")
|
333 |
+
|
334 |
if file_name:
|
335 |
+
print(f"Task {task_id} requires file: '{file_name}'. Downloading via hf_hub_download...")
|
336 |
+
|
|
|
337 |
file_downloaded = False
|
338 |
+
local_file_path = None # Will be updated if download is successful
|
339 |
+
repo_id = "gaia-benchmark/GAIA"
|
340 |
+
potential_paths = [
|
341 |
+
f"2023/validation/{file_name}",
|
342 |
+
f"2023/test/{file_name}"
|
343 |
+
]
|
344 |
|
345 |
+
for path_in_repo in potential_paths:
|
346 |
try:
|
347 |
+
print(f"Attempting to download from repo path: '{path_in_repo}'")
|
348 |
+
# Download with huggingface_hub's hf_hub_download; it returns the path of the file in the local cache
|
349 |
+
local_file_path = hf_hub_download(
|
350 |
+
repo_id=repo_id,
|
351 |
+
filename=path_in_repo,
|
352 |
+
repo_type="dataset",
|
353 |
+
token=hf_token
|
354 |
+
)
|
355 |
|
356 |
+
print(f"Successfully downloaded '{file_name}' to cache path: {local_file_path}")
|
357 |
+
# Inform the agent about the successful download and the exact path
|
358 |
+
question_text += f"\n\n[System Note: The required file named '{file_name}' has been successfully downloaded and is available for analysis at the path '{local_file_path}'.]"
|
359 |
+
file_downloaded = True
|
360 |
+
break # Exit the loop on success
|
361 |
+
|
362 |
+
except HfHubHTTPError as e:
|
363 |
+
# Specifically catch 404 Not Found errors and try the next path
|
364 |
+
if e.response.status_code == 404:
|
365 |
+
print(f"File not found at '{path_in_repo}'. Trying next location.")
|
366 |
+
continue
|
367 |
else:
|
368 |
+
# For other HTTP errors (like 401), stop trying
|
369 |
+
print(f"HTTP Error {e.response.status_code} downloading '{path_in_repo}'. Aborting download for this file. Error: {e}")
|
370 |
break
|
371 |
+
except Exception as e:
|
372 |
+
# For other exceptions (like network issues), stop trying
|
373 |
+
print(f"An unexpected error occurred downloading '{path_in_repo}': {e}")
|
374 |
+
break
|
375 |
|
376 |
if not file_downloaded:
|
377 |
print(f"Failed to download '{file_name}' from all provided sources.")
|
378 |
+
question_text += f"\n\n[System Note: A file named '{file_name}' was required for this task, but it could not be downloaded. Please report that the file is inaccessible.]"
|
379 |
|
380 |
if not task_id or question_text is None:
|
381 |
print(f"Skipping item with missing task_id or question: {item}")
|