keynes42 committed on
Commit
e924cff
·
verified ·
1 Parent(s): aa427a5

Update app.py

Browse files

Use hf_hub_download to download the attachments from the GAIA dataset repo (gaia-benchmark/GAIA) on the Hugging Face Hub.

Files changed (1) hide show
  1. app.py +37 -29
app.py CHANGED
@@ -19,6 +19,7 @@ from custom_tools import (
19
  WebpageStructureAnalyzerTool, SummarizeWebpageContentTool, ExtractTableFromWebpageTool, GetWikipediaSectionTool,
20
  ImageContentDescriberTool, TranscribeAudioTool, CachedWebSearchTool, CachedWikiTool, PreloadedPythonTool
21
  )
 
22
 
23
  subprocess.run(["playwright", "install"], check=True)
24
 
@@ -329,45 +330,52 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
329
 
330
  # Check for an associated filename and enhance the prompt
331
  file_name = item.get("file_name")
 
332
  if file_name:
333
- print(f"Task {task_id} requires file: '{file_name}'. Attempting to download...")
334
-
335
- headers = {"Authorization": f"Bearer {hf_token}"}
336
  file_downloaded = False
337
- # List of URLs to try in order
338
- urls_to_try = [
339
- (f"{GAIA_DATASET_URL_1}/{file_name}", headers),
340
- (f"{GAIA_DATASET_URL_2}/{file_name}", headers)
341
- ]
 
342
 
343
- for i, (url, request_headers) in enumerate(urls_to_try):
344
  try:
345
- print(f"Attempting download from URL #{i+1}: {url}")
346
- file_response = requests.get(url, headers=request_headers, timeout=30)
 
 
 
 
 
 
347
 
348
- if file_response.status_code == 200:
349
- # Success! Save the file locally.
350
- with open(file_name, "wb") as f:
351
- f.write(file_response.content)
352
- print(f"Successfully downloaded and saved '{file_name}' locally.")
353
- question_text += f"\n\n[System Note: The required file named '{file_name}' has been successfully downloaded and is available for analysis.]"
354
- file_downloaded = True
355
- break # Exit the loop since we found the file
356
- elif file_response.status_code == 404:
357
- print(f"File not found at URL #{i+1} (404 Error). Trying next URL if available.")
358
- continue # Go to the next URL
359
  else:
360
- # For other errors (like 500), stop trying for this file
361
- print(f"Received unexpected status code {file_response.status_code} from {url}. Aborting download for this file.")
362
  break
363
-
364
- except requests.exceptions.RequestException as e:
365
- print(f"Error downloading from {url}: {e}. Trying next URL if available.")
366
- continue
367
 
368
  if not file_downloaded:
369
  print(f"Failed to download '{file_name}' from all provided sources.")
370
- question_text += f"\n\n[System Note: A file named '{file_name}' was required for this task, but it could not be downloaded from any known source. Please report that the file is inaccessible.]"
371
 
372
  if not task_id or question_text is None:
373
  print(f"Skipping item with missing task_id or question: {item}")
 
19
  WebpageStructureAnalyzerTool, SummarizeWebpageContentTool, ExtractTableFromWebpageTool, GetWikipediaSectionTool,
20
  ImageContentDescriberTool, TranscribeAudioTool, CachedWebSearchTool, CachedWikiTool, PreloadedPythonTool
21
  )
22
+ from huggingface_hub import hf_hub_download
23
 
24
  subprocess.run(["playwright", "install"], check=True)
25
 
 
330
 
331
  # Check for an associated filename and enhance the prompt
332
  file_name = item.get("file_name")
333
+
334
  if file_name:
335
+ print(f"Task {task_id} requires file: '{file_name}'. Downloading via hf_hub_download...")
336
+
 
337
  file_downloaded = False
338
+ local_file_path = None # Will be updated if download is successful
339
+ repo_id = "gaia-benchmark/GAIA"
340
+ potential_paths = [
341
+ f"2023/validation/{file_name}",
342
+ f"2023/test/{file_name}"
343
+ ]
344
 
345
+ for path_in_repo in potential_paths:
346
  try:
347
+ print(f"Attempting to download from repo path: '{path_in_repo}'")
348
+ # Use the official library to download the file
349
+ local_file_path = hf_hub_download(
350
+ repo_id=repo_id,
351
+ filename=path_in_repo,
352
+ repo_type="dataset",
353
+ token=hf_token
354
+ )
355
 
356
+ print(f"Successfully downloaded '{file_name}' to cache path: {local_file_path}")
357
+ # Inform the agent about the successful download and the exact path
358
+ question_text += f"\n\n[System Note: The required file named '{file_name}' has been successfully downloaded and is available for analysis at the path '{local_file_path}'.]"
359
+ file_downloaded = True
360
+ break # Exit the loop on success
361
+
362
+ except HfHubHTTPError as e:
363
+ # Specifically catch 404 Not Found errors and try the next path
364
+ if e.response.status_code == 404:
365
+ print(f"File not found at '{path_in_repo}'. Trying next location.")
366
+ continue
367
  else:
368
+ # For other HTTP errors (like 401), stop trying
369
+ print(f"HTTP Error {e.response.status_code} downloading '{path_in_repo}'. Aborting download for this file. Error: {e}")
370
  break
371
+ except Exception as e:
372
+ # For other exceptions (like network issues), stop trying
373
+ print(f"An unexpected error occurred downloading '{path_in_repo}': {e}")
374
+ break
375
 
376
  if not file_downloaded:
377
  print(f"Failed to download '{file_name}' from all provided sources.")
378
+ question_text += f"\n\n[System Note: A file named '{file_name}' was required for this task, but it could not be downloaded. Please report that the file is inaccessible.]"
379
 
380
  if not task_id or question_text is None:
381
  print(f"Skipping item with missing task_id or question: {item}")