ai_agent_course_final_project

Running

App Files Community

keynes42 committed 13 days ago

Commit

e924cff

verified ·

1 Parent(s): aa427a5

Update app.py

Browse files

Use hf_hub_download to download the task attachments from the GAIA dataset repository (gaia-benchmark/GAIA) on the Hugging Face Hub.

Files changed (1) hide show

app.py +37 -29

app.py CHANGED Viewed

@@ -19,6 +19,7 @@ from custom_tools import (
     WebpageStructureAnalyzerTool, SummarizeWebpageContentTool, ExtractTableFromWebpageTool, GetWikipediaSectionTool,
     ImageContentDescriberTool, TranscribeAudioTool, CachedWebSearchTool, CachedWikiTool, PreloadedPythonTool
 )
 subprocess.run(["playwright", "install"], check=True)
@@ -329,45 +330,52 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
         # Check for an associated filename and enhance the prompt
         file_name = item.get("file_name")
         if file_name:
-            print(f"Task {task_id} requires file: '{file_name}'. Attempting to download...")
-            headers = {"Authorization": f"Bearer {hf_token}"}
             file_downloaded = False
-            # List of URLs to try in order
-            urls_to_try = [
-                (f"{GAIA_DATASET_URL_1}/{file_name}", headers),
-                (f"{GAIA_DATASET_URL_2}/{file_name}", headers)
-            ]
-            for i, (url, request_headers) in enumerate(urls_to_try):
                 try:
-                    print(f"Attempting download from URL #{i+1}: {url}")
-                    file_response = requests.get(url, headers=request_headers, timeout=30)
-                    if file_response.status_code == 200:
-                        # Success! Save the file locally.
-                        with open(file_name, "wb") as f:
-                            f.write(file_response.content)
-                        print(f"Successfully downloaded and saved '{file_name}' locally.")
-                        question_text += f"\n\n[System Note: The required file named '{file_name}' has been successfully downloaded and is available for analysis.]"
-                        file_downloaded = True
-                        break # Exit the loop since we found the file
-                    elif file_response.status_code == 404:
-                        print(f"File not found at URL #{i+1} (404 Error). Trying next URL if available.")
-                        continue # Go to the next URL
                     else:
-                        # For other errors (like 500), stop trying for this file
-                        print(f"Received unexpected status code {file_response.status_code} from {url}. Aborting download for this file.")
                         break
-                except requests.exceptions.RequestException as e:
-                    print(f"Error downloading from {url}: {e}. Trying next URL if available.")
-                    continue
             if not file_downloaded:
                 print(f"Failed to download '{file_name}' from all provided sources.")
-                question_text += f"\n\n[System Note: A file named '{file_name}' was required for this task, but it could not be downloaded from any known source. Please report that the file is inaccessible.]"
         if not task_id or question_text is None:
             print(f"Skipping item with missing task_id or question: {item}")

     WebpageStructureAnalyzerTool, SummarizeWebpageContentTool, ExtractTableFromWebpageTool, GetWikipediaSectionTool,
     ImageContentDescriberTool, TranscribeAudioTool, CachedWebSearchTool, CachedWikiTool, PreloadedPythonTool
 )
+from huggingface_hub import hf_hub_download
 subprocess.run(["playwright", "install"], check=True)
         # Check for an associated filename and enhance the prompt
         file_name = item.get("file_name")
         if file_name:
+            print(f"Task {task_id} requires file: '{file_name}'. Downloading via hf_hub_download...")
             file_downloaded = False
+            local_file_path = None # Will be updated if download is successful
+            repo_id = "gaia-benchmark/GAIA"
+            potential_paths = [
+                    f"2023/validation/{file_name}",
+                    f"2023/test/{file_name}"
+                ]
+            for path_in_repo in potential_paths:
                 try:
+                    print(f"Attempting to download from repo path: '{path_in_repo}'")
+                    # Use the official library to download the file
+                    local_file_path = hf_hub_download(
+                        repo_id=repo_id,
+                        filename=path_in_repo,
+                        repo_type="dataset",
+                        token=hf_token
+                    )
+                    print(f"Successfully downloaded '{file_name}' to cache path: {local_file_path}")
+                    # Inform the agent about the successful download and the exact path
+                    question_text += f"\n\n[System Note: The required file named '{file_name}' has been successfully downloaded and is available for analysis at the path '{local_file_path}'.]"
+                    file_downloaded = True
+                    break # Exit the loop on success
+                except HfHubHTTPError as e:
+                    # Specifically catch 404 Not Found errors and try the next path
+                    if e.response.status_code == 404:
+                        print(f"File not found at '{path_in_repo}'. Trying next location.")
+                        continue
                     else:
+                        # For other HTTP errors (like 401), stop trying
+                        print(f"HTTP Error {e.response.status_code} downloading '{path_in_repo}'. Aborting download for this file. Error: {e}")
                         break
+                except Exception as e:
+                    # For other exceptions (like network issues), stop trying
+                    print(f"An unexpected error occurred downloading '{path_in_repo}': {e}")
+                    break
             if not file_downloaded:
                 print(f"Failed to download '{file_name}' from all provided sources.")
+                question_text += f"\n\n[System Note: A file named '{file_name}' was required for this task, but it could not be downloaded. Please report that the file is inaccessible.]"
         if not task_id or question_text is None:
             print(f"Skipping item with missing task_id or question: {item}")