Spaces:
Sleeping
Sleeping
| import json | |
| import os | |
| from typing import Optional | |
def find_file_by_task_id(task_id: str, metadata_path: str = "validation/metadata.jsonl") -> Optional[str]:
    """
    Search for a filename that matches a given task_id in the metadata.jsonl file.

    The metadata file is read line by line; each line is expected to hold one
    JSON object with a 'task_id' key and an optional 'file_name' key. Lines
    that are blank, are not valid JSON, or do not decode to a JSON object are
    skipped. The first record whose 'task_id' equals ``task_id`` decides the
    result.

    Args:
        task_id (str): The task_id to search for
        metadata_path (str): Path to the metadata.jsonl file. Defaults to the
            validation directory path. If this path does not exist, the
            "validation/metadata.jsonl" file next to this module is tried.

    Returns:
        Optional[str]: The filename if found, None if not found or if task_id
            has no associated file

    Raises:
        FileNotFoundError: If neither ``metadata_path`` nor the fallback next
            to this module exists.

    Example:
        >>> find_file_by_task_id("32102e3e-d12a-4209-9163-7b3a104efe5d")
        "32102e3e-d12a-4209-9163-7b3a104efe5d.xlsx"
    """
    if not os.path.exists(metadata_path):
        # The given path is usually relative to the current working
        # directory; fall back to the copy shipped next to this module.
        try:
            module_dir = os.path.dirname(os.path.abspath(__file__))
        except NameError:
            # __file__ is undefined in some interactive/embedded contexts.
            module_dir = None

        fallback_path = None
        if module_dir is not None:
            fallback_path = os.path.join(module_dir, "validation", "metadata.jsonl")

        # Fail with an explicit message instead of letting open() complain
        # about a path the caller never passed in.
        if fallback_path is None or not os.path.exists(fallback_path):
            raise FileNotFoundError(f"Metadata file not found at {metadata_path}")
        metadata_path = fallback_path

    with open(metadata_path, 'r', encoding='utf-8') as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            try:
                data = json.loads(line)
            except json.JSONDecodeError:
                # Best-effort scan: ignore lines that are not valid JSON.
                continue
            if not isinstance(data, dict):
                # Valid JSON but not an object (list, number, ...): it has no
                # 'task_id' to compare, so skip it rather than crash on .get().
                continue
            if data.get('task_id') == task_id:
                filename = data.get('file_name', '')
                # An empty or missing file_name means the task has no file.
                return filename if filename else None
    return None
def get_full_file_path(task_id: str, base_dir: str = "validation") -> Optional[str]:
    """
    Get the full file path for a given task_id if it exists.

    The filename is looked up with find_file_by_task_id (using its default
    metadata path) and then joined onto ``base_dir``. If that path does not
    exist and ``base_dir`` is relative, the same relative path is also tried
    next to this module, matching the fallback find_file_by_task_id applies
    to the metadata file.

    Args:
        task_id (str): The task_id to search for
        base_dir (str): Base directory where files are stored. Defaults to
            validation directory.

    Returns:
        Optional[str]: Full path to the file if found, None if the task_id
            has no associated filename or no candidate path exists on disk

    Example:
        >>> get_full_file_path("32102e3e-d12a-4209-9163-7b3a104efe5d")
        "validation/32102e3e-d12a-4209-9163-7b3a104efe5d.xlsx"
    """
    filename = find_file_by_task_id(task_id)
    if not filename:
        print("FILE NOT FOUND FOR TASK ID: ", task_id)
        return None

    # First candidate: base_dir exactly as given (relative to the CWD when
    # it is a relative path).
    candidates = [os.path.join(base_dir, filename)]

    # Second candidate: the same relative directory next to this module, so
    # the lookup still works when the process runs from another directory.
    if not os.path.isabs(base_dir):
        try:
            module_dir = os.path.dirname(os.path.abspath(__file__))
        except NameError:
            # __file__ is undefined in some interactive/embedded contexts.
            module_dir = None
        if module_dir is not None:
            candidates.append(os.path.join(module_dir, base_dir, filename))

    for candidate in candidates:
        if os.path.exists(candidate):
            return candidate
    return None