"""Helpers for loading, filtering and searching benchmark data files."""

import json
import math
import os
from typing import List, Dict, Any, Optional

import pandas as pd

# Known benchmark CSVs, in the order they are reported by
# get_available_benchmarks().
_BENCHMARKS = (
    {
        "id": "image",
        "name": "Image Watermarks",
        "description": "Evaluation of image watermarking techniques",
        "file": "image_benchmark.csv",
    },
    {
        "id": "audio",
        "name": "Audio Watermarks",
        "description": "Evaluation of audio watermarking techniques",
        "file": "audio_benchmark.csv",
    },
)


def _require_file(path: str) -> None:
    """Raise FileNotFoundError with a uniform message if *path* is absent."""
    if not os.path.exists(path):
        raise FileNotFoundError(f"File not found: {path}")


def _value_matches(value: Any, needle: str) -> bool:
    """Return True if a str/int/float *value* contains the lowercase *needle*."""
    if not isinstance(value, (str, int, float)):
        return False
    # Empty CSV cells arrive as float NaN; str(nan) == "nan" would otherwise
    # make queries such as "an" match every row that has a missing value.
    if isinstance(value, float) and math.isnan(value):
        return False
    return needle in str(value).lower()


def load_data(csv_file_path: str) -> List[Dict[str, Any]]:
    """
    Load data from a CSV file and convert to a list of dictionaries

    Args:
        csv_file_path: Path to the CSV file

    Returns:
        List of dictionaries (one per row, keyed by column name).
        Empty cells are returned as pandas parsed them (float NaN).

    Raises:
        FileNotFoundError: If csv_file_path does not exist
    """
    _require_file(csv_file_path)
    df = pd.read_csv(csv_file_path)
    # One dict per row, convenient for JSON serialization
    return df.to_dict(orient="records")


def get_columns(csv_file_path: str) -> List[str]:
    """
    Get all column names from the CSV file

    Args:
        csv_file_path: Path to the CSV file

    Returns:
        List of column names, in file order

    Raises:
        FileNotFoundError: If csv_file_path does not exist
    """
    _require_file(csv_file_path)
    # nrows=0 parses only the header, so large files are not read in full
    # just to learn their column names.
    header = pd.read_csv(csv_file_path, nrows=0)
    return header.columns.tolist()


def filter_data(
    data: List[Dict[str, Any]], filter_columns: List[str]
) -> List[Dict[str, Any]]:
    """
    Filter the data to include only specific columns

    Args:
        data: List of dictionaries representing the data
        filter_columns: List of column names to include

    Returns:
        New list of dictionaries restricted to the requested columns. If
        filter_columns is empty, the input list is returned unchanged.
    """
    if not filter_columns:
        return data

    # Build the lookup set once instead of scanning the list per key
    wanted = set(filter_columns)
    return [
        {key: value for key, value in item.items() if key in wanted}
        for item in data
    ]


def search_data(data: List[Dict[str, Any]], search_query: str) -> List[Dict[str, Any]]:
    """
    Search the data for items matching the search query

    A row matches when any of its str/int/float values contains the query
    as a case-insensitive substring. Missing values (NaN) never match.

    Args:
        data: List of dictionaries representing the data
        search_query: Search query string

    Returns:
        Filtered data containing only matching items. If search_query is
        empty, the input list is returned unchanged.
    """
    if not search_query:
        return data

    # Lowercase once for case-insensitive comparison
    needle = search_query.lower()
    return [
        item
        for item in data
        if any(_value_matches(value, needle) for value in item.values())
    ]


def load_json_data(json_file_path: str) -> Dict[str, Any]:
    """
    Load data from a JSON file

    Args:
        json_file_path: Path to the JSON file

    Returns:
        The parsed JSON content

    Raises:
        FileNotFoundError: If json_file_path does not exist
    """
    _require_file(json_file_path)
    # Decode explicitly as UTF-8 rather than the platform default encoding,
    # which can fail on non-ASCII content.
    with open(json_file_path, "r", encoding="utf-8") as f:
        return json.load(f)


def get_available_benchmarks(data_dir: str = "./data") -> List[Dict[str, str]]:
    """
    Get a list of available benchmarks in the data folder

    Args:
        data_dir: Folder to look in for the benchmark CSV files. Defaults to
            "./data", which is resolved against the current working directory.

    Returns:
        List of dictionaries with benchmark information ("id", "name",
        "description", "file") for each benchmark whose CSV file exists
    """
    return [
        dict(benchmark)  # copy so callers cannot mutate the module-level table
        for benchmark in _BENCHMARKS
        if os.path.exists(os.path.join(data_dir, benchmark["file"]))
    ]