Spaces:
Sleeping
Sleeping
File size: 3,972 Bytes
9a03fcf |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 |
import pandas as pd
import os
import json
from typing import List, Dict, Any, Optional
def load_data(csv_file_path: str) -> List[Dict[str, Any]]:
    """
    Load data from a CSV file and convert to a list of dictionaries

    Missing cells are returned as ``None`` instead of pandas' float ``NaN``,
    so the records serialize to valid JSON (``null``). ``NaN`` is not a legal
    JSON token and is rejected by strict parsers.

    Args:
        csv_file_path: Path to the CSV file
    Returns:
        List of dictionaries, one per CSV row, keyed by column name
    Raises:
        FileNotFoundError: If csv_file_path does not exist
    """
    # Fail early with a uniform message before handing the path to pandas
    if not os.path.exists(csv_file_path):
        raise FileNotFoundError(f"File not found: {csv_file_path}")

    df = pd.read_csv(csv_file_path)
    records = df.to_dict(orient="records")

    # Normalize per cell (rather than on the DataFrame) so the result does not
    # depend on column dtypes: float columns cannot hold None directly.
    return [
        {
            column: (None if pd.isna(value) else value)
            for column, value in record.items()
        }
        for record in records
    ]
def get_columns(csv_file_path: str) -> List[str]:
    """
    Get all column names from the CSV file

    Only the header row is parsed, so the cost does not grow with the number
    of data rows in the file.

    Args:
        csv_file_path: Path to the CSV file
    Returns:
        List of column names, in file order
    Raises:
        FileNotFoundError: If csv_file_path does not exist
    """
    # Fail early with a uniform message before handing the path to pandas
    if not os.path.exists(csv_file_path):
        raise FileNotFoundError(f"File not found: {csv_file_path}")

    # nrows=0 asks pandas for the header only; no data rows are materialized
    header_only = pd.read_csv(csv_file_path, nrows=0)
    return header_only.columns.tolist()
def filter_data(
    data: List[Dict[str, Any]], filter_columns: List[str]
) -> List[Dict[str, Any]]:
    """
    Project every record onto a chosen subset of columns

    Args:
        data: List of dictionaries representing the data
        filter_columns: Column names to keep; an empty list means "keep all"
    Returns:
        The input list itself when filter_columns is empty; otherwise a new
        list of new dictionaries holding only the requested keys, in each
        record's original key order
    """
    # Nothing to project onto: hand back the caller's list unchanged
    if not filter_columns:
        return data

    return [
        {key: record[key] for key in record if key in filter_columns}
        for record in data
    ]
def search_data(data: List[Dict[str, Any]], search_query: str) -> List[Dict[str, Any]]:
    """
    Keep the records in which at least one value contains the search query

    Matching is a case-insensitive substring test against the string form of
    each value. Only str, int and float values are inspected; anything else
    (None, lists, nested dicts, ...) is ignored.

    Args:
        data: List of dictionaries representing the data
        search_query: Search query string; an empty query means "keep all"
    Returns:
        The input list itself when search_query is empty; otherwise a new list
        of the matching records, each appearing once, in input order
    """
    if not search_query:
        return data

    # Lower-case once so every comparison below is case-insensitive
    needle = search_query.lower()

    def _matches(record: Dict[str, Any]) -> bool:
        # any() stops at the first hit, so a record is never added twice
        return any(
            isinstance(cell, (str, int, float)) and needle in str(cell).lower()
            for cell in record.values()
        )

    return [record for record in data if _matches(record)]
def load_json_data(json_file_path: str) -> Dict[str, Any]:
    """
    Load data from a JSON file

    Args:
        json_file_path: Path to the JSON file
    Returns:
        The parsed JSON content (a dictionary when the top-level JSON value
        is an object)
    Raises:
        FileNotFoundError: If json_file_path does not exist
        json.JSONDecodeError: If the file content is not valid JSON
    """
    if not os.path.exists(json_file_path):
        raise FileNotFoundError(f"File not found: {json_file_path}")

    # JSON text is UTF-8; without an explicit encoding, open() falls back to
    # the platform locale and can mis-decode non-ASCII content.
    with open(json_file_path, "r", encoding="utf-8") as f:
        return json.load(f)
def get_available_benchmarks(data_dir: str = "./data") -> List[Dict[str, str]]:
    """
    Get a list of available benchmarks in the data folder

    A benchmark is reported only when its CSV file exists in data_dir.

    Args:
        data_dir: Folder to look in for the benchmark CSV files
    Returns:
        List of dictionaries with benchmark information ("id", "name",
        "description", "file"), image first and then audio
    """
    # Built per call so callers receive fresh dictionaries they may mutate
    known_benchmarks = (
        {
            "id": "image",
            "name": "Image Watermarks",
            "description": "Evaluation of image watermarking techniques",
            "file": "image_benchmark.csv",
        },
        {
            "id": "audio",
            "name": "Audio Watermarks",
            "description": "Evaluation of audio watermarking techniques",
            "file": "audio_benchmark.csv",
        },
    )

    return [
        benchmark
        for benchmark in known_benchmarks
        if os.path.exists(os.path.join(data_dir, benchmark["file"]))
    ]
|