# omnisealbench_mduppes/backend/data_processor.py
# Mark Duppenthaler
# Test docker
# 9a03fcf
import pandas as pd
import os
import json
from typing import List, Dict, Any, Optional
def load_data(csv_file_path: str) -> List[Dict[str, Any]]:
    """
    Load a CSV file and return its rows as JSON-serializable dictionaries.

    Args:
        csv_file_path: Path to the CSV file

    Returns:
        One dictionary per CSV row, keyed by column name. Missing cells
        are returned as None rather than NaN.

    Raises:
        FileNotFoundError: If csv_file_path does not exist
    """
    if not os.path.exists(csv_file_path):
        raise FileNotFoundError(f"File not found: {csv_file_path}")

    df = pd.read_csv(csv_file_path)

    # pandas represents missing cells as NaN, which json.dumps emits as the
    # bare token NaN -- not valid JSON and rejected by strict parsers.
    # Replace them with None so they serialize as null.
    return [
        {
            column: None if pd.isna(value) else value
            for column, value in row.items()
        }
        for row in df.to_dict(orient="records")
    ]
def get_columns(csv_file_path: str) -> List[str]:
    """
    Get all column names from the CSV file

    Args:
        csv_file_path: Path to the CSV file

    Returns:
        List of column names, in file order

    Raises:
        FileNotFoundError: If csv_file_path does not exist
    """
    if not os.path.exists(csv_file_path):
        raise FileNotFoundError(f"File not found: {csv_file_path}")

    # nrows=0 parses only the header row, so the data rows are never
    # loaded just to discover the column names.
    header_only = pd.read_csv(csv_file_path, nrows=0)
    return header_only.columns.tolist()
def filter_data(
    data: List[Dict[str, Any]], filter_columns: List[str]
) -> List[Dict[str, Any]]:
    """
    Project each row onto a chosen subset of columns

    Args:
        data: List of dictionaries representing the data
        filter_columns: Column names to keep; when empty, no filtering
            is applied

    Returns:
        The input list itself when filter_columns is empty; otherwise a
        new list of new dictionaries holding only the requested keys
    """
    if filter_columns:
        return [
            {name: cell for name, cell in row.items() if name in filter_columns}
            for row in data
        ]
    return data
def search_data(data: List[Dict[str, Any]], search_query: str) -> List[Dict[str, Any]]:
    """
    Keep the rows in which at least one value contains the search query

    Matching is a case-insensitive substring test against the string
    form of each str, int or float value; values of other types are
    not searched.

    Args:
        data: List of dictionaries representing the data
        search_query: Search query string

    Returns:
        The input list itself when search_query is empty; otherwise a
        new list of the matching rows, in their original order
    """
    if not search_query:
        return data

    needle = search_query.lower()

    def _matches(cell: Any) -> bool:
        # Only scalar text/number cells take part in the search.
        return isinstance(cell, (str, int, float)) and needle in str(cell).lower()

    return [row for row in data if any(_matches(cell) for cell in row.values())]
def load_json_data(json_file_path: str) -> Dict[str, Any]:
    """
    Load data from a JSON file

    Args:
        json_file_path: Path to the JSON file

    Returns:
        The parsed JSON document

    Raises:
        FileNotFoundError: If json_file_path does not exist
        json.JSONDecodeError: If the file does not contain valid JSON
    """
    if not os.path.exists(json_file_path):
        raise FileNotFoundError(f"File not found: {json_file_path}")

    # Decode explicitly as UTF-8: without an encoding argument, open()
    # uses the platform's locale encoding, which misreads non-ASCII text
    # on systems whose default is not UTF-8.
    with open(json_file_path, "r", encoding="utf-8") as f:
        return json.load(f)
def get_available_benchmarks(data_dir: str = "./data") -> List[Dict[str, str]]:
    """
    Get a list of available benchmarks in the data folder

    A benchmark is reported when its CSV file exists inside data_dir.

    Args:
        data_dir: Folder to look in. Defaults to "./data", which is
            resolved against the current working directory.

    Returns:
        List of dictionaries with benchmark information (keys "id",
        "name", "description" and "file"), image first, then audio
    """
    # Built on every call so callers always receive fresh dictionaries
    # they are free to mutate.
    known_benchmarks = (
        {
            "id": "image",
            "name": "Image Watermarks",
            "description": "Evaluation of image watermarking techniques",
            "file": "image_benchmark.csv",
        },
        {
            "id": "audio",
            "name": "Audio Watermarks",
            "description": "Evaluation of audio watermarking techniques",
            "file": "audio_benchmark.csv",
        },
    )
    return [
        benchmark
        for benchmark in known_benchmarks
        if os.path.exists(os.path.join(data_dir, benchmark["file"]))
    ]