omnisealbench_mduppes

Sleeping

omnisealbench_mduppes / backend /data_processor.py

Mark Duppenthaler

Test docker

9a03fcf 21 days ago

3.97 kB

	import pandas as pd
	import os
	import json
	from typing import List, Dict, Any, Optional


	def load_data(csv_file_path: str) -> List[Dict[str, Any]]:
	    """
	    Read a CSV file and return its rows as a list of records

	    Args:
	        csv_file_path: Location of the CSV file on disk

	    Returns:
	        One dictionary per CSV row, keyed by column name

	    Raises:
	        FileNotFoundError: If nothing exists at csv_file_path
	    """
	    if os.path.exists(csv_file_path):
	        frame = pd.read_csv(csv_file_path)
	        # "records" orientation produces one dict per row, ready for JSON output
	        return frame.to_dict(orient="records")

	    raise FileNotFoundError(f"File not found: {csv_file_path}")


	def get_columns(csv_file_path: str) -> List[str]:
	    """
	    Get all column names from the CSV file

	    Only the header row is parsed, so the cost does not grow with the
	    number of data rows in the file.

	    Args:
	        csv_file_path: Path to the CSV file

	    Returns:
	        List of column names, in file order

	    Raises:
	        FileNotFoundError: If nothing exists at csv_file_path
	    """
	    # Check if file exists
	    if not os.path.exists(csv_file_path):
	        raise FileNotFoundError(f"File not found: {csv_file_path}")

	    # nrows=0 makes pandas read the header and skip every data row
	    header_only = pd.read_csv(csv_file_path, nrows=0)

	    # Return column names
	    return header_only.columns.tolist()


	def filter_data(
	    data: List[Dict[str, Any]], filter_columns: List[str]
	) -> List[Dict[str, Any]]:
	    """
	    Filter the data to include only specific columns

	    Args:
	        data: List of dictionaries representing the data
	        filter_columns: List of column names to include

	    Returns:
	        The original list object when filter_columns is empty; otherwise a
	        new list of new dictionaries holding only the requested keys, with
	        each item's own key order preserved
	    """
	    if not filter_columns:
	        return data

	    # Build the lookup set once so each key test is O(1) rather than a list scan
	    wanted = set(filter_columns)

	    return [
	        {key: value for key, value in item.items() if key in wanted}
	        for item in data
	    ]


	def search_data(data: List[Dict[str, Any]], search_query: str) -> List[Dict[str, Any]]:
	    """
	    Keep only the items in which some value contains the search query

	    Matching is case-insensitive and only inspects values that are
	    str, int or float; other value types are ignored.

	    Args:
	        data: List of dictionaries representing the data
	        search_query: Text to look for

	    Returns:
	        The original list when search_query is empty; otherwise a new list
	        with the matching items in their original order
	    """
	    if not search_query:
	        return data

	    # Lowercase once; every candidate value is lowercased before comparison
	    needle = search_query.lower()

	    def _row_matches(row: Dict[str, Any]) -> bool:
	        # True as soon as one searchable cell contains the needle
	        return any(
	            isinstance(cell, (str, int, float)) and needle in str(cell).lower()
	            for cell in row.values()
	        )

	    return [row for row in data if _row_matches(row)]


	def load_json_data(json_file_path: str) -> Dict[str, Any]:
	    """
	    Load data from a JSON file

	    The file is decoded as UTF-8 regardless of the platform's default
	    locale encoding, so non-ASCII text loads the same way everywhere.

	    Args:
	        json_file_path: Path to the JSON file

	    Returns:
	        The parsed JSON content

	    Raises:
	        FileNotFoundError: If nothing exists at json_file_path
	        json.JSONDecodeError: If the file content is not valid JSON
	    """
	    # Check if file exists
	    if not os.path.exists(json_file_path):
	        raise FileNotFoundError(f"File not found: {json_file_path}")

	    # Explicit encoding: without it open() falls back to the locale default
	    with open(json_file_path, "r", encoding="utf-8") as f:
	        return json.load(f)


	def get_available_benchmarks() -> List[Dict[str, str]]:
	    """
	    List the benchmarks whose CSV file is present under ./data

	    The lookup is relative to the current working directory.

	    Returns:
	        Benchmark descriptors (id, name, description, file) for every
	        known benchmark whose file exists, image first and then audio
	    """
	    # Known benchmarks, in the order they are reported
	    known = [
	        {
	            "id": "image",
	            "name": "Image Watermarks",
	            "description": "Evaluation of image watermarking techniques",
	            "file": "image_benchmark.csv",
	        },
	        {
	            "id": "audio",
	            "name": "Audio Watermarks",
	            "description": "Evaluation of audio watermarking techniques",
	            "file": "audio_benchmark.csv",
	        },
	    ]

	    # Keep only the descriptors whose data file is actually on disk
	    return [entry for entry in known if os.path.exists(f"./data/{entry['file']}")]