omnisealbench_mduppes

Sleeping

File size: 3,972 Bytes

9a03fcf

import json
import math
import os
from typing import List, Dict, Any, Optional

import pandas as pd


def load_data(csv_file_path: str) -> List[Dict[str, Any]]:
    """
    Read a CSV file and return its rows as a list of dictionaries

    Args:
        csv_file_path: Path to the CSV file

    Returns:
        One dictionary per CSV row, mapping column name to cell value

    Raises:
        FileNotFoundError: If nothing exists at csv_file_path
    """
    if os.path.exists(csv_file_path):
        frame = pd.read_csv(csv_file_path)
        # "records" orientation yields one {column: value} dict per row
        records = frame.to_dict(orient="records")
        return records

    # Nothing at that path: report it with this module's own message
    raise FileNotFoundError(f"File not found: {csv_file_path}")


def get_columns(csv_file_path: str) -> List[str]:
    """
    Get all column names from the CSV file

    Only the header row is parsed, so the cost does not grow with the
    number of data rows in the file.

    Args:
        csv_file_path: Path to the CSV file

    Returns:
        List of column names, in file order

    Raises:
        FileNotFoundError: If nothing exists at csv_file_path
    """
    # Check if file exists
    if not os.path.exists(csv_file_path):
        raise FileNotFoundError(f"File not found: {csv_file_path}")

    # nrows=0 makes pandas read the header and stop before any data rows
    header = pd.read_csv(csv_file_path, nrows=0)

    # Return column names
    return header.columns.tolist()


def filter_data(
    data: List[Dict[str, Any]], filter_columns: List[str]
) -> List[Dict[str, Any]]:
    """
    Filter the data to include only specific columns

    Args:
        data: List of dictionaries representing the data
        filter_columns: Column names to keep. When empty or None, no
            filtering is done and the input list itself is returned.

    Returns:
        A new list of new dictionaries holding only the requested columns
        (original key order kept; columns absent from a row are skipped).
        The input rows are not modified.
    """
    if not filter_columns:
        return data

    # Build the lookup once: constant-time membership per key, and a
    # one-shot iterable of names is consumed here instead of being
    # exhausted while filtering the first row.
    wanted = frozenset(filter_columns)

    return [
        {key: value for key, value in row.items() if key in wanted}
        for row in data
    ]


def search_data(data: List[Dict[str, Any]], search_query: str) -> List[Dict[str, Any]]:
    """
    Search the data for items matching the search query

    An item matches when the query is a case-insensitive substring of the
    string form of at least one of its str, int or float values. Values of
    other types are ignored, and so are NaN floats: a NaN would otherwise
    stringify to "nan" and match queries such as "nan" or "an".

    Args:
        data: List of dictionaries representing the data
        search_query: Search query string. When empty, no search is done
            and the input list itself is returned.

    Returns:
        Filtered data containing only matching items, in input order
    """
    if not search_query:
        return data

    # Convert to lowercase for case-insensitive search
    needle = search_query.lower()

    def _matches(value: Any) -> bool:
        # Only scalar text/number cells are searchable
        if not isinstance(value, (str, int, float)):
            return False
        # Skip NaN so the literal text "nan" is never searched
        if isinstance(value, float) and math.isnan(value):
            return False
        return needle in str(value).lower()

    # any() stops at the first matching value, so each item is added once
    return [item for item in data if any(_matches(v) for v in item.values())]


def load_json_data(json_file_path: str) -> Dict[str, Any]:
    """
    Load data from a JSON file

    The file is decoded as UTF-8 regardless of the platform's default
    encoding, so non-ASCII content loads the same way everywhere.

    Args:
        json_file_path: Path to the JSON file

    Returns:
        The parsed JSON content

    Raises:
        FileNotFoundError: If nothing exists at json_file_path
        json.JSONDecodeError: If the file content is not valid JSON
    """
    # Check if file exists
    if not os.path.exists(json_file_path):
        raise FileNotFoundError(f"File not found: {json_file_path}")

    # Read the JSON file; be explicit about the encoding instead of
    # relying on the locale-dependent default of open()
    with open(json_file_path, "r", encoding="utf-8") as f:
        return json.load(f)


def get_available_benchmarks(data_dir: str = "./data") -> List[Dict[str, str]]:
    """
    Get a list of available benchmarks in the data folder

    A benchmark is reported only when its CSV file exists in data_dir.

    Args:
        data_dir: Folder to look in for the benchmark CSV files. Defaults
            to "./data", resolved against the current working directory.

    Returns:
        List of dictionaries with benchmark information ("id", "name",
        "description", "file"), image first, then audio
    """
    # Known benchmarks, in the order they are reported
    known_benchmarks = (
        {
            "id": "image",
            "name": "Image Watermarks",
            "description": "Evaluation of image watermarking techniques",
            "file": "image_benchmark.csv",
        },
        {
            "id": "audio",
            "name": "Audio Watermarks",
            "description": "Evaluation of audio watermarking techniques",
            "file": "audio_benchmark.csv",
        },
    )

    # Keep only the benchmarks whose data file is actually present
    return [
        benchmark
        for benchmark in known_benchmarks
        if os.path.exists(os.path.join(data_dir, benchmark["file"]))
    ]