File size: 3,972 Bytes
9a03fcf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
import json
import math
import os
from typing import List, Dict, Any, Optional

import pandas as pd


def load_data(csv_file_path: str) -> List[Dict[str, Any]]:
    """
    Read a CSV file and return its rows as a list of dictionaries

    Each row becomes one dictionary keyed by column name, in file order.

    Args:
        csv_file_path: Path to the CSV file

    Returns:
        One dictionary per CSV row

    Raises:
        FileNotFoundError: If nothing exists at csv_file_path
    """
    if os.path.exists(csv_file_path):
        # "records" orientation yields one {column: value} dict per row
        return pd.read_csv(csv_file_path).to_dict(orient="records")

    raise FileNotFoundError(f"File not found: {csv_file_path}")


def get_columns(csv_file_path: str) -> List[str]:
    """
    Get all column names from the CSV file

    Only the header row is parsed, so the cost does not grow with the
    number of data rows in the file.

    Args:
        csv_file_path: Path to the CSV file

    Returns:
        List of column names, in file order

    Raises:
        FileNotFoundError: If nothing exists at csv_file_path
    """
    # Check if file exists
    if not os.path.exists(csv_file_path):
        raise FileNotFoundError(f"File not found: {csv_file_path}")

    # nrows=0 makes pandas parse the header without loading any data rows
    header_only = pd.read_csv(csv_file_path, nrows=0)

    return header_only.columns.tolist()


def filter_data(
    data: List[Dict[str, Any]], filter_columns: List[str]
) -> List[Dict[str, Any]]:
    """
    Filter the data to include only specific columns

    Each returned row is a new dictionary; keys keep the order they had in
    the original row. Requested columns that a row does not contain are
    simply absent from that row's result.

    Args:
        data: List of dictionaries representing the data
        filter_columns: List of column names to include. When empty, the
            input list is returned unchanged (same object).

    Returns:
        Filtered data
    """
    if not filter_columns:
        return data

    # Build the lookup once so each key test is O(1) instead of a list scan
    wanted = set(filter_columns)

    return [
        {key: value for key, value in item.items() if key in wanted}
        for item in data
    ]


def search_data(data: List[Dict[str, Any]], search_query: str) -> List[Dict[str, Any]]:
    """
    Search the data for items matching the search query

    An item matches when the text form of any of its str/int/float values
    contains the query, compared case-insensitively. Values of other types
    (e.g. None, lists) are never matched. Float NaN values are skipped so
    that missing cells, which stringify to "nan", do not produce spurious
    hits for queries such as "na" or "nan".

    Args:
        data: List of dictionaries representing the data
        search_query: Search query string. When empty, the input list is
            returned unchanged (same object).

    Returns:
        Filtered data containing only matching items, in input order
    """
    if not search_query:
        return data

    # Convert to lowercase for case-insensitive search
    needle = search_query.lower()

    def _value_matches(value: Any) -> bool:
        # Only plain scalar values are searchable
        if not isinstance(value, (str, int, float)):
            return False
        # NaN marks a missing cell; its text "nan" must not count as content
        if isinstance(value, float) and math.isnan(value):
            return False
        return needle in str(value).lower()

    # any() stops at the first matching value, so each item is added once
    return [
        item for item in data if any(_value_matches(v) for v in item.values())
    ]


def load_json_data(json_file_path: str) -> Dict[str, Any]:
    """
    Load data from a JSON file

    The file is decoded as UTF-8 regardless of the platform's default
    encoding, so non-ASCII content loads the same way on every system.

    Args:
        json_file_path: Path to the JSON file

    Returns:
        The parsed JSON content (a dictionary when the file's top-level
        value is a JSON object)

    Raises:
        FileNotFoundError: If nothing exists at json_file_path
        json.JSONDecodeError: If the file content is not valid JSON
    """
    # Check if file exists
    if not os.path.exists(json_file_path):
        raise FileNotFoundError(f"File not found: {json_file_path}")

    # Explicit encoding: open() would otherwise use the locale default
    with open(json_file_path, "r", encoding="utf-8") as f:
        return json.load(f)


def get_available_benchmarks(data_dir: str = "./data") -> List[Dict[str, str]]:
    """
    Get a list of available benchmarks in the data folder

    A benchmark is reported only when its CSV file exists inside data_dir.
    The default "./data" is resolved against the current working directory.

    Args:
        data_dir: Folder to look in for the benchmark CSV files

    Returns:
        List of dictionaries with benchmark information (keys "id",
        "name", "description", "file"), image first, then audio
    """
    # Known benchmarks, in the order they are reported
    known_benchmarks = (
        {
            "id": "image",
            "name": "Image Watermarks",
            "description": "Evaluation of image watermarking techniques",
            "file": "image_benchmark.csv",
        },
        {
            "id": "audio",
            "name": "Audio Watermarks",
            "description": "Evaluation of audio watermarking techniques",
            "file": "audio_benchmark.csv",
        },
    )

    # Copy each entry so callers can mutate the result without affecting
    # what later calls return
    return [
        dict(benchmark)
        for benchmark in known_benchmarks
        if os.path.exists(os.path.join(data_dir, benchmark["file"]))
    ]