Upload folder using huggingface_hub
app.py CHANGED
@@ -24,11 +24,15 @@ ALL_DB_COLS = BASE_COLS + EXTRA_COLS
 TABLE_COLS = ["id","filename","category","dataset",
               "tags","keywords","notes","uploaded_at"]
 
-#
-
-
-INDEX_FILENAME
+# ---------- Load HF index ----------
+# ----------- HF DATASET CONFIG -----------
+HF_DATASET_REPO = "akazemian/audio-html"  # dataset holding the HTMLs + index.csv
+INDEX_FILENAME = "index.csv"
+# -----------------------------------------
 
+# Always read index.csv from the DATASET (not the Space)
+HF_INDEX_REPO_ID = HF_DATASET_REPO
+HF_INDEX_REPO_TYPE = "dataset"
 
 # at the top
 import os
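The hunk above only wires configuration: the Space now points at the akazemian/audio-html dataset for both the HTML files and index.csv. As a minimal sketch (not part of this commit), a startup sanity check could fail fast if the dataset does not actually expose index.csv; it assumes the repo is public or that an HF_TOKEN secret is configured for the Space.

# Sketch only (not part of this commit): fail fast if index.csv is missing
# from the dataset repo. Assumes the repo is public or HF_TOKEN is set as a
# Space secret; the two constants mirror the ones added in the hunk above.
import os
from huggingface_hub import HfApi

HF_DATASET_REPO = "akazemian/audio-html"
INDEX_FILENAME = "index.csv"

api = HfApi(token=os.getenv("HF_TOKEN"))  # token is optional for public repos
files = api.list_repo_files(HF_DATASET_REPO, repo_type="dataset")
if INDEX_FILENAME not in files:
    raise RuntimeError(
        f"{INDEX_FILENAME} not found in dataset {HF_DATASET_REPO}; "
        "upload it before starting the Space."
    )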
@@ -122,27 +126,29 @@ def _df_from_table_value(table_value):
         return pd.DataFrame(table_value, columns=cols)
     return pd.DataFrame(columns=cols)
 
-
+
 def _load_hf_index() -> pd.DataFrame:
     """
-    Download + read index.csv from the HF dataset repo.
+    Download + read index.csv from the HF *dataset* repo.
     Required columns: id, filename, relpath, category, dataset, tags, keywords, notes, uploaded_at
     """
     local = hf_hub_download(
-        repo_id=HF_INDEX_REPO_ID,
-        repo_type=HF_INDEX_REPO_TYPE,
+        repo_id=HF_INDEX_REPO_ID,      # = HF_DATASET_REPO
+        repo_type=HF_INDEX_REPO_TYPE,  # = "dataset"
         filename=INDEX_FILENAME,
     )
+    # Optional: log where we loaded from (shows in Space logs)
+    print(f"[index] loaded from {HF_INDEX_REPO_TYPE}:{HF_INDEX_REPO_ID}/{INDEX_FILENAME} -> {local}")
 
     df = pd.read_csv(local)
     for c in ["id","filename","relpath","category","dataset","tags","keywords","notes","uploaded_at"]:
         if c not in df.columns:
             df[c] = ""
-    # normalize types
     for c in ["id","filename","relpath","category","dataset","tags","keywords","notes","uploaded_at"]:
         df[c] = df[c].fillna("").astype(str)
     return df
 
+
 # ---------- Sync by model (prefix inside HF dataset) ----------
 from urllib.parse import unquote  # ensure this import exists at top
 
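With both hunks applied, the UI table can be rebuilt straight from the dataset-hosted index. A rough usage sketch under the same assumptions follows; the Gradio wiring that actually displays the table in app.py is not part of this diff.

# Sketch only: how the loaded index would typically feed the UI table.
# TABLE_COLS and _load_hf_index are the names from the diff above and are
# assumed to be defined in the same module; the callbacks that render
# table_df are not shown in this commit.
df = _load_hf_index()          # one row per uploaded HTML file
table_df = df[TABLE_COLS]      # drop relpath and other non-display columns
print(f"{len(table_df)} files indexed")
print(table_df.head())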