akazemian committed on
Commit
f1ac269
·
verified ·
1 Parent(s): af82024

Upload folder using huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +15 -9
app.py CHANGED
@@ -24,11 +24,15 @@ ALL_DB_COLS = BASE_COLS + EXTRA_COLS
24
  TABLE_COLS = ["id","filename","category","dataset",
25
  "tags","keywords","notes","uploaded_at"]
26
 
27
- # At top-level config
28
- HF_INDEX_REPO_ID = "akazemian/audio-library" # where index.csv lives *now*
29
- HF_INDEX_REPO_TYPE = "space" # <— it's a Space, not a dataset
30
- INDEX_FILENAME = "index.csv"
 
31
 
 
 
 
32
 
33
  # at the top
34
  import os
@@ -122,27 +126,29 @@ def _df_from_table_value(table_value):
122
  return pd.DataFrame(table_value, columns=cols)
123
  return pd.DataFrame(columns=cols)
124
 
125
- # ---------- Load HF index ----------
126
  def _load_hf_index() -> pd.DataFrame:
127
  """
128
- Download + read index.csv from the HF dataset repo.
129
  Required columns: id, filename, relpath, category, dataset, tags, keywords, notes, uploaded_at
130
  """
131
  local = hf_hub_download(
132
- repo_id=HF_INDEX_REPO_ID,
133
- repo_type=HF_INDEX_REPO_TYPE,
134
  filename=INDEX_FILENAME,
135
  )
 
 
136
 
137
  df = pd.read_csv(local)
138
  for c in ["id","filename","relpath","category","dataset","tags","keywords","notes","uploaded_at"]:
139
  if c not in df.columns:
140
  df[c] = ""
141
- # normalize types
142
  for c in ["id","filename","relpath","category","dataset","tags","keywords","notes","uploaded_at"]:
143
  df[c] = df[c].fillna("").astype(str)
144
  return df
145
 
 
146
  # ---------- Sync by model (prefix inside HF dataset) ----------
147
  from urllib.parse import unquote # ensure this import exists at top
148
 
 
24
  TABLE_COLS = ["id","filename","category","dataset",
25
  "tags","keywords","notes","uploaded_at"]
26
 
27
+ # ---------- Load HF index ----------
28
+ # ----------- HF DATASET CONFIG -----------
29
+ HF_DATASET_REPO = "akazemian/audio-html" # dataset holding the HTMLs + index.csv
30
+ INDEX_FILENAME = "index.csv"
31
+ # -----------------------------------------
32
 
33
+ # Always read index.csv from the DATASET (not the Space)
34
+ HF_INDEX_REPO_ID = HF_DATASET_REPO
35
+ HF_INDEX_REPO_TYPE = "dataset"
36
 
37
  # at the top
38
  import os
 
126
  return pd.DataFrame(table_value, columns=cols)
127
  return pd.DataFrame(columns=cols)
128
 
129
+
130
  def _load_hf_index() -> pd.DataFrame:
131
  """
132
+ Download + read index.csv from the HF *dataset* repo.
133
  Required columns: id, filename, relpath, category, dataset, tags, keywords, notes, uploaded_at
134
  """
135
  local = hf_hub_download(
136
+ repo_id=HF_INDEX_REPO_ID, # = HF_DATASET_REPO
137
+ repo_type=HF_INDEX_REPO_TYPE, # = "dataset"
138
  filename=INDEX_FILENAME,
139
  )
140
+ # Optional: log where we loaded from (shows in Space logs)
141
+ print(f"[index] loaded from {HF_INDEX_REPO_TYPE}:{HF_INDEX_REPO_ID}/{INDEX_FILENAME} -> {local}")
142
 
143
  df = pd.read_csv(local)
144
  for c in ["id","filename","relpath","category","dataset","tags","keywords","notes","uploaded_at"]:
145
  if c not in df.columns:
146
  df[c] = ""
 
147
  for c in ["id","filename","relpath","category","dataset","tags","keywords","notes","uploaded_at"]:
148
  df[c] = df[c].fillna("").astype(str)
149
  return df
150
 
151
+
152
  # ---------- Sync by model (prefix inside HF dataset) ----------
153
  from urllib.parse import unquote # ensure this import exists at top
154