Spaces:
Sleeping
Sleeping
Upload folder using huggingface_hub
Browse files- library.csv +2 -2
- sync_library_and_hf.py +8 -7
library.csv
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ed730a77eb0d3d1ffd3c94288bb944014280dbdd587d9ae3f4331f0a908544a2
|
| 3 |
+
size 346166
|
sync_library_and_hf.py
CHANGED
|
@@ -2,13 +2,7 @@
|
|
| 2 |
# sync_library_and_hf.py
|
| 3 |
''''
|
| 4 |
RUN BELOW FOR NEW HTML FILES TO UPDATE OLD ONES ON DATASET REPO
|
| 5 |
-
python sync_library_and_hf.py
|
| 6 |
-
--db-path library.csv
|
| 7 |
-
--repo-id akazemian/audio-html
|
| 8 |
-
--model-name wavcoch_audio-preds-sr=16000
|
| 9 |
-
--index-filename index.csv
|
| 10 |
-
--wipe-remote
|
| 11 |
-
--wipe-local
|
| 12 |
'''
|
| 13 |
import argparse, datetime, uuid, posixpath, sys, traceback, os, hashlib
|
| 14 |
from pathlib import Path
|
|
@@ -16,6 +10,7 @@ from typing import List, Tuple, Set
|
|
| 16 |
from urllib.parse import unquote
|
| 17 |
import os
|
| 18 |
import pandas as pd
|
|
|
|
| 19 |
from huggingface_hub import (
|
| 20 |
HfApi,
|
| 21 |
hf_hub_download,
|
|
@@ -57,6 +52,12 @@ def create_file_specific_manifest(csv_path: Path) -> pd.DataFrame:
|
|
| 57 |
audio_dir = "/data/atlask/BAU-Quant/val"
|
| 58 |
manifest = pd.read_csv(csv_path)
|
| 59 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 60 |
# 1) Build a files dataframe
|
| 61 |
files = pd.DataFrame({"file_name": os.listdir(audio_dir)})
|
| 62 |
# keep only audio files if needed
|
|
|
|
| 2 |
# sync_library_and_hf.py
|
| 3 |
''''
|
| 4 |
RUN BELOW FOR NEW HTML FILES TO UPDATE OLD ONES ON DATASET REPO
|
| 5 |
+
python sync_library_and_hf.py --db-path library.csv --repo-id akazemian/audio-html --model-name wavcoch_audio-preds-sr=16000 --index-filename index.csv --wipe-remote --wipe-local
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
'''
|
| 7 |
import argparse, datetime, uuid, posixpath, sys, traceback, os, hashlib
|
| 8 |
from pathlib import Path
|
|
|
|
| 10 |
from urllib.parse import unquote
|
| 11 |
import os
|
| 12 |
import pandas as pd
|
| 13 |
+
import numpy as np
|
| 14 |
from huggingface_hub import (
|
| 15 |
HfApi,
|
| 16 |
hf_hub_download,
|
|
|
|
| 52 |
audio_dir = "/data/atlask/BAU-Quant/val"
|
| 53 |
manifest = pd.read_csv(csv_path)
|
| 54 |
|
| 55 |
+
mask = manifest['dataset'].eq('TUT_urban_acoustic_scenes')
|
| 56 |
+
manifest['audio_category'] = np.where(mask, manifest['dataset'], manifest['audio_category'])
|
| 57 |
+
manifest = manifest.assign(
|
| 58 |
+
audio_category = manifest['audio_category'].where(~mask, manifest['dataset'])
|
| 59 |
+
)
|
| 60 |
+
|
| 61 |
# 1) Build a files dataframe
|
| 62 |
files = pd.DataFrame({"file_name": os.listdir(audio_dir)})
|
| 63 |
# keep only audio files if needed
|