akazemian committed on
Commit
f294685
·
verified ·
1 Parent(s): 347540a

Upload folder using huggingface_hub

Browse files
Files changed (2) hide show
  1. library.csv +2 -2
  2. sync_library_and_hf.py +8 -7
library.csv CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2e32e0f5bdb86dd24f0f7531469b106315fe68914fc48b427f0d859e733c2493
3
- size 88173
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ed730a77eb0d3d1ffd3c94288bb944014280dbdd587d9ae3f4331f0a908544a2
3
+ size 346166
sync_library_and_hf.py CHANGED
@@ -2,13 +2,7 @@
2
  # sync_library_and_hf.py
3
  ''''
4
  RUN BELOW FOR NEW HTML FILES TO UPDATE OLD ONES ON DATASET REPO
5
- python sync_library_and_hf.py
6
- --db-path library.csv
7
- --repo-id akazemian/audio-html
8
- --model-name wavcoch_audio-preds-sr=16000
9
- --index-filename index.csv
10
- --wipe-remote
11
- --wipe-local
12
  '''
13
  import argparse, datetime, uuid, posixpath, sys, traceback, os, hashlib
14
  from pathlib import Path
@@ -16,6 +10,7 @@ from typing import List, Tuple, Set
16
  from urllib.parse import unquote
17
  import os
18
  import pandas as pd
 
19
  from huggingface_hub import (
20
  HfApi,
21
  hf_hub_download,
@@ -57,6 +52,12 @@ def create_file_specific_manifest(csv_path: Path) -> pd.DataFrame:
57
  audio_dir = "/data/atlask/BAU-Quant/val"
58
  manifest = pd.read_csv(csv_path)
59
 
 
 
 
 
 
 
60
  # 1) Build a files dataframe
61
  files = pd.DataFrame({"file_name": os.listdir(audio_dir)})
62
  # keep only audio files if needed
 
2
  # sync_library_and_hf.py
3
  ''''
4
  RUN BELOW FOR NEW HTML FILES TO UPDATE OLD ONES ON DATASET REPO
5
+ python sync_library_and_hf.py --db-path library.csv --repo-id akazemian/audio-html --model-name wavcoch_audio-preds-sr=16000 --index-filename index.csv --wipe-remote --wipe-local
 
 
 
 
 
 
6
  '''
7
  import argparse, datetime, uuid, posixpath, sys, traceback, os, hashlib
8
  from pathlib import Path
 
10
  from urllib.parse import unquote
11
  import os
12
  import pandas as pd
13
+ import numpy as np
14
  from huggingface_hub import (
15
  HfApi,
16
  hf_hub_download,
 
52
  audio_dir = "/data/atlask/BAU-Quant/val"
53
  manifest = pd.read_csv(csv_path)
54
 
55
+ mask = manifest['dataset'].eq('TUT_urban_acoustic_scenes')
56
+ manifest['audio_category'] = np.where(mask, manifest['dataset'], manifest['audio_category'])
57
+ manifest = manifest.assign(
58
+ audio_category = manifest['audio_category'].where(~mask, manifest['dataset'])
59
+ )
60
+
61
  # 1) Build a files dataframe
62
  files = pd.DataFrame({"file_name": os.listdir(audio_dir)})
63
  # keep only audio files if needed