Luigi committed
Commit 3ed4a9b · 1 Parent(s): afc9f36

bugfix on hotword biasing

Files changed (1)
app/asr_worker.py +38 -24
app/asr_worker.py CHANGED
@@ -7,6 +7,7 @@ from opencc import OpenCC
 from huggingface_hub import hf_hub_download
 from typing import List
 import tempfile
+from sentencepiece import SentencePieceProcessor
 
 # Ensure Hugging Face cache is in a user-writable directory
 CACHE_DIR = Path(__file__).parent / "hf_cache"
@@ -26,7 +27,7 @@ STREAMING_ZIPFORMER_MODELS = {
         "joiner_fp32": "exp/96/joiner-epoch-99-avg-1.onnx",
         "joiner_int8": "exp/96/joiner-epoch-99-avg-1.int8.onnx",
         "modeling_unit":"cjkchar+bpe",
-        "bpe_vocab": "data/lang_char_bpe/bpe.vocab",
+        "bpe_model": "data/lang_char_bpe/bpe.model",
     },
     # mixed Chinese+English (char+BPE)
     "pfluo/k2fsa-zipformer-chinese-english-mixed": {
@@ -38,7 +39,7 @@ STREAMING_ZIPFORMER_MODELS = {
         "joiner_fp32": "exp/joiner-epoch-99-avg-1.onnx",
         "joiner_int8": "exp/joiner-epoch-99-avg-1.int8.onnx",
         "modeling_unit":"cjkchar+bpe",
-        "bpe_vocab": None,
+        "bpe_model": "data/lang_char_bpe/bpe.model",
     },
     # Korean-only (CJK chars)
     "k2-fsa/sherpa-onnx-streaming-zipformer-korean-2024-06-16": {
@@ -50,7 +51,7 @@ STREAMING_ZIPFORMER_MODELS = {
         "joiner_fp32": "joiner-epoch-99-avg-1.onnx",
         "joiner_int8": "joiner-epoch-99-avg-1.int8.onnx",
         "modeling_unit":"cjkchar",
-        "bpe_vocab": None,
+        "bpe_model": "bpe.model",
     },
     # multi Chinese (Hans) (CJK chars)
     "k2-fsa/sherpa-onnx-streaming-zipformer-multi-zh-hans-2023-12-12": {
@@ -62,7 +63,7 @@ STREAMING_ZIPFORMER_MODELS = {
         "joiner_fp32": "joiner-epoch-20-avg-1-chunk-16-left-128.onnx",
         "joiner_int8": "joiner-epoch-20-avg-1-chunk-16-left-128.int8.onnx",
         "modeling_unit":"cjkchar",
-        "bpe_vocab": None,
+        "bpe_model": "bpe.model",
     },
     # wenetspeech streaming (CJK chars)
     "pkufool/icefall-asr-zipformer-streaming-wenetspeech-20230615": {
@@ -74,7 +75,7 @@ STREAMING_ZIPFORMER_MODELS = {
         "joiner_fp32": "exp/joiner-epoch-12-avg-4-chunk-16-left-128.onnx",
         "joiner_int8": "exp/joiner-epoch-12-avg-4-chunk-16-left-128.int8.onnx",
         "modeling_unit":"cjkchar",
-        "bpe_vocab": None,
+        "bpe_model": None,
     },
     # English-only (BPE)
     "csukuangfj/sherpa-onnx-streaming-zipformer-en-2023-06-26": {
@@ -86,7 +87,7 @@ STREAMING_ZIPFORMER_MODELS = {
         "joiner_fp32": "joiner-epoch-99-avg-1-chunk-16-left-128.onnx",
         "joiner_int8": "joiner-epoch-99-avg-1-chunk-16-left-128.int8.onnx",
         "modeling_unit":"bpe",
-        "bpe_vocab": None,
+        "bpe_model": "bpe.model",
     },
     "csukuangfj/sherpa-onnx-streaming-zipformer-en-2023-06-21": {
         "tokens": "tokens.txt",
@@ -97,7 +98,7 @@ STREAMING_ZIPFORMER_MODELS = {
         "joiner_fp32": "joiner-epoch-99-avg-1.onnx",
         "joiner_int8": "joiner-epoch-99-avg-1.int8.onnx",
         "modeling_unit":"bpe",
-        "bpe_vocab": None,
+        "bpe_model": None,
     },
     "csukuangfj/sherpa-onnx-streaming-zipformer-en-2023-02-21": {
         "tokens": "tokens.txt",
@@ -108,7 +109,7 @@ STREAMING_ZIPFORMER_MODELS = {
         "joiner_fp32": "joiner-epoch-99-avg-1.onnx",
         "joiner_int8": "joiner-epoch-99-avg-1.int8.onnx",
         "modeling_unit":"bpe",
-        "bpe_vocab": None,
+        "bpe_model": None,
     },
     # older bilingual zh-en (cjkchar+BPE) – no bpe.vocab shipped
     "csukuangfj/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20": {
@@ -120,7 +121,7 @@ STREAMING_ZIPFORMER_MODELS = {
         "joiner_fp32": "joiner-epoch-99-avg-1.onnx",
         "joiner_int8": "joiner-epoch-99-avg-1.int8.onnx",
         "modeling_unit":"cjkchar+bpe",
-        "bpe_vocab": None,
+        "bpe_model": "bpe.model",
     },
     # French-only (BPE)
     "shaojieli/sherpa-onnx-streaming-zipformer-fr-2023-04-14": {
@@ -132,7 +133,7 @@ STREAMING_ZIPFORMER_MODELS = {
         "joiner_fp32": "joiner-epoch-29-avg-9-with-averaged-model.onnx",
         "joiner_int8": "joiner-epoch-29-avg-9-with-averaged-model.int8.onnx",
         "modeling_unit":"bpe",
-        "bpe_vocab": None,
+        "bpe_model": None,
     },
     # Chinese-only small (CJK chars)
     "csukuangfj/sherpa-onnx-streaming-zipformer-zh-14M-2023-02-23": {
@@ -144,7 +145,7 @@ STREAMING_ZIPFORMER_MODELS = {
         "joiner_fp32": "joiner-epoch-99-avg-1.onnx",
         "joiner_int8": "joiner-epoch-99-avg-1.int8.onnx",
         "modeling_unit":"cjkchar",
-        "bpe_vocab": None,
+        "bpe_model": None,
     },
     # English-only 20M (BPE)
     "csukuangfj/sherpa-onnx-streaming-zipformer-en-20M-2023-02-17": {
@@ -156,7 +157,7 @@ STREAMING_ZIPFORMER_MODELS = {
         "joiner_fp32": "joiner-epoch-99-avg-1.onnx",
         "joiner_int8": "joiner-epoch-99-avg-1.int8.onnx",
         "modeling_unit":"bpe",
-        "bpe_vocab": None,
+        "bpe_model": None,
     },
 }
 
@@ -187,24 +188,37 @@ def create_recognizer(
     decoder_path = hf_hub_download(repo_id=model_id, filename=decoder_file, cache_dir=str(CACHE_DIR))
     joiner_path = hf_hub_download(repo_id=model_id, filename=joiner_file, cache_dir=str(CACHE_DIR))
 
-    # ——— Download BPE vocab if this model has one ———
+    # Prepare BPE vocab from .model if provided
     modeling_unit = entry.get("modeling_unit")
-    bpe_rel_path = entry.get("bpe_vocab")
+    bpe_model_rel = entry.get("bpe_model")
     bpe_vocab_path = None
-    if bpe_rel_path:
+    if bpe_model_rel:
         try:
-            bpe_vocab_path = hf_hub_download(
-                repo_id=model_id,
-                filename=bpe_rel_path,
-                cache_dir=str(CACHE_DIR),
-            )
-            print(f"[DEBUG asr_worker] Downloaded bpe_vocab: {bpe_vocab_path}")
+            bpe_model_path = hf_hub_download(model_id, bpe_model_rel, cache_dir=str(CACHE_DIR))
+            print(f"[DEBUG] Downloaded bpe model: {bpe_model_path}")
+
+            # === export_bpe_vocab.py logic starts here ===
+            sp = SentencePieceProcessor()
+            sp.Load(str(bpe_model_path))
+
+            vocab_file = Path(CACHE_DIR) / f"{Path(bpe_model_rel).stem}.vocab"
+            with open(vocab_file, "w", encoding="utf-8") as vf:
+                for idx in range(sp.get_piece_size()):
+                    piece = sp.id_to_piece(idx)
+                    score = sp.get_score(idx)
+                    vf.write(f"{piece}\t{score}\n")
+            bpe_vocab_path = str(vocab_file)
+            print(f"[DEBUG] Converted bpe model to vocab: {bpe_vocab_path}")
+            # === export_bpe_vocab.py logic ends here ===
+
        except Exception as e:
-            print(f"[WARNING asr_worker] Could not download bpe_vocab '{bpe_rel_path}': {e}")
+            print(f"[WARNING] Failed to build BPE vocab from '{bpe_model_rel}': {e}")
             bpe_vocab_path = None
 
-    # ——— Decide whether to use beam search with hotword biasing ———
-    use_beam = (hotwords and hotwords_score > 0.0) and bpe_vocab_path
+    # Decide if we should use beam-search hotword biasing
+    has_hot = bool(hotwords and hotwords_score > 0.0)
+    use_beam = has_hot and ("bpe" not in modeling_unit or bpe_vocab_path is not None)
+
     if use_beam:
         # Write hotword list to a temp file (one entry per line)
         tf = tempfile.NamedTemporaryFile(
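
Note (not part of the commit): the diff stops just as the hotword temp file is created, so here is a minimal sketch of how the pieces built above would typically feed into the recognizer. It is an assumed continuation, not code from this repository: tokens_path and encoder_path are presumed to be downloaded earlier in create_recognizer the same way decoder_path and joiner_path are, tf is the hotword temp file already populated with one entry per line, and the keyword arguments follow the hotword-related options of sherpa_onnx.OnlineRecognizer.from_transducer (decoding_method, hotwords_file, hotwords_score, modeling_unit, bpe_vocab).

    import sherpa_onnx

    # Illustrative continuation: switch to modified_beam_search only when
    # hotword biasing is active; otherwise fall back to greedy search.
    recognizer = sherpa_onnx.OnlineRecognizer.from_transducer(
        tokens=tokens_path,      # assumed: fetched earlier like decoder_path/joiner_path
        encoder=encoder_path,    # assumed: fetched earlier like decoder_path/joiner_path
        decoder=decoder_path,
        joiner=joiner_path,
        sample_rate=16000,
        feature_dim=80,
        decoding_method="modified_beam_search" if use_beam else "greedy_search",
        hotwords_file=tf.name if use_beam else "",
        hotwords_score=hotwords_score,
        modeling_unit=modeling_unit or "cjkchar",
        bpe_vocab=bpe_vocab_path or "",
    )

The point of the fix: for "bpe" and "cjkchar+bpe" models, sherpa-onnx needs a bpe.vocab to turn hotword strings into tokens, but several of the model repos only ship bpe.model. The commit therefore derives the vocab from bpe.model with SentencePiece instead of trying to download a file that does not exist, and only disables beam search when a BPE vocab is actually required but unavailable.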