bugfix on hotword biasing
app/asr_worker.py  (+38 -24)
@@ -7,6 +7,7 @@ from opencc import OpenCC
 from huggingface_hub import hf_hub_download
 from typing import List
 import tempfile
+from sentencepiece import SentencePieceProcessor
 
 # Ensure Hugging Face cache is in a user-writable directory
 CACHE_DIR = Path(__file__).parent / "hf_cache"
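Note that the new top-level import makes sentencepiece a hard runtime dependency of the worker. A defensive variant of the import, shown below as a sketch that is not part of this commit, would fail fast with a clearer message if the package is missing from the Space's environment:

# Sketch only, not part of this commit: fail fast with a clear message if the
# new runtime dependency is not installed in the Space.
try:
    from sentencepiece import SentencePieceProcessor
except ImportError as exc:
    raise RuntimeError(
        "sentencepiece is required to build BPE vocabs for hotword biasing; "
        "add it to the Space's requirements"
    ) from exc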
@@ -26,7 +27,7 @@ STREAMING_ZIPFORMER_MODELS = {
         "joiner_fp32": "exp/96/joiner-epoch-99-avg-1.onnx",
         "joiner_int8": "exp/96/joiner-epoch-99-avg-1.int8.onnx",
         "modeling_unit":"cjkchar+bpe",
-        "
+        "bpe_model": "data/lang_char_bpe/bpe.model",
     },
     # mixed Chinese+English (char+BPE)
     "pfluo/k2fsa-zipformer-chinese-english-mixed": {
@@ -38,7 +39,7 @@ STREAMING_ZIPFORMER_MODELS = {
         "joiner_fp32": "exp/joiner-epoch-99-avg-1.onnx",
         "joiner_int8": "exp/joiner-epoch-99-avg-1.int8.onnx",
         "modeling_unit":"cjkchar+bpe",
-        "
+        "bpe_model": "data/lang_char_bpe/bpe.model",
     },
     # Korean-only (CJK chars)
     "k2-fsa/sherpa-onnx-streaming-zipformer-korean-2024-06-16": {
@@ -50,7 +51,7 @@ STREAMING_ZIPFORMER_MODELS = {
         "joiner_fp32": "joiner-epoch-99-avg-1.onnx",
         "joiner_int8": "joiner-epoch-99-avg-1.int8.onnx",
         "modeling_unit":"cjkchar",
-        "
+        "bpe_model": "bpe.model",
     },
     # multi Chinese (Hans) (CJK chars)
     "k2-fsa/sherpa-onnx-streaming-zipformer-multi-zh-hans-2023-12-12": {
@@ -62,7 +63,7 @@ STREAMING_ZIPFORMER_MODELS = {
         "joiner_fp32": "joiner-epoch-20-avg-1-chunk-16-left-128.onnx",
         "joiner_int8": "joiner-epoch-20-avg-1-chunk-16-left-128.int8.onnx",
         "modeling_unit":"cjkchar",
-        "
+        "bpe_model": "bpe.model",
     },
     # wenetspeech streaming (CJK chars)
     "pkufool/icefall-asr-zipformer-streaming-wenetspeech-20230615": {
@@ -74,7 +75,7 @@ STREAMING_ZIPFORMER_MODELS = {
         "joiner_fp32": "exp/joiner-epoch-12-avg-4-chunk-16-left-128.onnx",
         "joiner_int8": "exp/joiner-epoch-12-avg-4-chunk-16-left-128.int8.onnx",
         "modeling_unit":"cjkchar",
-        "
+        "bpe_model": None,
     },
     # English-only (BPE)
     "csukuangfj/sherpa-onnx-streaming-zipformer-en-2023-06-26": {
@@ -86,7 +87,7 @@ STREAMING_ZIPFORMER_MODELS = {
         "joiner_fp32": "joiner-epoch-99-avg-1-chunk-16-left-128.onnx",
         "joiner_int8": "joiner-epoch-99-avg-1-chunk-16-left-128.int8.onnx",
         "modeling_unit":"bpe",
-        "
+        "bpe_model": "bpe.model",
     },
     "csukuangfj/sherpa-onnx-streaming-zipformer-en-2023-06-21": {
         "tokens": "tokens.txt",
@@ -97,7 +98,7 @@ STREAMING_ZIPFORMER_MODELS = {
         "joiner_fp32": "joiner-epoch-99-avg-1.onnx",
         "joiner_int8": "joiner-epoch-99-avg-1.int8.onnx",
         "modeling_unit":"bpe",
-        "
+        "bpe_model": None,
     },
     "csukuangfj/sherpa-onnx-streaming-zipformer-en-2023-02-21": {
         "tokens": "tokens.txt",
@@ -108,7 +109,7 @@ STREAMING_ZIPFORMER_MODELS = {
         "joiner_fp32": "joiner-epoch-99-avg-1.onnx",
         "joiner_int8": "joiner-epoch-99-avg-1.int8.onnx",
         "modeling_unit":"bpe",
-        "
+        "bpe_model": None,
     },
     # older bilingual zh-en (cjkchar+BPE) - no bpe.vocab shipped
     "csukuangfj/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20": {
@@ -120,7 +121,7 @@ STREAMING_ZIPFORMER_MODELS = {
         "joiner_fp32": "joiner-epoch-99-avg-1.onnx",
         "joiner_int8": "joiner-epoch-99-avg-1.int8.onnx",
         "modeling_unit":"cjkchar+bpe",
-        "
+        "bpe_model": "bpe.model",
     },
     # French-only (BPE)
     "shaojieli/sherpa-onnx-streaming-zipformer-fr-2023-04-14": {
@@ -132,7 +133,7 @@ STREAMING_ZIPFORMER_MODELS = {
         "joiner_fp32": "joiner-epoch-29-avg-9-with-averaged-model.onnx",
         "joiner_int8": "joiner-epoch-29-avg-9-with-averaged-model.int8.onnx",
         "modeling_unit":"bpe",
-        "
+        "bpe_model": None,
     },
     # Chinese-only small (CJK chars)
     "csukuangfj/sherpa-onnx-streaming-zipformer-zh-14M-2023-02-23": {
@@ -144,7 +145,7 @@ STREAMING_ZIPFORMER_MODELS = {
         "joiner_fp32": "joiner-epoch-99-avg-1.onnx",
         "joiner_int8": "joiner-epoch-99-avg-1.int8.onnx",
         "modeling_unit":"cjkchar",
-        "
+        "bpe_model": None,
     },
     # English-only 20M (BPE)
     "csukuangfj/sherpa-onnx-streaming-zipformer-en-20M-2023-02-17": {
@@ -156,7 +157,7 @@ STREAMING_ZIPFORMER_MODELS = {
         "joiner_fp32": "joiner-epoch-99-avg-1.onnx",
         "joiner_int8": "joiner-epoch-99-avg-1.int8.onnx",
         "modeling_unit":"bpe",
-        "
+        "bpe_model": None,
     },
 }
 
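The practical effect of the new bpe_model field: models whose modeling_unit contains "bpe" can only bias hotwords when a SentencePiece bpe.model ships with the repo, because sherpa-onnx needs a BPE vocab to encode the hotword strings; pure cjkchar models never need one. The sketch below mirrors the use_beam gate added in the create_recognizer hunk that follows, and assumes the registry is importable as app.asr_worker:

# Sketch only: lists which registry entries can still do hotword biasing after
# this change, using the same gate as create_recognizer below.
from app.asr_worker import STREAMING_ZIPFORMER_MODELS

for model_id, entry in STREAMING_ZIPFORMER_MODELS.items():
    unit = entry["modeling_unit"]
    # cjkchar-only models never need a BPE vocab; BPE-based models need a
    # shipped bpe.model, otherwise hotwords fall back to greedy search.
    can_bias = ("bpe" not in unit) or bool(entry.get("bpe_model"))
    print(f"{model_id}: unit={unit}, hotword biasing={'yes' if can_bias else 'no'}")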
@@ -187,24 +188,37 @@ def create_recognizer(
     decoder_path = hf_hub_download(repo_id=model_id, filename=decoder_file, cache_dir=str(CACHE_DIR))
     joiner_path = hf_hub_download(repo_id=model_id, filename=joiner_file, cache_dir=str(CACHE_DIR))
 
-    #
+    # Prepare BPE vocab from .model if provided
     modeling_unit = entry.get("modeling_unit")
-
+    bpe_model_rel = entry.get("bpe_model")
     bpe_vocab_path = None
-    if
+    if bpe_model_rel:
         try:
-
-
-
-
-            )
-
+            bpe_model_path = hf_hub_download(model_id, bpe_model_rel, cache_dir=str(CACHE_DIR))
+            print(f"[DEBUG] Downloaded bpe model: {bpe_model_path}")
+
+            # === export_bpe_vocab.py logic starts here ===
+            sp = SentencePieceProcessor()
+            sp.Load(str(bpe_model_path))
+
+            vocab_file = Path(CACHE_DIR) / f"{Path(bpe_model_rel).stem}.vocab"
+            with open(vocab_file, "w", encoding="utf-8") as vf:
+                for idx in range(sp.get_piece_size()):
+                    piece = sp.id_to_piece(idx)
+                    score = sp.get_score(idx)
+                    vf.write(f"{piece}\t{score}\n")
+            bpe_vocab_path = str(vocab_file)
+            print(f"[DEBUG] Converted bpe model to vocab: {bpe_vocab_path}")
+            # === export_bpe_vocab.py logic ends here ===
+
         except Exception as e:
-            print(f"[WARNING
+            print(f"[WARNING] Failed to build BPE vocab from '{bpe_model_rel}': {e}")
             bpe_vocab_path = None
 
-    #
-
+    # Decide if we should use beam-search hotword biasing
+    has_hot = bool(hotwords and hotwords_score > 0.0)
+    use_beam = has_hot and ("bpe" not in modeling_unit or bpe_vocab_path is not None)
+
     if use_beam:
         # Write hotword list to a temp file (one entry per line)
         tf = tempfile.NamedTemporaryFile(
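The hunk is cut off at the NamedTemporaryFile call, so the hand-off to sherpa-onnx is not visible here. For context, a minimal sketch of what that hand-off presumably looks like is shown below; it assumes the Space builds the recognizer with sherpa_onnx.OnlineRecognizer.from_transducer, that the installed sherpa-onnx version accepts the hotwords_file / hotwords_score / modeling_unit / bpe_vocab keyword arguments, and that tokens_path and encoder_path are resolved earlier in create_recognizer (they are not shown in this diff):

import sherpa_onnx

# Sketch only: the rest of create_recognizer is not part of this diff.
recognizer = sherpa_onnx.OnlineRecognizer.from_transducer(
    tokens=tokens_path,
    encoder=encoder_path,
    decoder=decoder_path,
    joiner=joiner_path,
    num_threads=1,
    sample_rate=16000,
    feature_dim=80,
    # Hotword biasing requires modified_beam_search; otherwise stay on greedy.
    decoding_method="modified_beam_search" if use_beam else "greedy_search",
    hotwords_file=tf.name if use_beam else "",
    hotwords_score=hotwords_score if use_beam else 0.0,
    # For BPE or mixed units, the exported vocab lets sherpa-onnx encode hotwords.
    modeling_unit=modeling_unit or "",
    bpe_vocab=bpe_vocab_path or "",
)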