Spaces:
Running
Running
1. Convert every hotword to zh-CN (Simplified Chinese) for zh-CN models
Browse files- app/asr_worker.py +4 -2
app/asr_worker.py
CHANGED
@@ -13,7 +13,8 @@ from sentencepiece import SentencePieceProcessor
|
|
13 |
CACHE_DIR = Path(__file__).parent / "hf_cache"
|
14 |
os.makedirs(CACHE_DIR, exist_ok=True)
|
15 |
|
16 |
-
|
|
|
17 |
|
18 |
# Streaming Zipformer model registry: paths relative to repo root
|
19 |
STREAMING_ZIPFORMER_MODELS = {
|
@@ -227,6 +228,7 @@ def create_recognizer(
|
|
227 |
for w in hotwords:
|
228 |
# Remove backslashes and angle-bracket tokens
|
229 |
clean = w.replace("\\", "").replace("<unk>", "").strip()
|
|
|
230 |
if clean: # only write non-empty lines
|
231 |
tf.write(f"{clean}\n")
|
232 |
tf.flush()
|
@@ -276,4 +278,4 @@ def stream_audio(raw_pcm_bytes, stream, recognizer, orig_sr):
|
|
276 |
if recognizer.is_ready(stream):
|
277 |
recognizer.decode_streams([stream])
|
278 |
result = recognizer.get_result(stream)
|
279 |
-
return
|
|
|
13 |
CACHE_DIR = Path(__file__).parent / "hf_cache"
|
14 |
os.makedirs(CACHE_DIR, exist_ok=True)
|
15 |
|
16 |
+
to_ZHTW = OpenCC('s2t')
|
17 |
+
to_ZHCN = OpenCC('t2s')
|
18 |
|
19 |
# Streaming Zipformer model registry: paths relative to repo root
|
20 |
STREAMING_ZIPFORMER_MODELS = {
|
|
|
228 |
for w in hotwords:
|
229 |
# Remove backslashes and angle-bracket tokens
|
230 |
clean = w.replace("\\", "").replace("<unk>", "").strip()
|
231 |
+
clean = to_ZHCN.convert(clean) # convert every hotword to zh-CN (Simplified Chinese) for zh-CN models
|
232 |
if clean: # only write non-empty lines
|
233 |
tf.write(f"{clean}\n")
|
234 |
tf.flush()
|
|
|
278 |
if recognizer.is_ready(stream):
|
279 |
recognizer.decode_streams([stream])
|
280 |
result = recognizer.get_result(stream)
|
281 |
+
return to_ZHTW.convert(result), rms
|