Luigi committed on
Commit
c335e79
·
1 Parent(s): 69b815b

1. Convert every hotword to zh-CN for zh-CN models

Browse files
Files changed (1) hide show
  1. app/asr_worker.py +4 -2
app/asr_worker.py CHANGED
@@ -13,7 +13,8 @@ from sentencepiece import SentencePieceProcessor
13
  CACHE_DIR = Path(__file__).parent / "hf_cache"
14
  os.makedirs(CACHE_DIR, exist_ok=True)
15
 
16
- converter = OpenCC('s2t')
 
17
 
18
  # Streaming Zipformer model registry: paths relative to repo root
19
  STREAMING_ZIPFORMER_MODELS = {
@@ -227,6 +228,7 @@ def create_recognizer(
227
  for w in hotwords:
228
  # Remove backslashes and angle-bracket tokens
229
  clean = w.replace("\\", "").replace("<unk>", "").strip()
 
230
  if clean: # only write non-empty lines
231
  tf.write(f"{clean}\n")
232
  tf.flush()
@@ -276,4 +278,4 @@ def stream_audio(raw_pcm_bytes, stream, recognizer, orig_sr):
276
  if recognizer.is_ready(stream):
277
  recognizer.decode_streams([stream])
278
  result = recognizer.get_result(stream)
279
- return converter.convert(result), rms
 
13
  CACHE_DIR = Path(__file__).parent / "hf_cache"
14
  os.makedirs(CACHE_DIR, exist_ok=True)
15
 
16
+ to_ZHTW = OpenCC('s2t')
17
+ to_ZHCN = OpenCC('t2s')
18
 
19
  # Streaming Zipformer model registry: paths relative to repo root
20
  STREAMING_ZIPFORMER_MODELS = {
 
228
  for w in hotwords:
229
  # Remove backslashes and angle-bracket tokens
230
  clean = w.replace("\\", "").replace("<unk>", "").strip()
231
+ clean = to_ZHCN.convert(clean) # convert all hotword into zh-cn for zh-cn models
232
  if clean: # only write non-empty lines
233
  tf.write(f"{clean}\n")
234
  tf.flush()
 
278
  if recognizer.is_ready(stream):
279
  recognizer.decode_streams([stream])
280
  result = recognizer.get_result(stream)
281
+ return to_ZHTW.convert(result), rms