Update app.py
Browse files
app.py
CHANGED
@@ -16,14 +16,16 @@ language = "zh"
|
|
16 |
pipe = pipeline(
|
17 |
task="automatic-speech-recognition",
|
18 |
model=MODEL_NAME,
|
19 |
-
chunk_length_s=30,
|
20 |
device=device,
|
21 |
generate_kwargs={
|
22 |
"no_repeat_ngram_size": 3,
|
23 |
-
"repetition_penalty": 1.…,  [trailing digits truncated in this capture; original value unknown — changed to 1.15 by this commit]
|
24 |
"temperature": 0.7,
|
25 |
-
"top_p": 0.…,  [trailing digits truncated in this capture; original value unknown — changed to 0.97 by this commit]
|
26 |
-
"top_k": …,  [value truncated in this capture; original value unknown — changed to 40 by this commit]
|
|
|
|
|
27 |
}
|
28 |
)
|
29 |
pipe.model.config.forced_decoder_ids = pipe.tokenizer.get_decoder_prompt_ids(language=language, task="transcribe")
|
|
|
16 |
pipe = pipeline(
|
17 |
task="automatic-speech-recognition",
|
18 |
model=MODEL_NAME,
|
19 |
+
chunk_length_s=30, # Reduce chunk size for better memory handling
|
20 |
device=device,
|
21 |
generate_kwargs={
|
22 |
"no_repeat_ngram_size": 3,
|
23 |
+
"repetition_penalty": 1.15,
|
24 |
"temperature": 0.7,
|
25 |
+
"top_p": 0.97,
|
26 |
+
"top_k": 40,
|
27 |
+
"max_new_tokens": 300, # Reduced from 500 to avoid exceeding 448
|
28 |
+
"do_sample": True # Required for `top_p` and `top_k` to take effect
|
29 |
}
|
30 |
)
|
31 |
pipe.model.config.forced_decoder_ids = pipe.tokenizer.get_decoder_prompt_ids(language=language, task="transcribe")
|