DeeeeeeM committed
Commit e8fae30 · Parent(s): 5899607
added initial prompt
app.py CHANGED
@@ -11,11 +11,15 @@ import time
 def process_media(
     model_size, source_lang, upload, model_type,
     max_chars, max_words, extend_in, extend_out, collapse_gaps,
-    max_lines_per_segment, line_penalty, longest_line_char_penalty,
+    max_lines_per_segment, line_penalty, longest_line_char_penalty,
+    initial_prompt=None, #
+    *args
 ):
+    if not initial_prompt:
+        initial_prompt = None
+
     start_time = time.time()
 
-    # ----- is file empty? checker ----- #
     if upload is None:
         return None, None, None, None
 
@@ -25,11 +29,26 @@ def process_media(
     if model_type == "faster whisper":
         device = "cuda" if torch.cuda.is_available() else "cpu"
         model = stable_whisper.load_faster_whisper(model_size, device=device)
-        result = model.transcribe(
+        result = model.transcribe(
+            temp_path,
+            language=source_lang,
+            vad=True,
+            regroup=False,
+            no_speech_threshold=0.9,
+            initial_prompt=initial_prompt # <-- pass here
+        )
     else:
         device = "cuda" if torch.cuda.is_available() else "cpu"
         model = stable_whisper.load_model(model_size, device=device)
-        result = model.transcribe(
+        result = model.transcribe(
+            temp_path,
+            language=source_lang,
+            vad=True,
+            regroup=False,
+            no_speech_threshold=0.9,
+            denoiser="demucs",
+            initial_prompt=initial_prompt # <-- pass here
+        )
         #, batch_size=16, denoiser="demucs"
         #result.save_as_json(word_transcription_path)
 
@@ -299,9 +318,8 @@ with gr.Blocks() as interface:
         source_lang = gr.Dropdown(
             choices=WHISPER_LANGUAGES,
             label="Source Language",
-            value="tl",
-            interactive=True
-            allow_custom_value=False
+            value="tl",
+            interactive=True
         )
         model_type = gr.Dropdown(
             choices=["faster whisper", "whisper"],
@@ -324,6 +342,12 @@
             value="deepdml/faster-whisper-large-v3-turbo-ct2",
             interactive=True
         )
+        initial_prompt = gr.Textbox(
+            label="Initial Prompt (optional)",
+            lines=3,
+            placeholder="Add context, names, or style for the model here",
+            interactive=True
+        )
 
         #Advanced Settings
         with gr.Accordion("Advanced Settings", open=False):