DeeeeeeM committed
Commit e8fae30 · 1 Parent(s): 5899607

added initial prompt

Files changed (1)
  1. app.py +31 -7
app.py CHANGED
@@ -11,11 +11,15 @@ import time
 def process_media(
     model_size, source_lang, upload, model_type,
     max_chars, max_words, extend_in, extend_out, collapse_gaps,
-    max_lines_per_segment, line_penalty, longest_line_char_penalty, *args
+    max_lines_per_segment, line_penalty, longest_line_char_penalty,
+    initial_prompt=None, #
+    *args
 ):
+    if not initial_prompt:
+        initial_prompt = None
+
     start_time = time.time()
 
-    # ----- is file empty? checker ----- #
     if upload is None:
         return None, None, None, None
 
@@ -25,11 +29,26 @@ def process_media(
     if model_type == "faster whisper":
         device = "cuda" if torch.cuda.is_available() else "cpu"
         model = stable_whisper.load_faster_whisper(model_size, device=device)
-        result = model.transcribe(temp_path, language=source_lang, vad=True, regroup=False,no_speech_threshold=0.9)
+        result = model.transcribe(
+            temp_path,
+            language=source_lang,
+            vad=True,
+            regroup=False,
+            no_speech_threshold=0.9,
+            initial_prompt=initial_prompt  # <-- pass here
+        )
     else:
         device = "cuda" if torch.cuda.is_available() else "cpu"
         model = stable_whisper.load_model(model_size, device=device)
-        result = model.transcribe(temp_path, language=source_lang, vad=True, regroup=False, no_speech_threshold=0.9, denoiser="demucs")
+        result = model.transcribe(
+            temp_path,
+            language=source_lang,
+            vad=True,
+            regroup=False,
+            no_speech_threshold=0.9,
+            denoiser="demucs",
+            initial_prompt=initial_prompt  # <-- pass here
+        )
     #, batch_size=16, denoiser="demucs"
     #result.save_as_json(word_transcription_path)
 
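The guard added at the top of process_media exists because a blank Gradio textbox submits an empty string rather than None, while the transcribe calls above expect either a real prompt string or None. A minimal sketch of that normalization, separate from this commit and using an illustrative helper name:

# Illustrative only: mirrors the "if not initial_prompt" guard in the diff above.
def normalize_prompt(raw):
    # Empty or whitespace-only input means "no prompt"; Whisper-style
    # transcribe() should then receive None instead of "".
    return raw.strip() if raw and raw.strip() else None

assert normalize_prompt("") is None
assert normalize_prompt("   ") is None
assert normalize_prompt("Names: Ana, Ben") == "Names: Ana, Ben"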
 
@@ -299,9 +318,8 @@ with gr.Blocks() as interface:
         source_lang = gr.Dropdown(
             choices=WHISPER_LANGUAGES,
             label="Source Language",
-            value="tl",
-            interactive=True,
-            allow_custom_value=False
+            value="tl",
+            interactive=True
         )
         model_type = gr.Dropdown(
             choices=["faster whisper", "whisper"],
@@ -324,6 +342,12 @@
             value="deepdml/faster-whisper-large-v3-turbo-ct2",
             interactive=True
         )
+        initial_prompt = gr.Textbox(
+            label="Initial Prompt (optional)",
+            lines=3,
+            placeholder="Add context, names, or style for the model here",
+            interactive=True
+        )
 
         #Advanced Settings
         with gr.Accordion("Advanced Settings", open=False):
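The new Textbox is declared here, but this diff does not show how it is wired into the event handler; for its value to reach the new initial_prompt parameter, the component has to appear in that handler's inputs list right after longest_line_char_penalty and before anything collected by *args. A self-contained sketch of that kind of hookup, with simplified stand-in names rather than the app's real components:

import gradio as gr

def process_media_stub(model_size, source_lang, initial_prompt=None, *args):
    # Stand-in for process_media: echoes what the real function would receive.
    if not initial_prompt:
        initial_prompt = None
    return f"model={model_size}, lang={source_lang}, prompt={initial_prompt!r}"

with gr.Blocks() as demo:
    model_size = gr.Dropdown(choices=["base", "large-v3"], value="base", label="Model")
    source_lang = gr.Dropdown(choices=["tl", "en"], value="tl", label="Source Language")
    initial_prompt = gr.Textbox(label="Initial Prompt (optional)", lines=3,
                                placeholder="Add context, names, or style for the model here")
    result_box = gr.Textbox(label="Result")
    run_btn = gr.Button("Transcribe")
    # The Textbox sits in inputs at the position matching the initial_prompt parameter.
    run_btn.click(process_media_stub,
                  inputs=[model_size, source_lang, initial_prompt],
                  outputs=result_box)

# demo.launch()  # uncomment to run the sketch locally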