Files changed (1) hide show
  1. whisper.py +3 -4
whisper.py CHANGED
@@ -107,7 +107,9 @@ def transcribe_pipeline(audio, task):
107
  return text
108
 
109
  def generate(audio_path, use_v2):
110
-
 
 
111
  if use_v2:
112
  split_stereo_channels(audio_path)
113
 
@@ -126,7 +128,6 @@ def generate(audio_path, use_v2):
126
  right_segs = [(seg["timestamp"][0], seg["timestamp"][1], "Speaker 2", post_process_transcription(seg["text"])) for seg in right_result["chunks"]]
127
 
128
  merged_transcript = sorted(left_segs + right_segs, key=lambda x: x[0])
129
- merged_text = " ".join([seg[3] for seg in merged_transcript])
130
 
131
  output = ""
132
  for start, end, speaker, text in merged_transcript:
@@ -134,14 +135,12 @@ def generate(audio_path, use_v2):
134
 
135
  else:
136
  audio = AudioSegment.from_wav(audio_path)
137
- temp_mono_path = None
138
 
139
  if audio.channels != 1: #stereo2mono
140
  audio = audio.set_channels(1)
141
  temp_mono_path = "temp_mono.wav"
142
  audio.export(temp_mono_path, format="wav")
143
  audio_path = temp_mono_path
144
- task = "transcribe"
145
  output = transcribe_pipeline(format_audio(audio_path), task)
146
 
147
  clean_output = post_process_transcription(output, max_repeats=1) #check
 
107
  return text
108
 
109
  def generate(audio_path, use_v2):
110
+ task = "transcribe"
111
+ temp_mono_path = None
112
+
113
  if use_v2:
114
  split_stereo_channels(audio_path)
115
 
 
128
  right_segs = [(seg["timestamp"][0], seg["timestamp"][1], "Speaker 2", post_process_transcription(seg["text"])) for seg in right_result["chunks"]]
129
 
130
  merged_transcript = sorted(left_segs + right_segs, key=lambda x: x[0])
 
131
 
132
  output = ""
133
  for start, end, speaker, text in merged_transcript:
 
135
 
136
  else:
137
  audio = AudioSegment.from_wav(audio_path)
 
138
 
139
  if audio.channels != 1: #stereo2mono
140
  audio = audio.set_channels(1)
141
  temp_mono_path = "temp_mono.wav"
142
  audio.export(temp_mono_path, format="wav")
143
  audio_path = temp_mono_path
 
144
  output = transcribe_pipeline(format_audio(audio_path), task)
145
 
146
  clean_output = post_process_transcription(output, max_repeats=1) #check