Spaces:
Running
on
Zero
Running
on
Zero
Update whisper.py
#10
by
ssolito
- opened
- whisper.py +3 -4
whisper.py
CHANGED
@@ -107,7 +107,9 @@ def transcribe_pipeline(audio, task):
|
|
107 |
return text
|
108 |
|
109 |
def generate(audio_path, use_v2):
|
110 |
-
|
|
|
|
|
111 |
if use_v2:
|
112 |
split_stereo_channels(audio_path)
|
113 |
|
@@ -126,7 +128,6 @@ def generate(audio_path, use_v2):
|
|
126 |
right_segs = [(seg["timestamp"][0], seg["timestamp"][1], "Speaker 2", post_process_transcription(seg["text"])) for seg in right_result["chunks"]]
|
127 |
|
128 |
merged_transcript = sorted(left_segs + right_segs, key=lambda x: x[0])
|
129 |
-
merged_text = " ".join([seg[3] for seg in merged_transcript])
|
130 |
|
131 |
output = ""
|
132 |
for start, end, speaker, text in merged_transcript:
|
@@ -134,14 +135,12 @@ def generate(audio_path, use_v2):
|
|
134 |
|
135 |
else:
|
136 |
audio = AudioSegment.from_wav(audio_path)
|
137 |
-
temp_mono_path = None
|
138 |
|
139 |
if audio.channels != 1: #stereo2mono
|
140 |
audio = audio.set_channels(1)
|
141 |
temp_mono_path = "temp_mono.wav"
|
142 |
audio.export(temp_mono_path, format="wav")
|
143 |
audio_path = temp_mono_path
|
144 |
-
task = "transcribe"
|
145 |
output = transcribe_pipeline(format_audio(audio_path), task)
|
146 |
|
147 |
clean_output = post_process_transcription(output, max_repeats=1) #check
|
|
|
107 |
return text
|
108 |
|
109 |
def generate(audio_path, use_v2):
|
110 |
+
task = "transcribe"
|
111 |
+
temp_mono_path = None
|
112 |
+
|
113 |
if use_v2:
|
114 |
split_stereo_channels(audio_path)
|
115 |
|
|
|
128 |
right_segs = [(seg["timestamp"][0], seg["timestamp"][1], "Speaker 2", post_process_transcription(seg["text"])) for seg in right_result["chunks"]]
|
129 |
|
130 |
merged_transcript = sorted(left_segs + right_segs, key=lambda x: x[0])
|
|
|
131 |
|
132 |
output = ""
|
133 |
for start, end, speaker, text in merged_transcript:
|
|
|
135 |
|
136 |
else:
|
137 |
audio = AudioSegment.from_wav(audio_path)
|
|
|
138 |
|
139 |
if audio.channels != 1: #stereo2mono
|
140 |
audio = audio.set_channels(1)
|
141 |
temp_mono_path = "temp_mono.wav"
|
142 |
audio.export(temp_mono_path, format="wav")
|
143 |
audio_path = temp_mono_path
|
|
|
144 |
output = transcribe_pipeline(format_audio(audio_path), task)
|
145 |
|
146 |
clean_output = post_process_transcription(output, max_repeats=1) #check
|