Spaces: Running on Zero
Update whisper_cs.py
Browse files
- whisper_cs.py (+5 -27)
whisper_cs.py
CHANGED
@@ -105,6 +105,7 @@ def cleanup_temp_files(*file_paths):
|
|
105 |
if path and os.path.exists(path):
|
106 |
os.remove(path)
|
107 |
|
|
|
108 |
try:
|
109 |
faster_model = WhisperModel(
|
110 |
MODEL_PATH_V2_FAST,
|
@@ -118,6 +119,9 @@ except RuntimeError as e:
|
|
118 |
device="cpu",
|
119 |
compute_type="int8"
|
120 |
)
|
|
|
|
|
|
|
121 |
|
122 |
def load_whisper_model(model_path: str):
|
123 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
@@ -165,33 +169,7 @@ def transcribe_audio(model, audio_path: str) -> Dict:
|
|
165 |
'error': str(e),
|
166 |
'success': False
|
167 |
}
|
168 |
-
|
169 |
-
|
170 |
-
|
171 |
-
diarization_pipeline = DiarizationPipeline.from_pretrained("./pyannote/config.yaml")
|
172 |
-
align_model, metadata = whisperx.load_align_model(language_code="en", device=DEVICE)
|
173 |
-
|
174 |
-
asr_pipe = pipeline(
|
175 |
-
task="automatic-speech-recognition",
|
176 |
-
model=MODEL_PATH_1,
|
177 |
-
chunk_length_s=30,
|
178 |
-
device=DEVICE,
|
179 |
-
return_timestamps=True)
|
180 |
-
|
181 |
-
def diarization(audio_path):
|
182 |
-
diarization_result = diarization_pipeline(audio_path)
|
183 |
-
diarized_segments = list(diarization_result.itertracks(yield_label=True))
|
184 |
-
print('diarized_segments',diarized_segments)
|
185 |
-
return diarized_segments
|
186 |
-
|
187 |
-
def asr(audio_path):
|
188 |
-
print(f"[DEBUG] Starting ASR on audio: {audio_path}")
|
189 |
-
asr_result = asr_pipe(audio_path, return_timestamps=True)
|
190 |
-
print(f"[DEBUG] Raw ASR result: {asr_result}")
|
191 |
-
asr_segments = hf_chunks_to_whisperx_segments(asr_result['chunks'])
|
192 |
-
asr_segments = assign_timestamps(asr_segments, audio_path)
|
193 |
-
return asr_segments
|
194 |
-
|
195 |
|
196 |
def generate(audio_path, use_v2_fast):
|
197 |
|
|
|
105 |
if path and os.path.exists(path):
|
106 |
os.remove(path)
|
107 |
|
108 |
+
'''
|
109 |
try:
|
110 |
faster_model = WhisperModel(
|
111 |
MODEL_PATH_V2_FAST,
|
|
|
119 |
device="cpu",
|
120 |
compute_type="int8"
|
121 |
)
|
122 |
+
'''
|
123 |
+
|
124 |
+
faster_model = WhisperModel(MODEL_PATH_V2_FAST, device=DEVICE, compute_type="int8")
|
125 |
|
126 |
def load_whisper_model(model_path: str):
|
127 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
|
|
169 |
'error': str(e),
|
170 |
'success': False
|
171 |
}
|
172 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
173 |
|
174 |
def generate(audio_path, use_v2_fast):
|
175 |
|