Spaces:

Issamohammed
/

Transcriber

Running

App Files Community

Issamohammed committed on Apr 17

Commit

f64cacf

verified ·

1 Parent(s): 37f7d1f

Update app.py

Browse files

Files changed (1) hide show

app.py +32 -14

app.py CHANGED Viewed

@@ -2,13 +2,13 @@ import os
 import torch
 import gradio as gr
 import logging
 from pydub import AudioSegment
 from pydub.exceptions import CouldntDecodeError
 from transformers import pipeline, AutoModelForSpeechSeq2Seq, AutoProcessor
 from pathlib import Path
 from tempfile import NamedTemporaryFile
 from datetime import timedelta
-import time
 # Setup logging
 logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
@@ -21,6 +21,16 @@ DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 TORCH_DTYPE = torch.float16 if torch.cuda.is_available() else torch.float32
 SUPPORTED_FORMATS = {".wav", ".mp3", ".m4a"}
 # Initialize model and pipeline
 def initialize_pipeline():
     try:
@@ -46,38 +56,43 @@ def initialize_pipeline():
 # Convert audio if needed
 def convert_to_wav(audio_path: str) -> str:
     try:
         ext = str(Path(audio_path).suffix).lower()
         if ext not in SUPPORTED_FORMATS:
             raise ValueError(f"Unsupported audio format: {ext}. Supported formats: {', '.join(SUPPORTED_FORMATS)}")
         if ext != ".wav":
             audio = AudioSegment.from_file(audio_path)
             wav_path = str(Path(audio_path).with_suffix(".converted.wav"))
             audio.export(wav_path, format="wav")
             return wav_path
         return audio_path
     except CouldntDecodeError:
-        logger.error(f"Failed to decode audio file: {audio_path}")
-        raise ValueError("Audio file is corrupted or in an unsupported format.")
     except OSError as e:
         logger.error(f"OS error during audio conversion: {str(e)}")
-        raise ValueError("Failed to process audio file due to a system error.")
     except Exception as e:
-        logger.error(f"Unexpected error during audio conversion: {str(e)}")
-        raise ValueError("An unexpected error occurred while converting the audio.")
 # Split audio into chunks
 def split_audio(audio_path: str) -> list:
     try:
         audio = AudioSegment.from_file(audio_path)
         if len(audio) == 0:
-            raise ValueError("Audio file is empty or invalid.")
         return [audio[i:i + CHUNK_DURATION_MS] for i in range(0, len(audio), CHUNK_DURATION_MS)]
     except CouldntDecodeError:
         logger.error(f"Failed to decode audio for splitting: {audio_path}")
-        raise ValueError("Audio file is corrupted or in an unsupported format.")
     except Exception as e:
         logger.error(f"Failed to split audio: {str(e)}")
-        raise ValueError(f"Failed to process audio: {str(e)}")
 # Helper to compute chunk start time
 def get_chunk_time(index: int, chunk_duration_ms: int) -> str:
@@ -89,7 +104,7 @@ def transcribe(audio_path: str, include_timestamps: bool = False, progress=gr.Pr
     try:
         if not audio_path or not os.path.exists(audio_path):
             logger.warning("Invalid or missing audio file path.")
-            return "Please upload a valid audio file.", None
         # Convert to WAV if needed
         wav_path = convert_to_wav(audio_path)
@@ -110,7 +125,7 @@ def transcribe(audio_path: str, include_timestamps: bool = False, progress=gr.Pr
                     result = PIPELINE(temp_file.name,
                                    generate_kwargs={"task": "transcribe", "language": "sv"})
                     text = result["text"].strip()
-                    if text:  # Only append non-empty transcriptions
                         transcript.append(text)
                         if include_timestamps:
                             timestamp = get_chunk_time(i, CHUNK_DURATION_MS)
@@ -168,7 +183,7 @@ def transcribe(audio_path: str, include_timestamps: bool = False, progress=gr.Pr
         return str(e), None
     except Exception as e:
         logger.error(f"Unexpected error during transcription: {str(e)}")
-        return f"An unexpected error occurred: {str(e)}. Please try again or contact support.", None
 # Initialize pipeline globally
 try:
@@ -181,11 +196,11 @@ except RuntimeError as e:
 def create_interface():
     with gr.Blocks(theme=gr.themes.Soft()) as demo:
         gr.Markdown("# Swedish Whisper Transcriber")
-        gr.Markdown("Upload audio (.wav, .mp3, .m4a) for real-time Swedish speech transcription.")
         with gr.Row():
             with gr.Column():
-                audio_input = gr.Audio(type="filepath", label="Upload Audio")
                 timestamp_toggle = gr.Checkbox(label="Include Timestamps in Download", value=False)
                 transcribe_btn = gr.Button("Transcribe")
@@ -203,6 +218,9 @@ def create_interface():
 if __name__ == "__main__":
     try:
         create_interface().launch()
     except Exception as e:
         logger.critical(f"Failed to launch Gradio interface: {str(e)}")

 import torch
 import gradio as gr
 import logging
+import subprocess
 from pydub import AudioSegment
 from pydub.exceptions import CouldntDecodeError
 from transformers import pipeline, AutoModelForSpeechSeq2Seq, AutoProcessor
 from pathlib import Path
 from tempfile import NamedTemporaryFile
 from datetime import timedelta
 # Setup logging
 logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
 TORCH_DTYPE = torch.float16 if torch.cuda.is_available() else torch.float32
 SUPPORTED_FORMATS = {".wav", ".mp3", ".m4a"}
+# Check for ffmpeg availability
+def check_ffmpeg():
+    try:
+        subprocess.run(["ffmpeg", "-version"], capture_output=True, check=True)
+        logger.info("ffmpeg is installed and accessible.")
+        return True
+    except (subprocess.CalledProcessError, FileNotFoundError):
+        logger.error("ffmpeg is not installed or not found in PATH.")
+        return False
 # Initialize model and pipeline
 def initialize_pipeline():
     try:
 # Convert audio if needed
 def convert_to_wav(audio_path: str) -> str:
     try:
+        if not check_ffmpeg():
+            raise RuntimeError("ffmpeg is required to process .m4a files. Please install ffmpeg and ensure it's in your PATH.")
         ext = str(Path(audio_path).suffix).lower()
         if ext not in SUPPORTED_FORMATS:
             raise ValueError(f"Unsupported audio format: {ext}. Supported formats: {', '.join(SUPPORTED_FORMATS)}")
         if ext != ".wav":
+            logger.info(f"Converting {ext} file to WAV: {audio_path}")
             audio = AudioSegment.from_file(audio_path)
             wav_path = str(Path(audio_path).with_suffix(".converted.wav"))
             audio.export(wav_path, format="wav")
+            logger.info(f"Conversion successful: {wav_path}")
             return wav_path
         return audio_path
     except CouldntDecodeError:
+        logger.error(f"Failed to decode .m4a file: {audio_path}")
+        raise ValueError("The .m4a file is corrupted or not supported. Ensure it's a valid iPhone recording and ffmpeg is installed.")
     except OSError as e:
         logger.error(f"OS error during audio conversion: {str(e)}")
+        raise ValueError("Failed to process the .m4a file due to a system error. Check file permissions or disk space.")
     except Exception as e:
+        logger.error(f"Unexpected error during .m4a conversion: {str(e)}")
+        raise ValueError(f"An unexpected error occurred while converting the .m4a file: {str(e)}")
 # Split audio into chunks
 def split_audio(audio_path: str) -> list:
     try:
         audio = AudioSegment.from_file(audio_path)
         if len(audio) == 0:
+            raise ValueError("The .m4a file is empty or invalid.")
+        logger.info(f"Splitting audio into {CHUNK_DURATION_MS/1000}-second chunks: {audio_path}")
         return [audio[i:i + CHUNK_DURATION_MS] for i in range(0, len(audio), CHUNK_DURATION_MS)]
     except CouldntDecodeError:
         logger.error(f"Failed to decode audio for splitting: {audio_path}")
+        raise ValueError("The .m4a file is corrupted or not supported. Ensure it's a valid iPhone recording.")
     except Exception as e:
         logger.error(f"Failed to split audio: {str(e)}")
+        raise ValueError(f"Failed to process the .m4a file: {str(e)}")
 # Helper to compute chunk start time
 def get_chunk_time(index: int, chunk_duration_ms: int) -> str:
     try:
         if not audio_path or not os.path.exists(audio_path):
             logger.warning("Invalid or missing audio file path.")
+            return "Please upload a valid .m4a file.", None
         # Convert to WAV if needed
         wav_path = convert_to_wav(audio_path)
                     result = PIPELINE(temp_file.name,
                                    generate_kwargs={"task": "transcribe", "language": "sv"})
                     text = result["text"].strip()
+                    if text:
                         transcript.append(text)
                         if include_timestamps:
                             timestamp = get_chunk_time(i, CHUNK_DURATION_MS)
         return str(e), None
     except Exception as e:
         logger.error(f"Unexpected error during transcription: {str(e)}")
+        return f"An unexpected error occurred while processing the .m4a file: {str(e)}. Please ensure the file is a valid iPhone recording and try again.", None
 # Initialize pipeline globally
 try:
 def create_interface():
     with gr.Blocks(theme=gr.themes.Soft()) as demo:
         gr.Markdown("# Swedish Whisper Transcriber")
+        gr.Markdown("Upload an .m4a file from your iPhone for real-time Swedish speech transcription.")
         with gr.Row():
             with gr.Column():
+                audio_input = gr.Audio(type="filepath", label="Upload .m4a Audio")
                 timestamp_toggle = gr.Checkbox(label="Include Timestamps in Download", value=False)
                 transcribe_btn = gr.Button("Transcribe")
 if __name__ == "__main__":
     try:
+        if not check_ffmpeg():
+            print("Error: ffmpeg is required to process .m4a files. Please install ffmpeg and ensure it's in your PATH.")
+            exit(1)
         create_interface().launch()
     except Exception as e:
         logger.critical(f"Failed to launch Gradio interface: {str(e)}")