Spaces:

sam-mata
/

Lecture-Transcriber

Running

App Files Files Community

sam-mata committed on Jul 11

Commit

174bd6d

1 Parent(s): 8d7ef26

Refined Project

Browse files

- Video Input
- Raw text output

Files changed (4) hide show

.gitignore +1 -0
README.md +0 -14
app.py +76 -23
requirements.txt +1 -0

.gitignore ADDED Viewed

	@@ -0,0 +1 @@


1	+ .env

README.md DELETED Viewed

@@ -1,14 +0,0 @@
----
-title: Lecture Transcriber
-emoji: 🔥
-colorFrom: green
-colorTo: gray
-sdk: gradio
-sdk_version: 5.36.2
-app_file: app.py
-pinned: false
-license: afl-3.0
-short_description: Automatically transcribes lecture recordings.
----
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

app.py CHANGED Viewed

@@ -1,35 +1,88 @@
 import gradio as gr
 import replicate
 import os
 REPLICATE_API_TOKEN = os.getenv("REPLICATE_API_TOKEN")
-def process_audio(audio):
-    if audio is None:
-        return "No audio file uploaded."
-    output = replicate.run(
-        "victor-upmeet/whisperx:84d2ad2d6194fe98a17d2b60bef1c7f910c46b2f6fd38996ca457afd9c8abfcb",
-        input={"audio_file": open(audio, "rb")},
-        api_token=REPLICATE_API_TOKEN
-    )
-    segments = output.get("segments") if isinstance(output, dict) else output
     script = " ".join(seg["text"] for seg in segments) if segments else output.get("text", "No transcription found.")
     return script
-with gr.Blocks(theme="monochrome") as demo:
-    gr.Markdown("# AIML430 Lecture Transcription Tool")
-    gr.Markdown("Upload an audio file to begin.")
-    audio_input = gr.Audio(type="filepath", sources=["upload"], label="Audio File")
-    raw_text_output = gr.Textbox(
-        label="Raw Text Output",
-        show_copy_button=True,
-        lines=10
-    )
-    audio_input.change(
-        fn=process_audio,
-        inputs=audio_input,
-        outputs=raw_text_output
-    )
 demo.launch()

 import gradio as gr
 import replicate
 import os
+import tempfile
+from moviepy import VideoFileClip
 REPLICATE_API_TOKEN = os.getenv("REPLICATE_API_TOKEN")
+def process_video(video_file):
+    if not video_file:
+        return "No video file uploaded."
+    temp_audio_file = None
+    try:
+        with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as temp_audio:
+            temp_audio_file = temp_audio.name
+        try:
+            video = VideoFileClip(video_file)
+            video.audio.write_audiofile(temp_audio_file, logger=None)
+        except Exception as e:
+            return f"Failed to extract audio from video: {e}"
+        with open(temp_audio_file, "rb") as audio_f:
+            output = replicate.run(
+                "victor-upmeet/whisperx:84d2ad2d6194fe98a17d2b60bef1c7f910c46b2f6fd38996ca457afd9c8abfcb",
+                input={"audio_file": audio_f},
+                language="en",
+                api_token=REPLICATE_API_TOKEN
+            )
+    finally:
+        if temp_audio_file and os.path.exists(temp_audio_file):
+            os.remove(temp_audio_file)
+    segments = output.get("segments") if isinstance(output, dict) else output
     script = " ".join(seg["text"] for seg in segments) if segments else output.get("text", "No transcription found.")
     return script
+with gr.Blocks(theme="monochrome", css="""
+.centered-container {
+    width: 80vw;
+    min-width: 400px;
+    max-width: 1100px;
+    margin-left: auto !important;
+    margin-right: auto !important;
+    margin-top: 2.5em;
+    margin-bottom: 2.5em;
+    background: var(--block-background-fill);
+    border-radius: 1.2em;
+    box-shadow: 0 0 16px 0 #0001;
+    padding: 2em 2em 2em 2em;
+}
+@media (max-width: 900px) {
+    .centered-container {
+        width: 98vw;
+        padding: 1em 0.5em 1em 0.5em;
+    }
+}
+.transcribe-btn-center {
+    display: flex;
+    justify-content: center;
+    margin-top: 1em;
+}
+""") as demo:
+    with gr.Column(elem_classes="centered-container"):
+        gr.Markdown("# AIML430 Lecture Transcription", elem_id="title")
+        gr.Markdown("## Upload a video file and click 'Transcribe' to begin.", elem_id="subtitle")
+        with gr.Row():
+            with gr.Column(scale=1, min_width=320):
+                video_input = gr.Video(
+                    label="Input Video File (.mp4)",
+                    interactive=True,
+                    sources=["upload"]
+                )
+                with gr.Row(elem_classes="transcribe-btn-center"):
+                    transcribe_btn = gr.Button("Transcribe", scale=0)
+            with gr.Column(scale=1, min_width=320):
+                text_output = gr.Textbox(
+                    label="Raw Text Output",
+                    show_copy_button=True,
+                    lines=16,
+                    interactive=False,
+                )
+        transcribe_btn.click(
+            fn=process_video,
+            inputs=video_input,
+            outputs=text_output
+        )
 demo.launch()

requirements.txt CHANGED Viewed

@@ -1,2 +1,3 @@
 gradio==5.36.2
 replicate==1.0.7

 gradio==5.36.2
+moviepy==2.2.1
 replicate==1.0.7