Spaces:

sam-mata
/

Lecture-Transcriber

Running

File size: 3,324 Bytes

ee1c1bd
 
 
174bd6d
 
ee1c1bd
 
 
174bd6d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
be6da01
493e130
 
 
174bd6d
 
 
 
ee1c1bd
174bd6d
ee1c1bd
 
 
174bd6d
 
 
 
be6da01
174bd6d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1f9d625
174bd6d
 
 
 
 
 
493e130
174bd6d
 
 
1f9d625
174bd6d
 
 
 
be6da01
174bd6d
 
 
 
 
 
 
ee1c1bd
493e130

import gradio as gr
import replicate
import os
import tempfile
from moviepy import VideoFileClip

# Replicate credential, looked up once at import time; None when the
# environment variable is not set.
REPLICATE_API_TOKEN = os.environ.get("REPLICATE_API_TOKEN")

def _transcript_from_output(output):
    """Collapse a transcription result into one plain-text string.

    Handles a dict carrying a ``"segments"`` list (falling back to its
    ``"text"`` entry), a bare list of segment dicts, a plain string, and an
    empty or ``None`` result.
    """
    fallback = "No transcription found."
    if isinstance(output, str):
        return output or fallback
    if isinstance(output, dict):
        segments = output.get("segments")
        if segments:
            return " ".join(seg["text"] for seg in segments)
        return output.get("text", fallback)
    if not output:
        # Empty list / None: there is nothing to join and no dict to query.
        return fallback
    return " ".join(seg["text"] for seg in output)


def process_video(video_file):
    """Extract the audio track of a video and return its transcription.

    Args:
        video_file: Path of the uploaded video, or a falsy value when nothing
            was uploaded.

    Returns:
        The transcript as a single string, or a human-readable message when
        no file was given, the audio could not be extracted, or the
        transcription request failed.
    """
    if not video_file:
        return "No video file uploaded."

    temp_audio_file = None
    try:
        # Only the path is needed: the handle is closed right away and the
        # file is removed explicitly in the ``finally`` clause below.
        with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as temp_audio:
            temp_audio_file = temp_audio.name

        try:
            # The context manager closes the clip (and its readers) even when
            # writing the audio fails.
            with VideoFileClip(video_file) as video:
                if video.audio is None:
                    return "Failed to extract audio from video: the video has no audio track."
                video.audio.write_audiofile(temp_audio_file, logger=None)
        except Exception as e:
            return f"Failed to extract audio from video: {e}"

        try:
            with open(temp_audio_file, "rb") as audio_f:
                # The token is a client setting and the model options belong
                # inside ``input``; passed as keyword arguments to ``run()``
                # they never reach the model.
                # NOTE(review): batch_size=512 is the value already in this
                # file but was not being sent before -- confirm the hosted
                # model copes with it.
                output = replicate.Client(api_token=REPLICATE_API_TOKEN).run(
                    "victor-upmeet/whisperx:84d2ad2d6194fe98a17d2b60bef1c7f910c46b2f6fd38996ca457afd9c8abfcb",
                    input={
                        "audio_file": audio_f,
                        "language": "en",
                        "batch_size": 512,
                        "align_output": False,
                        "diarization": False,
                    },
                )
        except Exception as e:
            # Report failures as text, matching the extraction step above.
            return f"Transcription failed: {e}"
    finally:
        if temp_audio_file and os.path.exists(temp_audio_file):
            os.remove(temp_audio_file)

    return _transcript_from_output(output)

# Page stylesheet: a width-capped, horizontally centred container for the
# whole UI (tighter on narrow screens) and a flex row that centres the button.
_PAGE_CSS = """
.centered-container {
    width: 80vw;
    min-width: 400px;
    max-width: 1400px;
    margin-left: auto !important;
    margin-right: auto !important;
    margin-top: 2.5em;
    margin-bottom: 2.5em;
    background: var(--block-background-fill);
    border-radius: 1.2em;
    box-shadow: 0 0 16px 0 #0001;
    padding: 2em 2em 2em 2em;
}
@media (max-width: 900px) {
    .centered-container {
        width: 98vw;
        padding: 1em 0.5em 1em 0.5em;
    }
}
.transcribe-btn-center {
    display: flex;
    justify-content: center;
    margin-top: 1em;
}
"""

# Notice rendered beneath the Transcribe button.
_UPLOAD_NOTE = (
    "### Please note that file uploads may take a few minutes to process due to network rate limits. "
    "A local version of this app is available [here](https://github.com/sam-mata/video-transcriber)."
)

with gr.Blocks(css=_PAGE_CSS, theme="monochrome") as demo:
    with gr.Column(elem_classes="centered-container"):
        # Page heading and sub-heading.
        gr.Markdown("# Automatic Video Transcriber", elem_id="title")
        gr.Markdown("## Upload a video file and click 'Transcribe' to begin.", elem_id="subtitle")

        with gr.Row():
            # Left column: upload widget, action button and the notice.
            with gr.Column(scale=1, min_width=320):
                video_input = gr.Video(
                    sources=["upload"],
                    interactive=True,
                    label="Input Video File (.mp4)",
                )
                with gr.Row(elem_classes="transcribe-btn-center"):
                    transcribe_btn = gr.Button("Transcribe", scale=0)
                gr.Markdown(_UPLOAD_NOTE, elem_id="note")

            # Right column: read-only transcript box with a copy button.
            with gr.Column(scale=1, min_width=320):
                text_output = gr.Textbox(
                    interactive=False,
                    lines=14,
                    show_copy_button=True,
                    label="Raw Text Output",
                )

        # Clicking the button sends the uploaded video to process_video and
        # puts the returned string into the transcript box.
        transcribe_btn.click(fn=process_video, inputs=video_input, outputs=text_output)

# Start the app; max_file_size is set to "200MB".
demo.launch(max_file_size="200MB")