Spaces:
Running
Running
import gradio as gr | |
import replicate | |
import os | |
import tempfile | |
from moviepy import VideoFileClip | |
# Replicate credential, read once at import time; None when the variable is unset.
REPLICATE_API_TOKEN = os.environ.get("REPLICATE_API_TOKEN")
def _transcript_from_output(output):
    """Collapse a transcription result into one plain-text string.

    Accepts either a dict (optionally holding ``"segments"`` and/or ``"text"``)
    or a bare sequence of segment dicts. Returns ``"No transcription found."``
    when nothing usable is present.
    """
    if isinstance(output, dict):
        segments = output.get("segments")
        fallback = output.get("text", "No transcription found.")
    else:
        # A non-dict result (list, None, ...) has no ``.get``; treat it as the
        # segment sequence itself and fall back to the fixed message.
        segments = output
        fallback = "No transcription found."

    if not segments:
        return fallback
    # Strip each piece so joining with a space never yields doubled spaces.
    return " ".join(seg["text"].strip() for seg in segments)


def process_video(video_file):
    """Extract the audio track of a video and return its transcript.

    Args:
        video_file: Path of the uploaded video, or a falsy value when nothing
            was uploaded.

    Returns:
        The transcript text, or a human-readable message when no file was
        given, audio extraction failed, or the result contained no text.

    Raises:
        Whatever ``replicate.run`` raises; transcription errors are not caught
        here and propagate to the caller.
    """
    if not video_file:
        return "No video file uploaded."

    temp_audio_file = None
    try:
        # Reserve a path only; the handle is closed immediately so the audio
        # writer can open the file itself.
        with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as temp_audio:
            temp_audio_file = temp_audio.name

        video = None
        try:
            video = VideoFileClip(video_file)
            if video.audio is None:
                return "Failed to extract audio from video: no audio track found."
            video.audio.write_audiofile(temp_audio_file, logger=None)
        except Exception as e:
            return f"Failed to extract audio from video: {e}"
        finally:
            # Release the clip's underlying readers even on failure.
            if video is not None:
                video.close()

        with open(temp_audio_file, "rb") as audio_f:
            # Model options must live inside ``input``; keyword arguments to
            # ``replicate.run`` are request options, not model inputs. The
            # client authenticates from the REPLICATE_API_TOKEN environment
            # variable, so no token is passed here.
            output = replicate.run(
                "victor-upmeet/whisperx:84d2ad2d6194fe98a17d2b60bef1c7f910c46b2f6fd38996ca457afd9c8abfcb",
                input={
                    "audio_file": audio_f,
                    "language": "en",
                    # NOTE(review): 512 now actually reaches the model; lower it
                    # if predictions start failing for memory reasons.
                    "batch_size": 512,
                    "align_output": False,
                    "diarization": False,
                },
            )
    finally:
        # Always remove the temporary audio file, including on early returns.
        if temp_audio_file and os.path.exists(temp_audio_file):
            os.remove(temp_audio_file)

    return _transcript_from_output(output)
# Page-level stylesheet: a centred card for the whole app plus a flex wrapper
# that centres the "Transcribe" button.
APP_CSS = """
.centered-container {
width: 80vw;
min-width: 400px;
max-width: 1400px;
margin-left: auto !important;
margin-right: auto !important;
margin-top: 2.5em;
margin-bottom: 2.5em;
background: var(--block-background-fill);
border-radius: 1.2em;
box-shadow: 0 0 16px 0 #0001;
padding: 2em 2em 2em 2em;
}
@media (max-width: 900px) {
.centered-container {
width: 98vw;
padding: 1em 0.5em 1em 0.5em;
}
}
.transcribe-btn-center {
display: flex;
justify-content: center;
margin-top: 1em;
}
"""

with gr.Blocks(theme="monochrome", css=APP_CSS) as demo:
    with gr.Column(elem_classes="centered-container"):
        # Headings.
        gr.Markdown("# Automatic Video Transcriber", elem_id="title")
        gr.Markdown(
            "## Upload a video file and click 'Transcribe' to begin.",
            elem_id="subtitle",
        )

        with gr.Row():
            # First column: video upload, trigger button and usage note.
            with gr.Column(scale=1, min_width=320):
                video_input = gr.Video(
                    label="Input Video File (.mp4)",
                    interactive=True,
                    sources=["upload"],
                )
                with gr.Row(elem_classes="transcribe-btn-center"):
                    transcribe_btn = gr.Button("Transcribe", scale=0)
                gr.Markdown(
                    "### Please note that file uploads may take a few minutes to process due to network rate limits. A local version of this app is available [here](https://github.com/sam-mata/video-transcriber).",
                    elem_id="note",
                )

            # Second column: read-only transcript with a copy button.
            with gr.Column(scale=1, min_width=320):
                text_output = gr.Textbox(
                    label="Raw Text Output",
                    show_copy_button=True,
                    lines=14,
                    interactive=False,
                )

    # Clicking the button sends the uploaded video through process_video and
    # writes the returned string into the textbox.
    transcribe_btn.click(
        fn=process_video,
        inputs=video_input,
        outputs=text_output,
    )

demo.launch(max_file_size="200MB")