Spaces:
Running
Running
File size: 3,324 Bytes
ee1c1bd 174bd6d ee1c1bd 174bd6d be6da01 493e130 174bd6d ee1c1bd 174bd6d ee1c1bd 174bd6d be6da01 174bd6d 1f9d625 174bd6d 493e130 174bd6d 1f9d625 174bd6d be6da01 174bd6d ee1c1bd 493e130 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 |
import gradio as gr
import replicate
import os
import tempfile
from moviepy import VideoFileClip
# Replicate API token, read from the environment once at import time.
# Evaluates to None when the variable is not set.
REPLICATE_API_TOKEN = os.environ.get("REPLICATE_API_TOKEN")
def _transcript_from_output(output):
    """Flatten a transcription result into one space-joined string.

    Accepts either a dict carrying a ``"segments"`` list (optionally with a
    ``"text"`` fallback) or a bare list of segment dicts. Every segment is
    expected to expose its text under the ``"text"`` key.
    """
    segments = output.get("segments") if isinstance(output, dict) else output
    if segments:
        return " ".join(seg["text"] for seg in segments)
    # Only a dict can offer the "text" fallback; guarding here avoids an
    # AttributeError when the service hands back None or an empty list.
    if isinstance(output, dict):
        return output.get("text", "No transcription found.")
    return "No transcription found."


def process_video(video_file):
    """Transcribe the speech in an uploaded video and return it as text.

    The audio track is extracted to a temporary MP3, sent to the WhisperX
    model hosted on Replicate, and the returned segments are joined into a
    single string. The temporary file is always removed afterwards.

    Args:
        video_file: Path of the video to transcribe, as supplied by the
            video input component. A falsy value means nothing was uploaded.

    Returns:
        The transcript, or a human-readable message when no file was
        uploaded, the audio could not be extracted, or the model returned
        nothing usable.

    Raises:
        Errors raised by ``replicate.run`` (network, authentication, model
        failure) are not caught here and propagate to the caller.
    """
    if not video_file:
        return "No video file uploaded."

    temp_audio_file = None
    try:
        # delete=False: we only need a reserved path. The handle is closed
        # right away so the audio writer can open the file itself.
        with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as temp_audio:
            temp_audio_file = temp_audio.name

        try:
            video = VideoFileClip(video_file)
            try:
                if video.audio is None:
                    return "Failed to extract audio from video: the video has no audio track."
                video.audio.write_audiofile(temp_audio_file, logger=None)
            finally:
                # Release the reader resources held by the clip.
                video.close()
        except Exception as e:
            # UI boundary: report the failure as text instead of crashing.
            return f"Failed to extract audio from video: {e}"

        with open(temp_audio_file, "rb") as audio_f:
            # Model options belong inside `input`; extra keyword arguments to
            # replicate.run are client-level settings and do not reach the
            # model. The client authenticates from the REPLICATE_API_TOKEN
            # environment variable, so no token argument is passed here.
            output = replicate.run(
                "victor-upmeet/whisperx:84d2ad2d6194fe98a17d2b60bef1c7f910c46b2f6fd38996ca457afd9c8abfcb",
                input={
                    "audio_file": audio_f,
                    "language": "en",
                    "batch_size": 512,
                    "align_output": False,
                    "diarization": False,
                },
            )
    finally:
        if temp_audio_file and os.path.exists(temp_audio_file):
            os.remove(temp_audio_file)

    return _transcript_from_output(output)
# Page stylesheet: a centred, width-limited card for the whole app plus a
# flex row that centres the "Transcribe" button.
_CUSTOM_CSS = """
.centered-container {
width: 80vw;
min-width: 400px;
max-width: 1400px;
margin-left: auto !important;
margin-right: auto !important;
margin-top: 2.5em;
margin-bottom: 2.5em;
background: var(--block-background-fill);
border-radius: 1.2em;
box-shadow: 0 0 16px 0 #0001;
padding: 2em 2em 2em 2em;
}
@media (max-width: 900px) {
.centered-container {
width: 98vw;
padding: 1em 0.5em 1em 0.5em;
}
}
.transcribe-btn-center {
display: flex;
justify-content: center;
margin-top: 1em;
}
"""

# NOTE(review): the container nesting below follows the most natural reading
# of the layout (input column | output column) -- confirm against the
# rendered page.
with gr.Blocks(css=_CUSTOM_CSS, theme="monochrome") as demo:
    with gr.Column(elem_classes="centered-container"):
        gr.Markdown("# Automatic Video Transcriber", elem_id="title")
        gr.Markdown(
            "## Upload a video file and click 'Transcribe' to begin.",
            elem_id="subtitle",
        )

        with gr.Row():
            # Input side: upload widget, trigger button and usage note.
            with gr.Column(scale=1, min_width=320):
                video_input = gr.Video(
                    label="Input Video File (.mp4)",
                    sources=["upload"],
                    interactive=True,
                )
                with gr.Row(elem_classes="transcribe-btn-center"):
                    transcribe_btn = gr.Button("Transcribe", scale=0)
                gr.Markdown(
                    "### Please note that file uploads may take a few minutes to process "
                    "due to network rate limits. A local version of this app is available "
                    "[here](https://github.com/sam-mata/video-transcriber).",
                    elem_id="note",
                )

            # Output side: read-only transcript box with a copy button.
            with gr.Column(scale=1, min_width=320):
                text_output = gr.Textbox(
                    label="Raw Text Output",
                    lines=14,
                    interactive=False,
                    show_copy_button=True,
                )

        # Clicking the button runs the transcription on the uploaded video
        # and writes the result into the text box.
        transcribe_btn.click(
            fn=process_video,
            inputs=video_input,
            outputs=text_output,
        )
# Start the Gradio server with uploads capped at 200MB.
demo.launch(max_file_size="200MB")