Spaces:
Running
Running
Refined Project
Browse files
- Video Input
- Raw text output
- .gitignore +1 -0
- README.md +0 -14
- app.py +76 -23
- requirements.txt +1 -0
.gitignore
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
.env
|
README.md
DELETED
@@ -1,14 +0,0 @@
|
|
1 |
-
---
|
2 |
-
title: Lecture Transcriber
|
3 |
-
emoji: 🔥
|
4 |
-
colorFrom: green
|
5 |
-
colorTo: gray
|
6 |
-
sdk: gradio
|
7 |
-
sdk_version: 5.36.2
|
8 |
-
app_file: app.py
|
9 |
-
pinned: false
|
10 |
-
license: afl-3.0
|
11 |
-
short_description: Automatically transcribes lecture recordings.
|
12 |
-
---
|
13 |
-
|
14 |
-
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
app.py
CHANGED
@@ -1,35 +1,88 @@
|
|
1 |
import gradio as gr
|
2 |
import replicate
|
3 |
import os
|
|
|
|
|
4 |
|
5 |
REPLICATE_API_TOKEN = os.getenv("REPLICATE_API_TOKEN")
|
6 |
|
7 |
-
def
|
8 |
-
if
|
9 |
-
return "No
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
16 |
|
|
|
17 |
script = " ".join(seg["text"] for seg in segments) if segments else output.get("text", "No transcription found.")
|
18 |
return script
|
19 |
|
20 |
-
with gr.Blocks(theme="monochrome"
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
34 |
|
35 |
demo.launch()
|
|
|
1 |
import gradio as gr
|
2 |
import replicate
|
3 |
import os
|
4 |
+
import tempfile
|
5 |
+
from moviepy import VideoFileClip
|
6 |
|
7 |
# Replicate API token, read once at import time from the REPLICATE_API_TOKEN
# environment variable; os.getenv returns None when the variable is not set.
REPLICATE_API_TOKEN = os.getenv("REPLICATE_API_TOKEN")
|
8 |
|
9 |
+
def process_video(video_file):
    """Transcribe the speech in an uploaded video with WhisperX on Replicate.

    The audio track is written to a temporary ``.mp3`` file, uploaded to the
    pinned ``victor-upmeet/whisperx`` model with the language fixed to
    English, and the returned segments are joined into one string. The
    temporary file is always removed, whether or not transcription succeeds.

    Args:
        video_file: Path of the uploaded video as supplied by the Gradio
            ``Video`` component, or a falsy value when nothing was uploaded.

    Returns:
        The transcript text, or a human-readable message when no file was
        uploaded, the audio could not be extracted, or the model returned
        nothing usable.

    Raises:
        Errors raised by the Replicate call itself are not caught here and
        propagate to the caller.
    """
    if not video_file:
        return "No video file uploaded."

    temp_audio_file = None
    try:
        # Only reserve a path here: the handle is closed before MoviePy
        # writes to it, and delete=False keeps the file until the finally.
        with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as temp_audio:
            temp_audio_file = temp_audio.name

        try:
            # The context manager closes the clip (and its reader) even when
            # the export fails.
            with VideoFileClip(video_file) as video:
                if video.audio is None:
                    return "Failed to extract audio from video: the file has no audio track."
                video.audio.write_audiofile(temp_audio_file, logger=None)
        except Exception as e:
            # Boundary with the UI: report any decoding/encoding failure as text.
            return f"Failed to extract audio from video: {e}"

        # The token belongs to the client and model options belong inside
        # `input`; passing them as extra keyword arguments to run() does not
        # deliver them to the model.
        client = replicate.Client(api_token=REPLICATE_API_TOKEN)
        with open(temp_audio_file, "rb") as audio_f:
            output = client.run(
                "victor-upmeet/whisperx:84d2ad2d6194fe98a17d2b60bef1c7f910c46b2f6fd38996ca457afd9c8abfcb",
                input={"audio_file": audio_f, "language": "en"},
            )
    finally:
        if temp_audio_file and os.path.exists(temp_audio_file):
            os.remove(temp_audio_file)

    # The model may answer with a dict carrying "segments"/"text" or with a
    # bare sequence of segments; only a dict can supply the "text" fallback.
    if isinstance(output, dict):
        segments = output.get("segments")
        fallback = output.get("text", "No transcription found.")
    else:
        segments = output
        fallback = "No transcription found."

    if segments:
        return " ".join(seg["text"] for seg in segments)
    return fallback
|
36 |
|
37 |
+
# Page-level stylesheet: a centred, width-limited card for the whole app and
# a flex row that centres the "Transcribe" button.
_PAGE_CSS = """
.centered-container {
    width: 80vw;
    min-width: 400px;
    max-width: 1100px;
    margin-left: auto !important;
    margin-right: auto !important;
    margin-top: 2.5em;
    margin-bottom: 2.5em;
    background: var(--block-background-fill);
    border-radius: 1.2em;
    box-shadow: 0 0 16px 0 #0001;
    padding: 2em 2em 2em 2em;
}
@media (max-width: 900px) {
    .centered-container {
        width: 98vw;
        padding: 1em 0.5em 1em 0.5em;
    }
}
.transcribe-btn-center {
    display: flex;
    justify-content: center;
    margin-top: 1em;
}
"""

# NOTE(review): the component nesting below is reconstructed from a flattened
# diff view in which indentation was lost -- confirm against the repository.
with gr.Blocks(theme="monochrome", css=_PAGE_CSS) as demo:
    with gr.Column(elem_classes="centered-container"):
        gr.Markdown("# AIML430 Lecture Transcription", elem_id="title")
        gr.Markdown(
            "## Upload a video file and click 'Transcribe' to begin.",
            elem_id="subtitle",
        )
        with gr.Row():
            # Left column: video upload plus the trigger button.
            with gr.Column(scale=1, min_width=320):
                video_input = gr.Video(label="Input Video File (.mp4)", interactive=True, sources=["upload"])
                with gr.Row(elem_classes="transcribe-btn-center"):
                    transcribe_btn = gr.Button("Transcribe", scale=0)
            # Right column: read-only transcript with a copy button.
            with gr.Column(scale=1, min_width=320):
                text_output = gr.Textbox(label="Raw Text Output", show_copy_button=True, lines=16, interactive=False)
        # Clicking the button sends the uploaded video to process_video and
        # shows its return value in the textbox.
        transcribe_btn.click(process_video, video_input, text_output)

demo.launch()
|
requirements.txt
CHANGED
@@ -1,2 +1,3 @@
|
|
1 |
gradio==5.36.2
|
|
|
2 |
replicate==1.0.7
|
|
|
1 |
gradio==5.36.2
|
2 |
+
moviepy==2.2.1
|
3 |
replicate==1.0.7
|