sam-mata committed
Commit 174bd6d · 1 parent: 8d7ef26

Refined Project


- Video Input
- Raw text output

Files changed (4)
  1. .gitignore +1 -0
  2. README.md +0 -14
  3. app.py +76 -23
  4. requirements.txt +1 -0
.gitignore ADDED
@@ -0,0 +1 @@
+ .env
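
The new entry keeps a local .env file out of the repository; app.py reads REPLICATE_API_TOKEN from the environment, which a Hugging Face Space injects as a secret. For running locally, a minimal sketch like the following could load the ignored file first. This helper is illustrative only: python-dotenv is assumed here and is not in requirements.txt.

# Hypothetical local-run helper (not part of this commit): load the git-ignored
# .env so os.getenv("REPLICATE_API_TOKEN") also works outside a Space.
# Assumes python-dotenv is installed separately: pip install python-dotenv
import os
from dotenv import load_dotenv

load_dotenv()  # reads KEY=VALUE pairs from ./.env into os.environ
if not os.getenv("REPLICATE_API_TOKEN"):
    raise SystemExit("REPLICATE_API_TOKEN is not set; add it to .env or the Space secrets.")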
README.md DELETED
@@ -1,14 +0,0 @@
- ---
- title: Lecture Transcriber
- emoji: 🔥
- colorFrom: green
- colorTo: gray
- sdk: gradio
- sdk_version: 5.36.2
- app_file: app.py
- pinned: false
- license: afl-3.0
- short_description: Automatically transcribes lecture recordings.
- ---
-
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py CHANGED
@@ -1,35 +1,88 @@
  import gradio as gr
  import replicate
  import os
+ import tempfile
+ from moviepy import VideoFileClip

  REPLICATE_API_TOKEN = os.getenv("REPLICATE_API_TOKEN")

- def process_audio(audio):
-     if audio is None:
-         return "No audio file uploaded."
-     output = replicate.run(
-         "victor-upmeet/whisperx:84d2ad2d6194fe98a17d2b60bef1c7f910c46b2f6fd38996ca457afd9c8abfcb",
-         input={"audio_file": open(audio, "rb")},
-         api_token=REPLICATE_API_TOKEN
-     )
-     segments = output.get("segments") if isinstance(output, dict) else output
+ def process_video(video_file):
+     if not video_file:
+         return "No video file uploaded."
+     temp_audio_file = None
+     try:
+         with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as temp_audio:
+             temp_audio_file = temp_audio.name
+         try:
+             video = VideoFileClip(video_file)
+             video.audio.write_audiofile(temp_audio_file, logger=None)
+         except Exception as e:
+             return f"Failed to extract audio from video: {e}"
+
+         with open(temp_audio_file, "rb") as audio_f:
+             output = replicate.run(
+                 "victor-upmeet/whisperx:84d2ad2d6194fe98a17d2b60bef1c7f910c46b2f6fd38996ca457afd9c8abfcb",
+                 input={"audio_file": audio_f},
+                 language="en",
+                 api_token=REPLICATE_API_TOKEN
+             )
+     finally:
+         if temp_audio_file and os.path.exists(temp_audio_file):
+             os.remove(temp_audio_file)

+     segments = output.get("segments") if isinstance(output, dict) else output
      script = " ".join(seg["text"] for seg in segments) if segments else output.get("text", "No transcription found.")
      return script

- with gr.Blocks(theme="monochrome") as demo:
-     gr.Markdown("# AIML430 Lecture Transcription Tool")
-     gr.Markdown("Upload an audio file to begin.")
-     audio_input = gr.Audio(type="filepath", sources=["upload"], label="Audio File")
-     raw_text_output = gr.Textbox(
-         label="Raw Text Output",
-         show_copy_button=True,
-         lines=10
-     )
-     audio_input.change(
-         fn=process_audio,
-         inputs=audio_input,
-         outputs=raw_text_output
-     )
+ with gr.Blocks(theme="monochrome", css="""
+ .centered-container {
+     width: 80vw;
+     min-width: 400px;
+     max-width: 1100px;
+     margin-left: auto !important;
+     margin-right: auto !important;
+     margin-top: 2.5em;
+     margin-bottom: 2.5em;
+     background: var(--block-background-fill);
+     border-radius: 1.2em;
+     box-shadow: 0 0 16px 0 #0001;
+     padding: 2em 2em 2em 2em;
+ }
+ @media (max-width: 900px) {
+     .centered-container {
+         width: 98vw;
+         padding: 1em 0.5em 1em 0.5em;
+     }
+ }
+ .transcribe-btn-center {
+     display: flex;
+     justify-content: center;
+     margin-top: 1em;
+ }
+ """) as demo:
+     with gr.Column(elem_classes="centered-container"):
+         gr.Markdown("# AIML430 Lecture Transcription", elem_id="title")
+         gr.Markdown("## Upload a video file and click 'Transcribe' to begin.", elem_id="subtitle")
+         with gr.Row():
+             with gr.Column(scale=1, min_width=320):
+                 video_input = gr.Video(
+                     label="Input Video File (.mp4)",
+                     interactive=True,
+                     sources=["upload"]
+                 )
+                 with gr.Row(elem_classes="transcribe-btn-center"):
+                     transcribe_btn = gr.Button("Transcribe", scale=0)
+             with gr.Column(scale=1, min_width=320):
+                 text_output = gr.Textbox(
+                     label="Raw Text Output",
+                     show_copy_button=True,
+                     lines=16,
+                     interactive=False,
+                 )
+         transcribe_btn.click(
+             fn=process_video,
+             inputs=video_input,
+             outputs=text_output
+         )

  demo.launch()
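
The new process_video path has two stages: moviepy extracts the audio track of the uploaded video into a temporary .mp3, then the WhisperX model on Replicate transcribes that audio and the segment texts are joined into one string. A minimal sketch of the same flow outside Gradio, assuming a local sample.mp4 (hypothetical path) and REPLICATE_API_TOKEN already set in the environment, which the replicate client reads automatically; the per-call language and token keyword arguments from the commit are left out here and defaults are used instead.

# Sketch of the extract-then-transcribe flow used by process_video (assumptions noted above).
import os
import tempfile

import replicate
from moviepy import VideoFileClip  # moviepy 2.x top-level import, as in app.py

def transcribe(video_path: str) -> str:
    # Reserve a temporary .mp3 path for the extracted audio track.
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as tmp:
        audio_path = tmp.name
    try:
        clip = VideoFileClip(video_path)
        clip.audio.write_audiofile(audio_path, logger=None)  # write audio only, no progress bar
        clip.close()
        with open(audio_path, "rb") as audio_f:
            output = replicate.run(
                "victor-upmeet/whisperx:84d2ad2d6194fe98a17d2b60bef1c7f910c46b2f6fd38996ca457afd9c8abfcb",
                input={"audio_file": audio_f},
            )
    finally:
        # Always clean up the temporary audio file.
        if os.path.exists(audio_path):
            os.remove(audio_path)
    # app.py expects a dict with "segments"; join their text into one raw string.
    segments = output.get("segments", []) if isinstance(output, dict) else output
    return " ".join(seg["text"].strip() for seg in segments)

if __name__ == "__main__":
    print(transcribe("sample.mp4"))  # sample.mp4 is a placeholder file name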
requirements.txt CHANGED
@@ -1,2 +1,3 @@
  gradio==5.36.2
+ moviepy==2.2.1
  replicate==1.0.7
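
The moviepy==2.2.1 pin matters for the import style in app.py: the 2.x series exposes VideoFileClip from the top-level package, whereas 1.x code typically imported it from moviepy.editor. A quick sanity check, assuming the pinned 2.x release is what gets installed:

# Verify the moviepy 2.x import used by app.py resolves in this environment.
# (Under moviepy 1.x the usual form was `from moviepy.editor import VideoFileClip`.)
import moviepy
from moviepy import VideoFileClip  # 2.x top-level import, matching app.py

print(moviepy.__version__)     # expected: 2.2.1 per requirements.txt
print(VideoFileClip.__name__)  # confirms the class is importable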