divython committed on
Commit 03410b4 · verified · 1 Parent(s): e490bc6

Update app.py

Files changed (1)
  1. app.py +15 -37
app.py CHANGED
@@ -1,27 +1,15 @@
 import gradio as gr
-import yt_dlp
 import os
 import torch
 import gc
 from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
 import tempfile
-import whisper
-
-# Load summarizer
-@torch.no_grad()
-def load_summarizer():
-    model_name = "facebook/bart-large-cnn"
-    tokenizer = AutoTokenizer.from_pretrained(model_name)
-    model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
-    return pipeline("summarization", model=model, tokenizer=tokenizer, device=0 if torch.cuda.is_available() else -1)
-
-summarizer = load_summarizer()
+import yt_dlp
 
-# Load Whisper model
-whisper_model = whisper.load_model("base")  # or "small" for better accuracy
+summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
+asr_pipeline = pipeline("automatic-speech-recognition", model="openai/whisper-base")
 
 def download_audio(url: str, temp_dir: str) -> str:
-    """Download audio using yt-dlp and return path"""
     output_path = os.path.join(temp_dir, "audio.%(ext)s")
     ydl_opts = {
         'format': 'bestaudio/best',
@@ -37,31 +25,21 @@ def download_audio(url: str, temp_dir: str) -> str:
         ydl.download([url])
     return output_path.replace('%(ext)s', 'mp3')
 
-def transcribe_audio(audio_path: str) -> str:
-    """Transcribe audio with Whisper"""
-    result = whisper_model.transcribe(audio_path)
-    return result['text']
-
-def summarize_text(text: str) -> str:
-    """Summarize text"""
-    if len(text.strip()) < 50:
-        return "❌ Transcription too short to summarize"
-    gc.collect()
-    if torch.cuda.is_available():
-        torch.cuda.empty_cache()
-    summary = summarizer(text, max_length=150, min_length=50, do_sample=False)
-    return summary[0]['summary_text']
-
 def process_video(url: str) -> str:
     with tempfile.TemporaryDirectory() as tmpdir:
         audio_path = download_audio(url, tmpdir)
-        transcription = transcribe_audio(audio_path)
-        summary = summarize_text(transcription)
-        return summary
-
-def main(youtube_url):
-    return process_video(youtube_url)
+        transcription_result = asr_pipeline(audio_path)
+        text = transcription_result['text']
+        if len(text.strip()) < 50:
+            return "Transcription too short or unclear"
+        gc.collect()
+        if torch.cuda.is_available():
+            torch.cuda.empty_cache()
+        summary_result = summarizer(text, max_length=150, min_length=50, do_sample=False)
+        return summary_result[0]['summary_text']
+
+def main(url):
+    return process_video(url)
 
 iface = gr.Interface(fn=main, inputs="text", outputs="text", title="YouTube Audio Summarizer")
-
 iface.launch()
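
For reference, a minimal standalone sketch of the transcribe-then-summarize flow the updated app.py now relies on. It assumes an MP3 already produced by yt-dlp (sample.mp3 is a placeholder path); the chunk_length_s and truncation arguments are optional extras for longer inputs, not part of the committed code.

from transformers import pipeline

# Same checkpoints as the commit: Whisper base for speech recognition,
# BART-large-CNN for summarization.
asr_pipeline = pipeline("automatic-speech-recognition", model="openai/whisper-base")
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")

# Whisper works on 30-second windows; chunk_length_s lets the pipeline split
# longer recordings automatically (ffmpeg must be available to decode the MP3).
transcription = asr_pipeline("sample.mp3", chunk_length_s=30)
text = transcription["text"]

# BART-large-CNN accepts roughly 1024 input tokens, so truncation=True guards
# against very long transcripts.
summary = summarizer(text, max_length=150, min_length=50, do_sample=False, truncation=True)
print(summary[0]["summary_text"])

For videos much longer than a few minutes, the transcript will exceed BART's input window, so a follow-up change could summarize the text in chunks before a final pass.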