divython committed on
Commit
c64a626
·
verified ·
1 Parent(s): b3e2e09

Update app.py

Browse files

fourth changes

Files changed (1) hide show
  1. app.py +112 -23
app.py CHANGED
@@ -1,36 +1,125 @@
1
  import gradio as gr
2
  import pytube
3
  from transformers import pipeline
 
 
4
 
5
  # Initialize pipelines
6
  asr = pipeline("automatic-speech-recognition", model="openai/whisper-base", chunk_length_s=30)
7
  summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
8
 
9
- def summarize_youtube(url):
10
- # Download audio
11
- yt = pytube.YouTube(url)
12
- stream = yt.streams.filter(only_audio=True).first()
13
- stream.download(filename="audio.mp3")
14
-
15
- # Transcribe
16
- result = asr("audio.mp3")
17
- transcript = result["text"]
18
-
19
- # Summarize
20
- summary = summarizer(transcript, max_length=150, min_length=50, do_sample=False)[0]["summary_text"]
21
 
22
- # Embed video
23
- v_id = url.split("v=")[-1]
24
- embed_html = f'<iframe width="560" height="315" src="https://www.youtube.com/embed/{v_id}" frameborder="0" allowfullscreen></iframe>'
25
-
26
- return embed_html, transcript, summary
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
 
28
  # Build Gradio app
29
- with gr.Blocks() as demo:
30
- gr.Markdown("## πŸŽ“ Multi‑lingual YouTube Summarizer (Hindi / Hinglish / English)")
31
- url_input = gr.Textbox(label="YouTube URL")
32
- vid, txt, summ = gr.HTML(), gr.Textbox(label="Transcript"), gr.Textbox(label="Summary")
33
- btn = gr.Button("Summarize")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
  btn.click(summarize_youtube, inputs=url_input, outputs=[vid, txt, summ])
 
 
 
 
 
 
 
 
35
 
36
- demo.launch()
 
 
1
  import gradio as gr
2
  import pytube
3
  from transformers import pipeline
4
+ import os
5
+ import re
6
 
7
  # Initialize pipelines
8
  asr = pipeline("automatic-speech-recognition", model="openai/whisper-base", chunk_length_s=30)
9
  summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
10
 
11
def extract_video_id(url):
    """Return the 11-character YouTube video ID contained in *url*.

    Recognised forms are a ``v=<id>`` query parameter, ``youtu.be/<id>``
    short links, and ``/embed/<id>``, ``/v/<id>``, ``/shorts/<id>`` and
    ``/live/<id>`` paths.

    Args:
        url: The URL text to inspect.

    Returns:
        The video ID string, or ``None`` when *url* is not a string or no
        recognised form is present.
    """
    if not isinstance(url, str):
        return None

    # Each pattern anchors the ID to an explicit marker. Matching "any 11
    # ID-characters after any slash" also matches hostnames and unrelated
    # path segments (e.g. "//youtube-nocookie.com" -> "youtube-noc").
    patterns = (
        r'(?:^|[?&])v=([0-9A-Za-z_-]{11})',
        r'youtu\.be/([0-9A-Za-z_-]{11})',
        r'/(?:embed|v|shorts|live)/([0-9A-Za-z_-]{11})',
    )
    for pattern in patterns:
        match = re.search(pattern, url)
        if match:
            return match.group(1)
    return None
23
 
24
def summarize_youtube(url):
    """Download a YouTube video's audio, transcribe it and summarize the text.

    Args:
        url: YouTube video URL entered by the user.

    Returns:
        A 3-tuple of strings ``(embed_html, transcript, summary)``, one per
        Gradio output. Failures are reported through the same three slots
        (an error marker, an explanation and a placeholder summary) rather
        than by raising.
    """
    audio_filename = "audio"
    # Character threshold above which the transcript is summarized in pieces
    # so a single summarizer call never receives an over-long input.
    max_chunk = 1024
    words_per_chunk = 200
    max_chunks = 3  # only the first few chunks are summarized to avoid timeouts

    if not url or not url.strip():
        return "❌ Error: Invalid YouTube URL", "Please check the URL format", "No summary available"

    try:
        # Remove any leftover download from an interrupted run so it cannot
        # be mistaken for this video's audio.
        if os.path.exists(audio_filename):
            os.remove(audio_filename)

        yt = pytube.YouTube(url, use_oauth=False, allow_oauth_cache=False)

        # Prefer mp4 audio; fall back to any audio-only stream.
        audio_streams = yt.streams.filter(only_audio=True, file_extension='mp4')
        if not audio_streams:
            audio_streams = yt.streams.filter(only_audio=True)

        if not audio_streams:
            return "❌ Error: No audio streams available", "Could not extract audio from video", "No summary available"

        stream = audio_streams.first()
        audio_file = stream.download(filename=audio_filename)

        # Transcribe, and delete the download even when transcription fails.
        try:
            transcript = asr(audio_file)["text"]
        finally:
            if os.path.exists(audio_file):
                os.remove(audio_file)

        if len(transcript.split()) < 10:
            return "❌ Error: Transcript too short", transcript, "Cannot summarize - transcript too brief"

        if len(transcript) > max_chunk:
            # Long transcript: summarize fixed-size word chunks and join them.
            words = transcript.split()
            chunks = [
                ' '.join(words[i:i + words_per_chunk])
                for i in range(0, len(words), words_per_chunk)
            ]
            summaries = [
                summarizer(chunk, max_length=100, min_length=30, do_sample=False)[0]["summary_text"]
                for chunk in chunks[:max_chunks]
                if len(chunk.strip()) > 50
            ]
            summary = " ".join(summaries)
        else:
            summary = summarizer(transcript, max_length=150, min_length=50, do_sample=False)[0]["summary_text"]

        v_id = extract_video_id(url)
        if v_id:
            embed_html = f'<iframe width="560" height="315" src="https://www.youtube.com/embed/{v_id}" frameborder="0" allowfullscreen></iframe>'
        else:
            embed_html = "❌ Could not extract video ID"

        return embed_html, transcript, summary

    except pytube.exceptions.RegexMatchError:
        return "❌ Error: Invalid YouTube URL", "Please check the URL format", "No summary available"
    except pytube.exceptions.VideoUnavailable:
        return "❌ Error: Video unavailable", "Video may be private or deleted", "No summary available"
    except Exception as e:
        # UI boundary: report any other failure to the user instead of crashing the app.
        return f"❌ Error: {str(e)}", "An error occurred during processing", "No summary available"
91
 
92
  # Build Gradio app
93
# Layout: URL input and button on top; video player beside the transcript
# (collapsed by default) and the summary below.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("## 🎓 Multi-lingual YouTube Summarizer (Hindi / Hinglish / English)")
    gr.Markdown("Enter a YouTube URL to get an AI-generated summary of the video content.")

    with gr.Row():
        with gr.Column():
            url_input = gr.Textbox(
                label="YouTube URL",
                placeholder="https://www.youtube.com/watch?v=...",
                lines=1
            )
            btn = gr.Button("🚀 Summarize Video", variant="primary")

    with gr.Row():
        with gr.Column():
            vid = gr.HTML(label="Video Player")
        with gr.Column():
            with gr.Accordion("📝 Transcript", open=False):
                txt = gr.Textbox(label="Full Transcript", lines=10, max_lines=15)
            summ = gr.Textbox(label="📋 Summary", lines=5)

    # summarize_youtube returns (embed_html, transcript, summary) in this order.
    btn.click(summarize_youtube, inputs=url_input, outputs=[vid, txt, summ])

    # Add examples
    gr.Examples(
        examples=[
            ["https://www.youtube.com/watch?v=dQw4w9WgXcQ"],  # Replace with actual examples
        ],
        inputs=url_input
    )

if __name__ == "__main__":
    demo.launch(share=True)