sathwikabhavaraju2005 committed on
Commit
823bf5e
·
verified ·
1 Parent(s): 39d22a6

Create summarizer.py

Browse files
Files changed (1) hide show
  1. utils/summarizer.py +46 -0
utils/summarizer.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import openai
2
+ import os
3
+ from youtube_transcript_api import YouTubeTranscriptApi
4
+ from dotenv import load_dotenv
5
+ import fitz # PyMuPDF
6
+
7
# Load .env values into the environment, then configure the OpenAI API key
# from it (the key is None when OPENAI_API_KEY is not set).
load_dotenv()
openai.api_key = os.environ.get("OPENAI_API_KEY")
9
+
10
+
11
+ # ---------- PDF Summarization ----------
12
def extract_text_from_pdf(pdf_path) -> str:
    """Return the concatenated text of every page in a PDF.

    Args:
        pdf_path: Location of the PDF; passed unchanged to ``fitz.open``.

    Returns:
        The text of all pages joined in page order with no separator added.
        The result is an empty string when no page yields any text.

    Raises:
        Nothing is caught here, so any error from ``fitz.open`` or
        ``page.get_text`` propagates to the caller.
    """
    with fitz.open(pdf_path) as doc:
        # Join once instead of growing a string with ``+=`` per page, which
        # can degrade to quadratic copying on large documents. The join runs
        # to completion before the ``with`` block closes the document.
        return "".join(page.get_text() for page in doc)
18
+
19
+
20
+ # ---------- YouTube Summarization ----------
21
def _extract_video_id(video_url):
    """Return the video ID found in a YouTube URL, or a bare ID unchanged.

    Raises:
        ValueError: If no video ID can be located in ``video_url``.
    """
    # Imported locally so this block does not rely on the module's import list.
    from urllib.parse import parse_qs, urlparse

    candidate = video_url.strip()
    # A string without any URL punctuation is taken to be a bare video ID.
    if candidate and not any(ch in candidate for ch in "/?&=."):
        return candidate

    # Add a scheme when missing so urlparse separates host, path and query.
    parsed = urlparse(candidate if "://" in candidate else f"https://{candidate}")

    # Watch URLs: read the real ``v`` query parameter. Splitting on "v=" would
    # also match the tail of other parameters (e.g. "&pv=1").
    query_ids = parse_qs(parsed.query).get("v")
    if query_ids:
        return query_ids[0]

    segments = [part for part in parsed.path.split("/") if part]
    host = parsed.hostname or ""
    # Short links: https://youtu.be/<id>
    if segments and (host == "youtu.be" or host.endswith(".youtu.be")):
        return segments[0]
    # Path-style links: /embed/<id>, /shorts/<id>, /live/<id>, /v/<id>
    if len(segments) >= 2 and segments[0] in {"embed", "shorts", "live", "v"}:
        return segments[1]

    raise ValueError(f"could not find a video ID in {video_url!r}")


def get_youtube_transcript(video_url):
    """Fetch a YouTube video's transcript as one space-joined string.

    Args:
        video_url: A YouTube URL (watch, youtu.be, embed, shorts or live
            form) or a bare video ID.

    Returns:
        The transcript entries' text joined with single spaces. On any
        failure the function does not raise; it returns a string beginning
        with ``"Error fetching transcript: "`` followed by the error message.
    """
    try:
        video_id = _extract_video_id(video_url)
        if hasattr(YouTubeTranscriptApi, "get_transcript"):
            # Static API: a list of dicts, each carrying a "text" entry.
            entries = YouTubeTranscriptApi.get_transcript(video_id)
            pieces = [entry["text"] for entry in entries]
        else:
            # NOTE(review): newer youtube-transcript-api releases drop the
            # static method in favour of an instance ``fetch`` whose snippets
            # expose ``.text`` - confirm against the installed version.
            fetched = YouTubeTranscriptApi().fetch(video_id)
            pieces = [snippet.text for snippet in fetched]
        return " ".join(pieces)
    except Exception as e:
        # Callers receive failures as a message string rather than an exception.
        return f"Error fetching transcript: {str(e)}"
29
+
30
+
31
+ # ---------- Summarization using GPT ----------
32
def summarize_text(text, engine="gpt-3.5-turbo"):
    """Summarize ``text`` with an OpenAI chat model.

    Args:
        text: The content to summarize.
        engine: Name of the chat model, sent as the request's ``model``.

    Returns:
        The content of the first choice's message. On any failure, including
        empty input, the function does not raise; it returns a string
        beginning with ``"Error during summarization: "``.
    """
    # Skip the API call when there is nothing to summarize (for example a PDF
    # that produced no extractable text).
    if text is None or not str(text).strip():
        return "Error during summarization: no text provided"

    request = {
        "model": engine,
        "messages": [
            {"role": "system", "content": "You are a helpful summarization assistant."},
            {"role": "user", "content": f"Summarize this:\n{text}"},
        ],
        "temperature": 0.5,
        "max_tokens": 500,
    }
    try:
        if hasattr(openai, "chat"):
            # openai>=1.0 removed ``ChatCompletion``; the module-level client
            # returns an object whose fields are read as attributes.
            response = openai.chat.completions.create(**request)
            return response.choices[0].message.content
        # Legacy (pre-1.0) interface with a dict-style response.
        response = openai.ChatCompletion.create(**request)
        return response['choices'][0]['message']['content']
    except Exception as e:
        # Callers receive failures as a message string rather than an exception.
        return f"Error during summarization: {str(e)}"