|
import os
from urllib.parse import parse_qs, urlparse

import fitz
import openai
from dotenv import load_dotenv
from youtube_transcript_api import YouTubeTranscriptApi
|
|
|
# Run dotenv's loader first so the environment lookup below happens after it,
# then hand the key to the openai module (None when the variable is unset).
load_dotenv()
openai.api_key = os.environ.get("OPENAI_API_KEY")
|
|
|
|
|
|
|
def extract_text_from_pdf(pdf_path):
    """Return the text of every page of the document at *pdf_path*.

    The document is opened with ``fitz.open`` and closed again when the
    ``with`` block exits. Page texts are concatenated in iteration order
    with no separator added between pages.
    """
    with fitz.open(pdf_path) as pdf:
        page_texts = [pg.get_text() for pg in pdf]
    return "".join(page_texts)
|
|
|
|
|
|
|
def _extract_video_id(video_url):
    """Return the video ID contained in *video_url*, or "" if none is found."""
    url = video_url.strip()

    # A bare token with no URL punctuation is taken to be the ID itself,
    # which keeps plain-ID input working.
    if url and not any(ch in "/?=&#.:" for ch in url):
        return url

    # Without a scheme, urlparse would treat "youtu.be/<id>" as a path only.
    parsed = urlparse(url if "://" in url else "https://" + url)

    # Standard watch URLs: the ID is the "v" query parameter, wherever it
    # appears in the query string. Fragments are split off by urlparse.
    ids = parse_qs(parsed.query).get("v")
    if ids and ids[0]:
        return ids[0]

    host = (parsed.hostname or "").lower()
    segments = [part for part in parsed.path.split("/") if part]

    # Short links: https://youtu.be/<id>
    if host in ("youtu.be", "www.youtu.be") and segments:
        return segments[0]

    # Path-style links: /embed/<id>, /shorts/<id>, /live/<id>, /v/<id>
    if len(segments) >= 2 and segments[0] in ("embed", "shorts", "live", "v"):
        return segments[1]

    return ""


def get_youtube_transcript(video_url):
    """Fetch the transcript for a YouTube video and return it as one string.

    Args:
        video_url: A YouTube URL (watch, youtu.be, embed, shorts, live or
            /v/ form) or a bare video ID.

    Returns:
        The "text" field of every transcript entry joined with single
        spaces. On any failure, including a URL from which no video ID can
        be extracted, a string starting with "Error fetching transcript: "
        is returned instead of raising.
    """
    try:
        video_id = _extract_video_id(video_url)
        if not video_id:
            raise ValueError(
                f"could not extract a video ID from {video_url!r}"
            )
        transcript = YouTubeTranscriptApi.get_transcript(video_id)
        return " ".join(entry["text"] for entry in transcript)
    except Exception as e:
        # Deliberately broad: this function reports every failure to the
        # caller as a string rather than an exception.
        return f"Error fetching transcript: {str(e)}"
|
|
|
|
|
|
|
def summarize_text(text, engine="gpt-3.5-turbo"):
    """Ask an OpenAI chat model to summarize *text*.

    Args:
        text: The content to summarize.
        engine: Model name, passed as ``model`` to the chat completion call.

    Returns:
        The content of the first choice's message. If *text* is ``None``,
        empty or whitespace-only, or if the API call fails, a string
        starting with "Error during summarization: " is returned instead
        of raising.
    """
    # Nothing to summarize: return early rather than sending a request
    # whose prompt carries no content.
    if text is None or not str(text).strip():
        return "Error during summarization: no text provided"

    try:
        # NOTE(review): openai.ChatCompletion is the pre-1.0 client
        # interface - confirm the pinned openai version still provides it.
        response = openai.ChatCompletion.create(
            model=engine,
            messages=[
                {"role": "system", "content": "You are a helpful summarization assistant."},
                {"role": "user", "content": f"Summarize this:\n{text}"},
            ],
            temperature=0.5,
            max_tokens=500,
        )
        return response["choices"][0]["message"]["content"]
    except Exception as e:
        # Deliberately broad: failures are reported to the caller as a
        # string rather than an exception.
        return f"Error during summarization: {str(e)}"
|
|