from io import BytesIO

import fitz  # PyMuPDF
import requests


def _extract_page_texts(doc):
    """Return the text of every page in *doc* that has non-whitespace text."""
    page_texts = (page.get_text() for page in doc)
    return [text for text in page_texts if text.strip()]


def parse_pdf_from_url(url, timeout=30.0):
    """Download a PDF from *url* and extract its text, one chunk per page.

    Args:
        url: Address of the PDF document to fetch.
        timeout: Seconds to wait for the server before giving up. Passed
            straight to ``requests.get``; ``None`` disables the timeout.

    Returns:
        A list with the text of each page, skipping pages whose text is
        empty or whitespace only.

    Raises:
        requests.RequestException: If the download fails, times out, or the
            server answers with an HTTP error status.
    """
    res = requests.get(url, timeout=timeout)
    # Fail on 4xx/5xx here rather than feeding an error page to the PDF parser.
    res.raise_for_status()

    doc = fitz.open(stream=BytesIO(res.content), filetype="pdf")
    try:
        return _extract_page_texts(doc)
    finally:
        # Always release the document, even if text extraction fails.
        doc.close()


def parse_pdf_from_file(file_path):
    """Parse a local PDF file and extract its text, one chunk per page.

    Args:
        file_path: Path of the PDF file to open.

    Returns:
        A list with the text of each page, skipping pages whose text is
        empty or whitespace only.

    Raises:
        Exception: If the file cannot be opened or parsed. The message names
            the file and the original error is attached as ``__cause__``.
    """
    try:
        doc = fitz.open(file_path)
        try:
            return _extract_page_texts(doc)
        finally:
            # Close on both the success and the failure path.
            doc.close()
    except Exception as e:
        raise Exception(f"Error parsing PDF file {file_path}: {str(e)}") from e