Issurance_Agent_Rag / parser.py
Rivalcoder
Update The Model issues and Prompt
6bc8549
raw
history blame
781 Bytes
import fitz # PyMuPDF
import requests
from io import BytesIO
import time
def parse_pdf_from_url(url, *, timeout=30):
    """Download a PDF from *url* and return the text of its non-empty pages.

    Args:
        url: Address of the PDF document to fetch with an HTTP GET.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``
            argument, so a stalled server cannot block the caller forever.

    Returns:
        A list with one string per page, in page order. Pages whose
        extracted text is empty or whitespace-only are skipped.

    Raises:
        requests.RequestException: If the download fails, times out, or the
            server answers with an HTTP error status.
    """
    res = requests.get(url, timeout=timeout)
    # Fail fast on 4xx/5xx instead of feeding an error page to the PDF parser.
    res.raise_for_status()

    doc = fitz.open(stream=BytesIO(res.content), filetype="pdf")
    try:
        page_texts = (page.get_text() for page in doc)
        return [text for text in page_texts if text.strip()]
    finally:
        # Release the document even when text extraction raises.
        doc.close()
def parse_pdf_from_file(file_path):
    """Parse a local PDF file and return the text of its non-empty pages.

    Args:
        file_path: Path of the PDF file to open.

    Returns:
        A list with one string per page, in page order. Pages whose
        extracted text is empty or whitespace-only are skipped.

    Raises:
        Exception: If the file cannot be opened or read. The message names
            *file_path* and the original error, which is also attached as
            the exception's cause.
    """
    try:
        doc = fitz.open(file_path)
        try:
            page_texts = (page.get_text() for page in doc)
            return [text for text in page_texts if text.strip()]
        finally:
            # Close the document even if extraction fails part-way through.
            doc.close()
    except Exception as e:
        raise Exception(f"Error parsing PDF file {file_path}: {str(e)}") from e