Spaces:

Rivalcoder
/

Issurance_Agent_Rag

Running

Rivalcoder

Update The Model issues and Prompt

eb87b3b 12 days ago

1.64 kB

	import fitz # PyMuPDF
	import requests
	from io import BytesIO
	import time

	def parse_pdf_from_url(url, timeout=60):
	    """Download a PDF from *url* and return the text of its non-blank pages.

	    Timing for the download, the text extraction and the whole call is
	    printed to stdout.

	    Args:
	        url: Address of the PDF document to fetch.
	        timeout: Seconds ``requests.get`` waits for the server to connect or
	            send data before giving up. Without it a stalled server would
	            block the caller forever.

	    Returns:
	        A list of strings, one per page whose extracted text is not only
	        whitespace, in page order.

	    Raises:
	        requests.RequestException: If the download fails, times out, or the
	            server answers with an HTTP error status.
	        Exception: Whatever ``fitz.open`` / ``page.get_text`` raise when the
	            payload cannot be parsed as a PDF.
	    """
	    start_time = time.time()
	    print("Starting PDF download and parsing from URL...")

	    download_start = time.time()
	    res = requests.get(url, timeout=timeout)
	    download_time = time.time() - download_start
	    print(f"PDF Download took: {download_time:.2f} seconds")
	    # Fail fast on 4xx/5xx rather than handing an error page to the PDF parser.
	    res.raise_for_status()

	    parse_start = time.time()
	    doc = fitz.open(stream=BytesIO(res.content), filetype="pdf")
	    try:
	        page_texts = (page.get_text() for page in doc)
	        chunks = [text for text in page_texts if text.strip()]
	    finally:
	        # Release the document even if extraction fails part-way through.
	        doc.close()
	    parse_time = time.time() - parse_start
	    print(f"PDF Text Extraction took: {parse_time:.2f} seconds")

	    total_time = time.time() - start_time
	    print(f"Total PDF parsing from URL took: {total_time:.2f} seconds")
	    return chunks

	def parse_pdf_from_file(file_path):
	    """Parse a local PDF file and return the text of its non-blank pages.

	    Progress and timing are printed to stdout.

	    Args:
	        file_path: Path of the PDF file to open.

	    Returns:
	        A list of strings, one per page whose extracted text is not only
	        whitespace, in page order.

	    Raises:
	        Exception: If the file cannot be opened or its text cannot be
	            extracted; the original error is attached as ``__cause__``.
	    """
	    start_time = time.time()
	    print(f"Starting PDF parsing from local file: {file_path}")

	    try:
	        doc = fitz.open(file_path)
	        try:
	            page_texts = (page.get_text() for page in doc)
	            chunks = [text for text in page_texts if text.strip()]
	        finally:
	            # Release the document even if extraction fails part-way through.
	            doc.close()
	    except Exception as e:
	        total_time = time.time() - start_time
	        print(f"Error parsing PDF file after {total_time:.2f} seconds: {str(e)}")
	        raise Exception(f"Error parsing PDF file {file_path}: {str(e)}") from e

	    total_time = time.time() - start_time
	    print(f"Total PDF parsing from file took: {total_time:.2f} seconds")
	    return chunks