Sampathkumarbandaru's picture
Update utils.py
5478378 verified
import fitz # PyMuPDF
import time
import requests
import os
from transformers import pipeline
from fpdf import FPDF
from datetime import datetime
def extract_text_from_pdf(file):
    """Return the text of every page of a PDF, joined by newlines.

    Args:
        file: Either an object exposing ``read()`` (its return value is used
            as the PDF content) or the PDF content itself, which is handed to
            PyMuPDF unchanged as the ``stream`` argument (presumably
            ``bytes`` -- verify against callers).

    Returns:
        A single string with each page's text separated by ``"\\n"``.
    """
    pdf_stream = file.read() if hasattr(file, 'read') else file
    with fitz.open(stream=pdf_stream, filetype="pdf") as document:
        # Collect the text while the document is still open.
        page_texts = [page.get_text() for page in document]
    return "\n".join(page_texts)
_SUMMARIZATION_MODEL = "pszemraj/long-t5-tglobal-base-16384-book-summary"
_summarizer_cache = None


def _get_summarizer():
    """Return the shared summarization pipeline, building it on first use."""
    global _summarizer_cache
    if _summarizer_cache is None:
        _summarizer_cache = pipeline("summarization", model=_SUMMARIZATION_MODEL)
    return _summarizer_cache


def summarize_text(text, chunk_size=3000):
    """Summarize *text* chunk by chunk and join the partial summaries.

    The text is cut into consecutive slices of ``chunk_size`` characters,
    every slice is summarized independently, and the resulting summaries are
    joined with single spaces.

    Args:
        text: The text to summarize.
        chunk_size: Number of characters per slice sent to the model.
            Defaults to 3000.

    Returns:
        The concatenated summaries, or ``""`` when *text* is empty or
        contains only whitespace (the model is not loaded in that case).

    Raises:
        ValueError: If ``chunk_size`` is not a positive integer.
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")
    # Nothing to summarize: skip the model entirely instead of feeding it an
    # empty batch or pure whitespace.
    if not text or not text.strip():
        return ""

    chunks = [text[start:start + chunk_size] for start in range(0, len(text), chunk_size)]
    # Reuse one pipeline across calls rather than rebuilding it every time.
    summarizer = _get_summarizer()
    results = summarizer(chunks, max_length=500, min_length=100, do_sample=False)
    return " ".join(result['summary_text'] for result in results)
def extract_fields_from_summary(summary):
    """Pull contract fields out of a summary using keyword matching.

    The summary is scanned line by line. Each line is assigned to at most one
    field: the first keyword (in the order Obligation, Parties, Payment,
    Start Date, Termination) found in the line decides which. If several
    lines match the same field, the last one wins. Matching is
    case-sensitive.

    Args:
        summary: The summary text; lines are separated by ``"\\n"``.

    Returns:
        A dict with the keys "Name", "Obligations", "Parties",
        "Payment Terms", "Start Date", "Termination Clause" and
        "Validation Status". "Name" carries a timestamp of the current local
        time, "Validation Status" is always "Pending", and fields that were
        not found stay ``""``. "Start Date" holds only the text after the
        first colon of its line; the other fields hold the whole line.
    """
    fields = {
        "Name": f"Contract Summary - {datetime.now().strftime('%Y%m%d%H%M%S')}",
        "Obligations": "",
        "Parties": "",
        "Payment Terms": "",
        "Start Date": "",
        "Termination Clause": "",
        "Validation Status": "Pending",
    }

    # (keyword, target field) pairs, checked in order; first hit wins.
    keyword_rules = (
        ("Obligation", "Obligations"),
        ("Parties", "Parties"),
        ("Payment", "Payment Terms"),
        ("Start Date", "Start Date"),
        ("Termination", "Termination Clause"),
    )

    # NOTE(review): a summary without newline characters is treated as one
    # single line, so at most one field can be filled from it.
    for line in summary.split("\n"):
        for keyword, field_name in keyword_rules:
            if keyword not in line:
                continue
            if field_name == "Start Date":
                # Split on the FIRST colon only so values that contain colons
                # themselves (e.g. "2024-01-01 10:30") are kept whole. With no
                # colon at all, the entire line is used.
                fields[field_name] = line.split(":", 1)[-1].strip()
            else:
                fields[field_name] = line
            break
    return fields
def send_to_salesforce(summary_data):
    """Create a ``Contract_Summary__c`` record through the Salesforce REST API.

    The instance URL and access token are read from the environment
    variables ``SALESFORCE_INSTANCE_URL`` and ``SALESFORCE_ACCESS_TOKEN``.

    Args:
        summary_data: Mapping with the keys "Name", "Obligations", "Parties",
            "Payment Terms", "Start Date", "Termination Clause" and
            "Validation Status". Missing keys are sent as null, except
            "Name" (defaults to "Untitled Contract") and "Validation Status"
            (defaults to "Pending").

    Returns:
        The decoded JSON body of the Salesforce response.

    Raises:
        RuntimeError: If Salesforce answers with a non-2xx status code.
        requests.RequestException: If the request fails or times out.
    """
    # NOTE(security): the fallback values below embed an org URL and an access
    # token in source control. They are kept only so existing deployments do
    # not change behavior; the token should be rotated and the fallback
    # removed in favor of requiring the environment variable.
    instance_url = os.getenv(
        "SALESFORCE_INSTANCE_URL",
        "https://orgfarm-86ce800028-dev-ed.develop.lightning.force.com",
    ).strip().rstrip("/")  # avoid a double slash when joining with the path
    # Strip stray whitespace so the header is exactly "Bearer <token>".
    access_token = os.getenv("SALESFORCE_ACCESS_TOKEN", " ucoyW2Ou1X3qncBjuDoE92e0X").strip()

    object_api = "/services/data/v60.0/sobjects/Contract_Summary__c/"
    url = instance_url + object_api
    headers = {
        "Authorization": f"Bearer {access_token}",
        "Content-Type": "application/json",
    }
    payload = {
        "Name": summary_data.get("Name", "Untitled Contract"),
        "Obligations__c": summary_data.get("Obligations"),
        "Parties__c": summary_data.get("Parties"),
        "Payment_Terms__c": summary_data.get("Payment Terms"),
        # Send null rather than "" when no start date was found; presumably
        # Start_Date__c is a Date field that rejects an empty string --
        # TODO confirm against the object definition.
        "Start_Date__c": summary_data.get("Start Date") or None,
        "Termination_Clause__c": summary_data.get("Termination Clause"),
        "Validation_Status__c": summary_data.get("Validation Status") or "Pending",
    }

    # Without a timeout, requests waits indefinitely on an unresponsive server.
    response = requests.post(url, headers=headers, json=payload, timeout=30)
    if 200 <= response.status_code < 300:
        return response.json()
    raise RuntimeError(f"Salesforce API error: {response.status_code} - {response.text}")
def generate_pdf(summary_text):
    """Render *summary_text* into a PDF file and return the file's path.

    Each line of the text (split on ``"\\n"``) is written as its own
    multi-line cell in 12 pt Arial on a single document.

    Args:
        summary_text: The text to render.

    Returns:
        Path of the newly created PDF inside the system temporary directory.
        The file name starts with ``summary_<unix-timestamp>_`` and ends with
        ``.pdf``; it is unique per call.
    """
    # Local import keeps this block self-contained; the module's import list
    # does not include tempfile.
    import tempfile

    pdf = FPDF()
    pdf.add_page()
    pdf.set_font("Arial", size=12)
    for line in summary_text.split('\n'):
        # The built-in core fonts only cover Latin-1; replace anything else
        # (curly quotes, dashes, ...) with "?" instead of failing.
        printable = line.encode("latin-1", "replace").decode("latin-1")
        # Start every cell at the left margin. Some fpdf releases leave the
        # cursor at the right edge after multi_cell, which leaves no width
        # for the next full-width cell.
        pdf.set_x(pdf.l_margin)
        pdf.multi_cell(0, 10, printable)

    # mkstemp guarantees a unique name, so two calls within the same second
    # no longer overwrite each other's output.
    fd, filepath = tempfile.mkstemp(prefix=f"summary_{int(time.time())}_", suffix=".pdf")
    os.close(fd)
    pdf.output(filepath)
    return filepath