import functools
import os
import tempfile
import time
from datetime import datetime

import fitz  # PyMuPDF
import requests
from fpdf import FPDF
from transformers import pipeline

_SUMMARY_MODEL = "pszemraj/long-t5-tglobal-base-16384-book-summary"
_CHUNK_SIZE = 3000  # characters of source text handed to the model per chunk
_REQUEST_TIMEOUT = 30  # seconds; keeps a stalled Salesforce call from hanging forever
# NOTE(review): REST calls normally target the org's *.my.salesforce.com domain
# rather than the Lightning UI host -- confirm this default is reachable.
_DEFAULT_INSTANCE_URL = (
    "https://orgfarm-86ce800028-dev-ed.develop.lightning.force.com"
)
_OBJECT_API = "/services/data/v60.0/sobjects/Contract_Summary__c/"


def extract_text_from_pdf(file):
    """Return the text of every page of a PDF, joined with newlines.

    Args:
        file: Either the raw PDF bytes, or an object exposing ``read()``
            whose result is passed to PyMuPDF as the document stream.

    Returns:
        The text of all pages as a single string, one page per segment,
        separated by ``"\\n"``.
    """
    file_bytes = file.read() if hasattr(file, "read") else file
    with fitz.open(stream=file_bytes, filetype="pdf") as doc:
        return "\n".join(page.get_text() for page in doc)


@functools.lru_cache(maxsize=1)
def _get_summarizer():
    """Build the summarization pipeline once and reuse it on later calls."""
    return pipeline("summarization", model=_SUMMARY_MODEL)


def summarize_text(text):
    """Summarize *text* by summarizing fixed-size chunks and joining them.

    The text is cut into consecutive 3000-character slices, each slice is
    summarized independently, and the partial summaries are joined with a
    single space.

    Args:
        text: The text to summarize.

    Returns:
        The combined summary, or ``""`` when *text* is empty or contains
        only whitespace (the model is not invoked in that case).
    """
    if not text or not text.strip():
        return ""

    summarizer = _get_summarizer()
    chunks = [
        text[start:start + _CHUNK_SIZE]
        for start in range(0, len(text), _CHUNK_SIZE)
    ]
    summarized = summarizer(
        chunks, max_length=500, min_length=100, do_sample=False
    )
    return " ".join(chunk["summary_text"] for chunk in summarized)


def extract_fields_from_summary(summary):
    """Pick contract fields out of a summary by keyword-matching its lines.

    Each line of *summary* is checked, in order, for the substrings
    ``"Obligation"``, ``"Parties"``, ``"Payment"``, ``"Start Date"`` and
    ``"Termination"``; only the first matching keyword applies to a line.
    When several lines match the same keyword, the last one wins.

    Args:
        summary: Summary text; lines are separated by ``"\\n"``.

    Returns:
        A dict with the keys ``Name``, ``Obligations``, ``Parties``,
        ``Payment Terms``, ``Start Date``, ``Termination Clause`` and
        ``Validation Status``. Unmatched fields stay ``""``; ``Name`` is a
        timestamped title and ``Validation Status`` is ``"Pending"``.
    """
    fields = {
        "Name": f"Contract Summary - {datetime.now().strftime('%Y%m%d%H%M%S')}",
        "Obligations": "",
        "Parties": "",
        "Payment Terms": "",
        "Start Date": "",
        "Termination Clause": "",
        "Validation Status": "Pending",
    }
    for line in summary.split("\n"):
        if "Obligation" in line:
            fields["Obligations"] = line
        elif "Parties" in line:
            fields["Parties"] = line
        elif "Payment" in line:
            fields["Payment Terms"] = line
        elif "Start Date" in line:
            # Split on the first colon only, so a value that itself contains
            # colons (e.g. a time of day) is not truncated to its last piece.
            fields["Start Date"] = line.split(":", 1)[-1].strip()
        elif "Termination" in line:
            fields["Termination Clause"] = line
    return fields


def send_to_salesforce(summary_data):
    """Create a ``Contract_Summary__c`` record through the Salesforce REST API.

    The instance URL is read from ``SALESFORCE_INSTANCE_URL`` (falling back
    to a built-in default) and the bearer token from
    ``SALESFORCE_ACCESS_TOKEN``. The token has no fallback: credentials must
    not live in source code.

    Args:
        summary_data: Mapping with the keys produced by
            ``extract_fields_from_summary``. A missing ``Name`` becomes
            ``"Untitled Contract"`` and a missing or empty
            ``Validation Status`` becomes ``"Pending"``.

    Returns:
        The decoded JSON body of the Salesforce response.

    Raises:
        RuntimeError: If ``SALESFORCE_ACCESS_TOKEN`` is unset or blank, or
            if Salesforce answers with a non-2xx status code.
        requests.exceptions.RequestException: On connection failures or
            when the request exceeds the timeout.
    """
    instance_url = os.getenv("SALESFORCE_INSTANCE_URL", _DEFAULT_INSTANCE_URL)
    access_token = os.getenv("SALESFORCE_ACCESS_TOKEN", "").strip()
    if not access_token:
        raise RuntimeError(
            "SALESFORCE_ACCESS_TOKEN is not set; cannot call the Salesforce API"
        )

    # Tolerate a trailing slash in the configured URL to avoid '//services'.
    url = instance_url.rstrip("/") + _OBJECT_API
    headers = {
        "Authorization": f"Bearer {access_token}",
        "Content-Type": "application/json",
    }
    payload = {
        "Name": summary_data.get("Name", "Untitled Contract"),
        "Obligations__c": summary_data.get("Obligations"),
        "Parties__c": summary_data.get("Parties"),
        "Payment_Terms__c": summary_data.get("Payment Terms"),
        "Start_Date__c": summary_data.get("Start Date"),
        "Termination_Clause__c": summary_data.get("Termination Clause"),
        "Validation_Status__c": summary_data.get("Validation Status") or "Pending",
    }

    response = requests.post(
        url, headers=headers, json=payload, timeout=_REQUEST_TIMEOUT
    )
    if 200 <= response.status_code < 300:
        return response.json()
    raise RuntimeError(
        f"Salesforce API error: {response.status_code} - {response.text}"
    )


def generate_pdf(summary_text):
    """Write *summary_text* to a new PDF file and return the file's path.

    Each line of the text is rendered as its own wrapped paragraph in
    12-point Arial on a single ``FPDF`` document.

    Args:
        summary_text: The text to render; lines are separated by ``"\\n"``.

    Returns:
        Path of the generated PDF, a uniquely named ``summary_*.pdf`` file
        in the system temporary directory.
    """
    pdf = FPDF()
    pdf.add_page()
    pdf.set_font("Arial", size=12)
    for line in summary_text.split("\n"):
        # The built-in Arial font only covers Latin-1; replace anything else
        # (curly quotes, dashes, ...) with '?' instead of failing the export.
        printable = line.encode("latin-1", "replace").decode("latin-1")
        pdf.multi_cell(0, 10, printable)

    # mkstemp yields a collision-free name in the platform's temp directory;
    # a one-second timestamp under /tmp could clash and is not portable.
    fd, filepath = tempfile.mkstemp(prefix="summary_", suffix=".pdf")
    os.close(fd)
    pdf.output(filepath)
    return filepath