|
import os
import tempfile
import time
from datetime import datetime

import fitz
import requests
from fpdf import FPDF
from transformers import pipeline
|
|
|
def extract_text_from_pdf(file):
    """Return the text of every page of a PDF, joined by newline characters.

    Args:
        file: Either an object exposing ``read()`` (whatever it returns is
            used as the PDF data) or the PDF data itself.

    Returns:
        The result of ``page.get_text()`` for each page, in document order,
        joined with a single newline between pages.
    """
    pdf_data = file.read() if hasattr(file, 'read') else file
    with fitz.open(stream=pdf_data, filetype="pdf") as document:
        # Collect the text while the document is still open.
        page_texts = [page.get_text() for page in document]
    return "\n".join(page_texts)
|
|
|
_SUMMARIZATION_MODEL = "pszemraj/long-t5-tglobal-base-16384-book-summary"
_SUMMARY_CHUNK_CHARS = 3000
_summarizer = None


def _get_summarizer():
    """Build the summarization pipeline on first use and reuse it afterwards."""
    global _summarizer
    if _summarizer is None:
        _summarizer = pipeline("summarization", model=_SUMMARIZATION_MODEL)
    return _summarizer


def summarize_text(text):
    """Summarize ``text`` by summarizing fixed-size character chunks.

    The text is cut into consecutive 3000-character chunks, every chunk is
    passed to the summarization pipeline, and the chunk summaries are joined
    with single spaces.

    Args:
        text: The text to summarize.

    Returns:
        The concatenated chunk summaries, or an empty string when ``text`` is
        empty or contains only whitespace (for example a PDF with no
        extractable text).
    """
    # Nothing to summarize: skip loading/running the model entirely.
    if not text or not text.strip():
        return ""

    chunks = [
        text[start:start + _SUMMARY_CHUNK_CHARS]
        for start in range(0, len(text), _SUMMARY_CHUNK_CHARS)
    ]
    # Reuse the cached pipeline instead of re-creating it on every call.
    summarizer = _get_summarizer()
    results = summarizer(chunks, max_length=500, min_length=100, do_sample=False)
    return " ".join(result['summary_text'] for result in results)
|
|
|
def extract_fields_from_summary(summary):
    """Pick contract fields out of a summary by keyword-matching its lines.

    Each line of ``summary`` is checked, in this order, for the substrings
    "Obligation", "Parties", "Payment", "Start Date" and "Termination"; only
    the first matching keyword applies to a line. When several lines match
    the same keyword, the last one wins.

    Args:
        summary: Summary text; ``None`` or an empty string yields the
            default (empty) field values.

    Returns:
        A dict with the keys "Name", "Obligations", "Parties",
        "Payment Terms", "Start Date", "Termination Clause" and
        "Validation Status". "Name" is "Contract Summary - " followed by the
        current local timestamp; "Validation Status" is always "Pending".
    """
    fields = {
        "Name": f"Contract Summary - {datetime.now().strftime('%Y%m%d%H%M%S')}",
        "Obligations": "",
        "Parties": "",
        "Payment Terms": "",
        "Start Date": "",
        "Termination Clause": "",
        "Validation Status": "Pending",
    }

    for line in (summary or "").split("\n"):
        if "Obligation" in line:
            fields["Obligations"] = line
        elif "Parties" in line:
            fields["Parties"] = line
        elif "Payment" in line:
            fields["Payment Terms"] = line
        elif "Start Date" in line:
            # Split on the FIRST colon only, so a value that itself contains
            # colons (e.g. "2024-01-01 10:30:00") is kept whole. A line
            # without any colon is stored as-is (stripped).
            fields["Start Date"] = line.split(":", 1)[-1].strip()
        elif "Termination" in line:
            fields["Termination Clause"] = line

    return fields
|
|
|
def send_to_salesforce(summary_data, timeout=30):
    """Create a ``Contract_Summary__c`` record in Salesforce via the REST API.

    Args:
        summary_data: Mapping with the keys "Name", "Obligations", "Parties",
            "Payment Terms", "Start Date", "Termination Clause" and
            "Validation Status"; missing keys are sent as ``None`` (or as a
            default for "Name" and "Validation Status").
        timeout: Seconds to wait for Salesforce before giving up. Without a
            timeout ``requests`` can block indefinitely on a stalled
            connection.

    Returns:
        The decoded JSON body of the Salesforce response.

    Raises:
        RuntimeError: If Salesforce answers with a non-2xx status code.
    """
    # SECURITY(review): these fallbacks hard-code an org URL and an access
    # token in source control. They are kept only so existing deployments
    # keep working; the token should be rotated and both values supplied
    # exclusively through the environment.
    instance_url = os.getenv(
        "SALESFORCE_INSTANCE_URL",
        "https://orgfarm-86ce800028-dev-ed.develop.lightning.force.com",
    )
    access_token = os.getenv("SALESFORCE_ACCESS_TOKEN", " ucoyW2Ou1X3qncBjuDoE92e0X")

    # Stray whitespace would corrupt the Bearer header, and a trailing slash
    # on the instance URL would produce "//services/...".
    access_token = access_token.strip()
    instance_url = instance_url.strip().rstrip("/")

    url = instance_url + "/services/data/v60.0/sobjects/Contract_Summary__c/"
    headers = {
        "Authorization": f"Bearer {access_token}",
        "Content-Type": "application/json",
    }
    payload = {
        "Name": summary_data.get("Name", "Untitled Contract"),
        "Obligations__c": summary_data.get("Obligations"),
        "Parties__c": summary_data.get("Parties"),
        "Payment_Terms__c": summary_data.get("Payment Terms"),
        "Start_Date__c": summary_data.get("Start Date"),
        "Termination_Clause__c": summary_data.get("Termination Clause"),
        "Validation_Status__c": summary_data.get("Validation Status") or "Pending",
    }

    response = requests.post(url, headers=headers, json=payload, timeout=timeout)
    if not 200 <= response.status_code < 300:
        raise RuntimeError(
            f"Salesforce API error: {response.status_code} - {response.text}"
        )
    return response.json()
|
|
|
def generate_pdf(summary_text):
    """Render ``summary_text`` into a PDF file and return the file's path.

    Each newline-separated line of the text is written as its own
    full-width, auto-wrapping cell in 12pt Arial.

    Args:
        summary_text: The text to render.

    Returns:
        Path of the generated PDF, a uniquely named ``summary_<timestamp>_*.pdf``
        file inside the system temporary directory.
    """
    pdf = FPDF()
    pdf.add_page()
    pdf.set_font("Arial", size=12)

    for line in summary_text.split('\n'):
        # The built-in (core) fonts only support Latin-1; replace anything
        # else with "?" instead of letting the PDF generation fail on, e.g.,
        # curly quotes or dashes produced by the summarizer.
        printable = line.encode("latin-1", "replace").decode("latin-1")
        # Start every cell at the left margin. fpdf2 leaves the cursor at the
        # right edge after multi_cell, so a following full-width cell would
        # have no room; on the older PyFPDF this is a harmless no-op.
        pdf.set_x(pdf.l_margin)
        pdf.multi_cell(0, 10, printable)

    # mkstemp guarantees a unique name (a bare second-resolution timestamp
    # collides when two PDFs are generated within the same second) and uses
    # the platform's temp directory rather than a hard-coded "/tmp".
    fd, filepath = tempfile.mkstemp(
        prefix=f"summary_{int(time.time())}_", suffix=".pdf"
    )
    os.close(fd)  # FPDF reopens the path itself; only the reserved name is needed.
    pdf.output(filepath)
    return filepath