File size: 3,009 Bytes
d545986
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5478378
d545986
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5478378
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
import fitz  # PyMuPDF
import time
import requests
import os
from transformers import pipeline
from fpdf import FPDF
from datetime import datetime

def extract_text_from_pdf(file):
    """Return the text of every page of a PDF, joined by newline characters.

    Args:
        file: Either an object exposing ``read()`` (whatever it returns is
            used as the PDF data) or the PDF data itself.

    Returns:
        One string containing each page's extracted text, with a newline
        between consecutive pages.
    """
    # Accept both file-like objects and raw data.
    pdf_data = file.read() if hasattr(file, 'read') else file

    page_texts = []
    with fitz.open(stream=pdf_data, filetype="pdf") as document:
        for page in document:
            page_texts.append(page.get_text())
    return "\n".join(page_texts)

_SUMMARY_MODEL = "pszemraj/long-t5-tglobal-base-16384-book-summary"
_SUMMARY_CHUNK_CHARS = 3000
_summarizer = None


def _get_summarizer():
    """Return the shared summarization pipeline, building it on first use."""
    global _summarizer
    if _summarizer is None:
        # Constructing the pipeline is the expensive step, so do it only once
        # per process instead of once per summarize_text() call.
        _summarizer = pipeline("summarization", model=_SUMMARY_MODEL)
    return _summarizer


def summarize_text(text):
    """Summarize ``text`` chunk by chunk and join the partial summaries.

    The text is cut into consecutive slices of 3000 characters; each slice is
    summarized independently and the results are joined with single spaces.

    Args:
        text: The text to summarize. ``None``, an empty string or text made
            only of whitespace yields an empty summary without touching the
            model.

    Returns:
        The concatenated chunk summaries, or ``""`` when there is nothing to
        summarize.
    """
    if not text:
        return ""

    chunks = [
        text[start:start + _SUMMARY_CHUNK_CHARS]
        for start in range(0, len(text), _SUMMARY_CHUNK_CHARS)
    ]
    # Blank chunks carry no content, yet min_length would still force the
    # model to emit text for them, so they are dropped up front.
    chunks = [chunk for chunk in chunks if chunk.strip()]
    if not chunks:
        return ""

    results = _get_summarizer()(
        chunks, max_length=500, min_length=100, do_sample=False
    )
    return " ".join(result['summary_text'] for result in results)

def extract_fields_from_summary(summary):
    """Pick contract fields out of a summary by keyword-matching its lines.

    Each line of ``summary`` is checked against the keywords below, in order;
    the first keyword found decides which field the line fills. When several
    lines match the same field, the last one wins.

    Args:
        summary: Summary text, one candidate field per line. ``None`` or an
            empty string yields the default (empty) fields.

    Returns:
        A dict with the keys ``Name``, ``Obligations``, ``Parties``,
        ``Payment Terms``, ``Start Date``, ``Termination Clause`` and
        ``Validation Status``. ``Name`` is stamped with the current local
        time and ``Validation Status`` is always ``"Pending"``. Matching
        lines are stored whole, except ``Start Date`` which stores only the
        value after the colon that follows the keyword.
    """
    fields = {
        "Name": f"Contract Summary - {datetime.now().strftime('%Y%m%d%H%M%S')}",
        "Obligations": "",
        "Parties": "",
        "Payment Terms": "",
        "Start Date": "",
        "Termination Clause": "",
        "Validation Status": "Pending"
    }
    if not summary:
        return fields

    # (keyword searched in the line, field it populates) - order is priority.
    keyword_to_field = (
        ("Obligation", "Obligations"),
        ("Parties", "Parties"),
        ("Payment", "Payment Terms"),
        ("Start Date", "Start Date"),
        ("Termination", "Termination Clause"),
    )

    for line in summary.split("\n"):
        for keyword, field in keyword_to_field:
            if keyword not in line:
                continue
            if field == "Start Date":
                # Keep everything after the first colon that follows the
                # keyword, so values that contain colons themselves (e.g.
                # "2024-01-01T10:30:00") are not cut down to their last piece.
                tail = line[line.index(keyword) + len(keyword):]
                _, colon, value = tail.partition(":")
                fields[field] = (value if colon else line).strip()
            else:
                fields[field] = line
            break
    return fields

def send_to_salesforce(summary_data):
    """Create a ``Contract_Summary__c`` record from extracted contract fields.

    Configuration is read from the environment:

    * ``SALESFORCE_INSTANCE_URL`` - base URL of the org (a development org
      URL is used when unset).
    * ``SALESFORCE_ACCESS_TOKEN`` - bearer token; required. No credential is
      embedded in the source, so the call fails fast when it is missing.

    Args:
        summary_data: Mapping with the keys ``Name``, ``Obligations``,
            ``Parties``, ``Payment Terms``, ``Start Date``,
            ``Termination Clause`` and ``Validation Status``. Missing keys
            are sent as ``None``, except ``Name`` (defaults to
            ``"Untitled Contract"``) and ``Validation Status`` (defaults to
            ``"Pending"``).

    Returns:
        The decoded JSON body of the Salesforce response.

    Raises:
        RuntimeError: If the access token is not configured, or Salesforce
            answers with a non-2xx status.
    """
    access_token = os.getenv("SALESFORCE_ACCESS_TOKEN", "").strip()
    if not access_token:
        raise RuntimeError(
            "Salesforce API error: SALESFORCE_ACCESS_TOKEN is not set"
        )

    # Strip whitespace and a trailing slash so the joined URL never contains
    # "//services".
    instance_url = os.getenv(
        "SALESFORCE_INSTANCE_URL",
        "https://orgfarm-86ce800028-dev-ed.develop.lightning.force.com",
    ).strip().rstrip("/")
    url = instance_url + "/services/data/v60.0/sobjects/Contract_Summary__c/"

    headers = {
        "Authorization": f"Bearer {access_token}",
        "Content-Type": "application/json"
    }
    payload = {
        "Name": summary_data.get("Name", "Untitled Contract"),
        "Obligations__c": summary_data.get("Obligations"),
        "Parties__c": summary_data.get("Parties"),
        "Payment_Terms__c": summary_data.get("Payment Terms"),
        "Start_Date__c": summary_data.get("Start Date"),
        "Termination_Clause__c": summary_data.get("Termination Clause"),
        "Validation_Status__c": summary_data.get("Validation Status") or "Pending"
    }

    # Without a timeout the request can block forever on an unresponsive host.
    response = requests.post(url, headers=headers, json=payload, timeout=30)
    if not 200 <= response.status_code < 300:
        raise RuntimeError(f"Salesforce API error: {response.status_code} - {response.text}")
    return response.json()

def generate_pdf(summary_text):
    """Render ``summary_text`` into a one-column PDF and return its path.

    Each newline-separated line of the text becomes one wrapped paragraph in
    12pt Arial. The file is created with a unique name in the system
    temporary directory, so concurrent or rapid successive calls never
    overwrite each other's output.

    Args:
        summary_text: Text to render. ``None`` is treated as empty text.

    Returns:
        The filesystem path of the written PDF.
    """
    import tempfile  # local import keeps this function self-contained

    pdf = FPDF()
    pdf.add_page()
    pdf.set_font("Arial", size=12)
    for line in (summary_text or "").split('\n'):
        # The built-in Arial font is limited to Latin-1; characters outside
        # it (curly quotes, dashes, ...) are replaced with "?" rather than
        # aborting the whole document with an encoding error.
        printable = line.encode("latin-1", "replace").decode("latin-1")
        # Start every paragraph at the left margin. NOTE(review): fpdf2
        # leaves the cursor at the right edge after multi_cell by default;
        # on legacy PyFPDF this reset is presumably a no-op - confirm against
        # the installed version.
        pdf.set_x(pdf.l_margin)
        pdf.multi_cell(0, 10, printable)

    # mkstemp guarantees a fresh file; the timestamp is kept in the name for
    # readability only.
    handle, filepath = tempfile.mkstemp(
        prefix=f"summary_{int(time.time())}_", suffix=".pdf"
    )
    os.close(handle)
    pdf.output(filepath)
    return filepath