Upload 5 files
Browse files
- .env +4 -0
- app.py +61 -0
- requirements.txt +9 -0
- salesforce_ai_contract_summarizer (1).zip +3 -0
- utils.py +78 -0
.env
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# .env - Local secrets (Salesforce instance URL and access token). Keep this file out of version control.
|
2 |
+
|
3 |
+
SALESFORCE_INSTANCE_URL=https://orgfarm-86ce800028-dev-ed.develop.lightning.force.com
|
4 |
+
SALESFORCE_ACCESS_TOKEN=rv8ltEXanR65uLRD5nrs5VJBk
|
app.py
ADDED
@@ -0,0 +1,61 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import time
|
3 |
+
from utils import (
|
4 |
+
extract_text_from_pdf,
|
5 |
+
summarize_text,
|
6 |
+
generate_pdf,
|
7 |
+
extract_fields_from_summary,
|
8 |
+
send_to_salesforce
|
9 |
+
)
|
10 |
+
|
11 |
+
def process_and_summarize(file):
    """Summarize an uploaded contract PDF and sync the result to Salesforce.

    Args:
        file: The upload handed over by the ``gr.File`` input, or ``None``
            when the user submitted without choosing a file.

    Returns:
        A 6-tuple ``(summary, pdf_path, char_count, seconds, fields,
        sf_response)``. If extraction, summarization or PDF generation
        fails, the tuple is ``("Error: ...", None, 0, 0.0, {}, {})``.
        If only the Salesforce call fails, the summary, PDF and fields are
        still returned and ``sf_response`` is ``{"error": "<message>"}``.
    """
    start_time = time.time()

    # Submitting the form without an upload yields None; report that
    # directly instead of surfacing an opaque parser error.
    if file is None:
        return "Error: no file uploaded", None, 0, 0.0, {}, {}

    try:
        text = extract_text_from_pdf(file)
        summary = summarize_text(text)
        fields = extract_fields_from_summary(summary)
        pdf_path = generate_pdf(summary)
    except Exception as e:  # UI boundary: show the failure to the user
        return f"Error: {str(e)}", None, 0, 0.0, {}, {}

    # The Salesforce sync is isolated on purpose: a failed remote call must
    # not throw away the summary and PDF that were already produced.
    try:
        sf_response = send_to_salesforce(fields)
    except Exception as e:
        sf_response = {"error": str(e)}

    elapsed = round(time.time() - start_time, 2)
    return summary, pdf_path, len(text), elapsed, fields, sf_response
|
25 |
+
|
26 |
+
# Markup for the "Print Summary" button.
#
# The handler lives inline in ``onclick`` rather than in a <script> element:
# browsers do not execute <script> elements that are inserted as markup via
# innerHTML, which is presumably how the HTML component renders its value.
# The summary is copied with ``textContent`` so that text derived from the
# uploaded PDF is never parsed as HTML in the print window.
_PRINT_BUTTON_HTML = """
<button onclick="
    var box = document.querySelector('#summary-box textarea');
    if (!box) { return; }
    var win = window.open('', '', 'height=700,width=800');
    if (!win) { return; }
    win.document.title = 'Contract Summary';
    var pre = win.document.createElement('pre');
    pre.style.fontFamily = 'Arial';
    pre.style.fontSize = '14px';
    pre.textContent = box.value;
    win.document.body.appendChild(pre);
    win.focus();
    win.print();
    win.close();
" style='margin-top:10px; padding:10px 20px; font-size:16px;'>🖨️ Print Summary</button>
"""


def _process_with_print_button(file):
    """Run ``process_and_summarize`` and append the print-button markup.

    The interface declares seven output components while
    ``process_and_summarize`` returns six values; this adapter supplies the
    seventh (the static HTML) so the output count matches.
    """
    return (*process_and_summarize(file), _PRINT_BUTTON_HTML)


iface = gr.Interface(
    fn=_process_with_print_button,
    inputs=gr.File(label="📄 Upload Contract (PDF)"),
    outputs=[
        gr.Textbox(label="📝 Contract Summary", lines=15, elem_id="summary-box"),
        gr.File(label="📥 Download PDF Summary"),
        gr.Number(label="🧮 Character Count"),
        gr.Number(label="⏱️ Processing Time (s)"),
        gr.JSON(label="📤 Fields Sent to Salesforce"),
        gr.JSON(label="📥 Salesforce Response"),
        gr.HTML(_PRINT_BUTTON_HTML),
    ],
    title="🧠 AI Contract Summarizer + Salesforce",
    description="Upload a contract to summarize, download, print, and sync with Salesforce.",
    allow_flagging="never",
)

if __name__ == "__main__":
    iface.launch()
|
requirements.txt
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
gradio
|
2 |
+
transformers
|
3 |
+
torch
|
4 |
+
fpdf
|
5 |
+
pymupdf
|
6 |
+
pytesseract
|
7 |
+
Pillow
|
8 |
+
requests
|
9 |
+
python-dotenv
|
salesforce_ai_contract_summarizer (1).zip
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2c1494bcb4c75a3fab0cb29969f1772c65ce4f36c205f4265fe35d4b158245ab
|
3 |
+
size 3325
|
utils.py
ADDED
@@ -0,0 +1,78 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import fitz # PyMuPDF
|
2 |
+
import time
|
3 |
+
import requests
|
4 |
+
import os
|
5 |
+
from transformers import pipeline
|
6 |
+
from fpdf import FPDF
|
7 |
+
from datetime import datetime
|
8 |
+
|
9 |
+
def extract_text_from_pdf(file):
    """Return the text of every page of a PDF, joined with newlines.

    Args:
        file: One of
            * a filesystem path (``str`` or ``os.PathLike``),
            * a binary file-like object (anything exposing ``read``), or
            * the raw PDF bytes.

    Returns:
        The concatenated page text; pages are separated by ``"\\n"``.
    """
    if isinstance(file, (str, os.PathLike)):
        # A path must be opened by name; handing it to ``stream=`` would
        # treat the path string itself as the document content.
        doc = fitz.open(os.fspath(file), filetype="pdf")
    else:
        file_bytes = file.read() if hasattr(file, "read") else file
        doc = fitz.open(stream=file_bytes, filetype="pdf")

    with doc:
        return "\n".join(page.get_text() for page in doc)
|
16 |
+
|
17 |
+
# Lazily created summarization pipeline, shared by every call so the model
# is loaded once per process instead of once per request.
_SUMMARIZER = None


def _get_summarizer():
    """Return the shared summarization pipeline, creating it on first use."""
    global _SUMMARIZER
    if _SUMMARIZER is None:
        _SUMMARIZER = pipeline(
            "summarization",
            model="pszemraj/long-t5-tglobal-base-16384-book-summary",
        )
    return _SUMMARIZER


def summarize_text(text, chunk_size=3000):
    """Summarize ``text`` chunk by chunk and join the partial summaries.

    Args:
        text: The text to summarize.
        chunk_size: Number of characters per chunk passed to the model.

    Returns:
        The chunk summaries joined with single spaces, or ``""`` when
        ``text`` is empty or whitespace-only (the model is not invoked).
    """
    # Nothing to summarize (e.g. a PDF with no extractable text): skip the
    # model entirely rather than sending it an empty batch.
    if not text or not text.strip():
        return ""

    chunks = [text[i:i + chunk_size] for i in range(0, len(text), chunk_size)]
    summarized = _get_summarizer()(
        chunks, max_length=500, min_length=100, do_sample=False
    )
    return " ".join(chunk["summary_text"] for chunk in summarized)
|
22 |
+
|
23 |
+
def extract_fields_from_summary(summary):
    """Build the contract field dictionary from a summary by keyword matching.

    Each line of ``summary`` is checked against the keywords below in order;
    the first keyword found decides which field the line is stored in. When
    several lines match the same field, the last one wins.

    Args:
        summary: The summary text; lines are separated by ``"\\n"``.

    Returns:
        A dict with the keys ``Name``, ``Obligations``, ``Parties``,
        ``Payment Terms``, ``Start Date``, ``Termination Clause`` and
        ``Validation Status``. Unmatched fields stay ``""``; ``Name`` is a
        timestamped title and ``Validation Status`` is ``"Pending"``.
    """
    fields = {
        "Name": f"Contract Summary - {datetime.now().strftime('%Y%m%d%H%M%S')}",
        "Obligations": "",
        "Parties": "",
        "Payment Terms": "",
        "Start Date": "",
        "Termination Clause": "",
        "Validation Status": "Pending",
    }

    # (keyword searched in the line, field that receives the line), in
    # priority order.
    keyword_to_field = (
        ("Obligation", "Obligations"),
        ("Parties", "Parties"),
        ("Payment", "Payment Terms"),
        ("Start Date", "Start Date"),
        ("Termination", "Termination Clause"),
    )

    for line in summary.split("\n"):
        for keyword, field in keyword_to_field:
            if keyword not in line:
                continue
            if field == "Start Date":
                # Keep everything after the FIRST colon so values that
                # themselves contain colons (e.g. "10:30:00") survive.
                # Without any colon the whole line is kept.
                _, sep, value = line.partition(":")
                fields[field] = (value if sep else line).strip()
            else:
                fields[field] = line
            break

    return fields
|
45 |
+
|
46 |
+
def send_to_salesforce(summary_data, timeout=30):
    """Create a ``Contract_Summary__c`` record from extracted contract fields.

    Connection settings are read from the environment variables
    ``SALESFORCE_INSTANCE_URL`` and ``SALESFORCE_ACCESS_TOKEN``. There are
    deliberately no built-in fallbacks: credentials must not live in source.

    Args:
        summary_data: Dict with the keys ``Name``, ``Obligations``,
            ``Parties``, ``Payment Terms``, ``Start Date``,
            ``Termination Clause`` and ``Validation Status``. Missing keys
            are sent as ``None`` (``Name`` defaults to "Untitled Contract",
            ``Validation Status`` to "Pending").
        timeout: Seconds to wait for Salesforce before giving up.

    Returns:
        The decoded JSON body of a 2xx response.

    Raises:
        RuntimeError: If a required environment variable is missing or
            Salesforce answers with a non-2xx status.
    """
    instance_url = os.getenv("SALESFORCE_INSTANCE_URL")
    access_token = os.getenv("SALESFORCE_ACCESS_TOKEN")

    missing = [
        name
        for name, value in (
            ("SALESFORCE_INSTANCE_URL", instance_url),
            ("SALESFORCE_ACCESS_TOKEN", access_token),
        )
        if not value
    ]
    if missing:
        raise RuntimeError(
            "Missing Salesforce configuration: " + ", ".join(missing)
        )

    object_api = "/services/data/v60.0/sobjects/Contract_Summary__c/"
    # Tolerate a trailing slash on the configured URL to avoid "//" in the path.
    url = instance_url.rstrip("/") + object_api

    headers = {
        "Authorization": f"Bearer {access_token}",
        "Content-Type": "application/json",
    }
    payload = {
        "Name": summary_data.get("Name", "Untitled Contract"),
        "Obligations__c": summary_data.get("Obligations"),
        "Parties__c": summary_data.get("Parties"),
        "Payment_Terms__c": summary_data.get("Payment Terms"),
        "Start_Date__c": summary_data.get("Start Date"),
        "Termination_Clause__c": summary_data.get("Termination Clause"),
        "Validation_Status__c": summary_data.get("Validation Status") or "Pending",
    }

    # Without a timeout an unresponsive endpoint would block the caller
    # indefinitely.
    response = requests.post(url, headers=headers, json=payload, timeout=timeout)
    if 200 <= response.status_code < 300:
        return response.json()
    raise RuntimeError(
        f"Salesforce API error: {response.status_code} - {response.text}"
    )
|
69 |
+
|
70 |
+
def generate_pdf(summary_text):
    """Write ``summary_text`` to a new PDF file and return its path.

    Args:
        summary_text: The text to render; each ``"\\n"``-separated line is
            written as its own wrapped paragraph.

    Returns:
        Path of the generated PDF, created in the system temporary
        directory with a unique ``summary_*.pdf`` name.
    """
    import tempfile  # local import: keeps this block self-contained

    pdf = FPDF()
    pdf.add_page()
    pdf.set_font("Arial", size=12)
    for line in summary_text.split('\n'):
        # The built-in Arial font is limited to Latin-1; characters outside
        # it (curly quotes, dashes, ...) would abort PDF generation, so they
        # are replaced with "?" instead.
        printable = line.encode("latin-1", "replace").decode("latin-1")
        pdf.multi_cell(0, 10, printable)

    # mkstemp guarantees a unique name even for requests arriving within
    # the same second, and honours the platform's temp directory.
    fd, filepath = tempfile.mkstemp(prefix="summary_", suffix=".pdf")
    os.close(fd)
    pdf.output(filepath)
    return filepath
|