Sampathkumarbandaru committed on
Commit
d545986
·
verified ·
1 Parent(s): 42078ad

Upload 5 files

Browse files
Files changed (5) hide show
  1. .env +4 -0
  2. app.py +61 -0
  3. requirements.txt +9 -0
  4. salesforce_ai_contract_summarizer (1).zip +3 -0
  5. utils.py +78 -0
.env ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ # .env - Local secrets only; keep this file out of version control (add it to .gitignore)
2
+
3
+ SALESFORCE_INSTANCE_URL=https://orgfarm-86ce800028-dev-ed.develop.lightning.force.com
4
+ SALESFORCE_ACCESS_TOKEN=rv8ltEXanR65uLRD5nrs5VJBk
app.py ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import time
3
+ from utils import (
4
+ extract_text_from_pdf,
5
+ summarize_text,
6
+ generate_pdf,
7
+ extract_fields_from_summary,
8
+ send_to_salesforce
9
+ )
10
+
11
def process_and_summarize(file):
    """Run the whole contract pipeline for one uploaded PDF.

    The steps are: extract the text, summarize it, derive the Salesforce
    fields from the summary, send them to Salesforce, and render the summary
    as a PDF.

    Args:
        file: The upload object handed over by the ``gr.File`` input.

    Returns:
        A 7-tuple, one entry per output component of the interface, in
        order: summary text, path of the generated PDF, character count of
        the extracted text, elapsed seconds rounded to two decimals, the
        field dict sent to Salesforce, the Salesforce response, and a no-op
        update for the static print-button HTML component. If any step
        raises, the summary slot carries ``"Error: <message>"`` and the
        remaining slots hold neutral placeholders.
    """
    start_time = time.time()
    try:
        text = extract_text_from_pdf(file)
        summary = summarize_text(text)
        fields = extract_fields_from_summary(summary)
        sf_response = send_to_salesforce(fields)
        pdf_path = generate_pdf(summary)
    except Exception as e:
        # UI boundary: surface the failure in the summary box instead of
        # letting the request crash. The trailing gr.update() keeps the
        # number of returned values equal to the number of outputs.
        return f"Error: {str(e)}", None, 0, 0.0, {}, {}, gr.update()

    elapsed = round(time.time() - start_time, 2)
    # The seventh output is static HTML; gr.update() with no arguments leaves
    # it untouched while still supplying a value for that slot.
    return summary, pdf_path, len(text), elapsed, fields, sf_response, gr.update()
25
+
26
# Markup for the "Print Summary" button.
#
# The click handler is written inline on the button: a <script> element that
# reaches the page as injected markup is not executed by browsers, so a
# function defined there would never exist when the button is clicked.
# The summary is placed into the print window through `textContent`, so any
# '<' or '&' in the text is printed literally instead of being parsed as HTML.
_PRINT_BUTTON_HTML = """
<button
  style="margin-top:10px; padding:10px 20px; font-size:16px;"
  onclick="
    const box = document.querySelector('#summary-box textarea');
    const win = window.open('', '', 'height=700,width=800');
    if (!box || !win) { return; }
    win.document.write('<html><head><title>Contract Summary</title></head><body><pre></pre></body></html>');
    win.document.close();
    const pre = win.document.querySelector('pre');
    pre.style.cssText = 'font-family: Arial; font-size: 14px;';
    pre.textContent = box.value;
    win.focus();
    win.print();
    win.close();
  ">🖨️ Print Summary</button>
"""

# Single-page UI: one PDF upload in, seven output components out. The order of
# `outputs` is the order in which the handler's return values are consumed.
iface = gr.Interface(
    fn=process_and_summarize,
    inputs=gr.File(label="📄 Upload Contract (PDF)"),
    outputs=[
        # elem_id is what the print handler's '#summary-box' selector targets.
        gr.Textbox(label="📝 Contract Summary", lines=15, elem_id="summary-box"),
        gr.File(label="📥 Download PDF Summary"),
        gr.Number(label="🧮 Character Count"),
        gr.Number(label="⏱️ Processing Time (s)"),
        gr.JSON(label="📤 Fields Sent to Salesforce"),
        gr.JSON(label="📥 Salesforce Response"),
        gr.HTML(_PRINT_BUTTON_HTML),
    ],
    title="🧠 AI Contract Summarizer + Salesforce",
    description="Upload a contract to summarize, download, print, and sync with Salesforce.",
    allow_flagging="never",
)

if __name__ == "__main__":
    iface.launch()
requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ gradio
2
+ transformers
3
+ torch
4
+ fpdf
5
+ pymupdf
6
+ pytesseract
7
+ Pillow
8
+ requests
9
+ python-dotenv
salesforce_ai_contract_summarizer (1).zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2c1494bcb4c75a3fab0cb29969f1772c65ce4f36c205f4265fe35d4b158245ab
3
+ size 3325
utils.py ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import fitz # PyMuPDF
2
+ import time
3
+ import requests
4
+ import os
5
+ from transformers import pipeline
6
+ from fpdf import FPDF
7
+ from datetime import datetime
8
+
9
def extract_text_from_pdf(file):
    """Return the text of every page of a PDF, joined with newlines.

    Args:
        file: The PDF in one of three forms: a filesystem path (``str`` or
            ``os.PathLike``), an object exposing ``read()``, or the raw PDF
            bytes themselves.

    Returns:
        The concatenated page text, one page per newline-separated segment.

    Raises:
        ValueError: If ``file`` is ``None``, i.e. nothing was uploaded.
    """
    if file is None:
        raise ValueError("No PDF file was provided")

    if isinstance(file, (str, os.PathLike)):
        # A path must be opened by name; handing it to `stream=` would treat
        # the path text itself as the PDF content.
        document = fitz.open(os.fspath(file))
    else:
        file_bytes = file.read() if hasattr(file, 'read') else file
        document = fitz.open(stream=file_bytes, filetype="pdf")

    with document as doc:
        return "\n".join(page.get_text() for page in doc)
16
+
17
_SUMMARY_MODEL = "pszemraj/long-t5-tglobal-base-16384-book-summary"

# Lazily created pipeline, shared by every call so the model is loaded once
# per process rather than once per request.
_summarizer = None


def _get_summarizer():
    """Create the summarization pipeline on first use and reuse it afterwards."""
    global _summarizer
    if _summarizer is None:
        _summarizer = pipeline("summarization", model=_SUMMARY_MODEL)
    return _summarizer


def summarize_text(text, chunk_size=3000):
    """Summarize ``text`` chunk by chunk and join the partial summaries.

    Args:
        text: The text to summarize.
        chunk_size: Number of characters per chunk handed to the model.

    Returns:
        The chunk summaries joined by single spaces, or ``""`` when ``text``
        is empty or contains only whitespace (the model is not invoked then).

    Raises:
        ValueError: If ``chunk_size`` is smaller than 1.
    """
    if chunk_size < 1:
        raise ValueError("chunk_size must be at least 1")
    if not text or not text.strip():
        # Nothing to summarize; avoids handing the model an empty batch.
        return ""

    chunks = [text[i:i + chunk_size] for i in range(0, len(text), chunk_size)]
    summarized = _get_summarizer()(
        chunks, max_length=500, min_length=100, do_sample=False
    )
    return " ".join(part['summary_text'] for part in summarized)
22
+
23
def extract_fields_from_summary(summary):
    """Build the Salesforce field dict by scanning the summary line by line.

    Each line is matched against a fixed list of keywords in priority order;
    the first keyword found in the line decides which field receives it, and
    a later matching line overwrites an earlier one. Every field stores the
    whole line except "Start Date", which keeps only the stripped text after
    the last colon.

    Args:
        summary: The summary text; lines are separated by ``"\\n"``.

    Returns:
        A dict with the keys "Name" (timestamped), "Obligations", "Parties",
        "Payment Terms", "Start Date", "Termination Clause" (empty string
        when no line matched) and "Validation Status" (always "Pending").
    """
    def whole_line(row):
        return row

    def after_last_colon(row):
        return row.split(":")[-1].strip()

    # (keyword, target field, value extractor), highest priority first.
    rules = (
        ("Obligation", "Obligations", whole_line),
        ("Parties", "Parties", whole_line),
        ("Payment", "Payment Terms", whole_line),
        ("Start Date", "Start Date", after_last_colon),
        ("Termination", "Termination Clause", whole_line),
    )

    stamp = datetime.now().strftime('%Y%m%d%H%M%S')
    result = {"Name": f"Contract Summary - {stamp}"}
    for _, target, _ in rules:
        result[target] = ""
    result["Validation Status"] = "Pending"

    for row in summary.split("\n"):
        for keyword, target, extract in rules:
            if keyword in row:
                result[target] = extract(row)
                break  # only the first matching keyword claims the line
    return result
45
+
46
def send_to_salesforce(summary_data, timeout=30):
    """Create a ``Contract_Summary__c`` record from the extracted fields.

    The instance URL is read from ``SALESFORCE_INSTANCE_URL`` and the bearer
    token from ``SALESFORCE_ACCESS_TOKEN``. The token has no fallback value:
    credentials must never live in source code.

    Args:
        summary_data: Dict with the keys "Name", "Obligations", "Parties",
            "Payment Terms", "Start Date", "Termination Clause" and
            "Validation Status"; missing keys are tolerated.
        timeout: Seconds to wait for Salesforce before giving up.

    Returns:
        The decoded JSON body of a 2xx response.

    Raises:
        RuntimeError: If the access token is not configured, or Salesforce
            answers with a non-2xx status.
    """
    instance_url = os.getenv(
        "SALESFORCE_INSTANCE_URL",
        "https://orgfarm-86ce800028-dev-ed.develop.lightning.force.com",
    )
    access_token = os.getenv("SALESFORCE_ACCESS_TOKEN")
    if not access_token:
        raise RuntimeError(
            "SALESFORCE_ACCESS_TOKEN is not set; cannot authenticate with Salesforce"
        )

    # Tolerate a trailing slash on the configured instance URL.
    url = instance_url.rstrip("/") + "/services/data/v60.0/sobjects/Contract_Summary__c/"
    headers = {
        "Authorization": f"Bearer {access_token}",
        "Content-Type": "application/json",
    }
    payload = {
        "Name": summary_data.get("Name", "Untitled Contract"),
        "Obligations__c": summary_data.get("Obligations"),
        "Parties__c": summary_data.get("Parties"),
        "Payment_Terms__c": summary_data.get("Payment Terms"),
        "Start_Date__c": summary_data.get("Start Date"),
        "Termination_Clause__c": summary_data.get("Termination Clause"),
        "Validation_Status__c": summary_data.get("Validation Status") or "Pending",
    }

    # Without a timeout a stalled connection would block the request forever.
    response = requests.post(url, headers=headers, json=payload, timeout=timeout)
    if 200 <= response.status_code < 300:
        return response.json()
    raise RuntimeError(f"Salesforce API error: {response.status_code} - {response.text}")
69
+
70
def generate_pdf(summary_text):
    """Write the summary to a new PDF file and return the file's path.

    Args:
        summary_text: The text to render; each ``"\\n"``-separated line
            becomes its own multi-line cell.

    Returns:
        Path of the PDF, created with a unique name in the system temporary
        directory.
    """
    import tempfile

    pdf = FPDF()
    pdf.add_page()
    pdf.set_font("Arial", size=12)
    for line in summary_text.split('\n'):
        # The built-in Arial font only covers Latin-1; characters outside it
        # (curly quotes, long dashes, ...) are replaced with '?' so one
        # stray character cannot abort the whole export.
        printable = line.encode("latin-1", "replace").decode("latin-1")
        pdf.multi_cell(0, 10, printable)

    # mkstemp guarantees a unique name, so two requests arriving in the same
    # second no longer write to the same file.
    handle, filepath = tempfile.mkstemp(prefix="summary_", suffix=".pdf")
    os.close(handle)
    pdf.output(filepath)
    return filepath