Spaces:

chandinisaisri
/

formiq

Running

App Files Community

chandini2595 committed on May 17

Commit

f825473

1 Parent(s): 0545f86

Update: added handwritten extraction and product list storage

Browse files

Files changed (1) hide show

app.py +66 -13

app.py CHANGED Viewed

@@ -1,6 +1,6 @@
 import streamlit as st
 import torch
-from transformers import LayoutLMv3Processor, LayoutLMv3ForTokenClassification
 from PIL import Image
 import io
 import json
@@ -22,6 +22,9 @@ from torch.utils.tensorboard import SummaryWriter
 import matplotlib.pyplot as plt
 from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
 import matplotlib
 matplotlib.use('Agg')
 # Configure logging
@@ -94,32 +97,45 @@ def extract_fields(image_path):
     return results
 def extract_products(text):
-    # This pattern matches lines like: "1076903 PISTACHIO 14.49"
-    product_pattern = r"\d{6,} ([A-Z0-9 ]+) (\d+\.\d{2})"
     matches = re.findall(product_pattern, text)
-    products = [{"name": name.strip(), "price": float(price)} for name, price in matches]
     return products
 def extract_with_perplexity_llm(ocr_text):
     prompt = f"""
 You are an expert at extracting structured data from receipts.
-From the following OCR text, extract these fields and return them as a flat JSON object with exactly these keys:
 - name (customer name)
 - date (date of purchase)
-- amount_paid (total amount paid, or price if only one product)
 - receipt_no (receipt number)
-- product (the main product name, as a string; if multiple products, pick the most expensive or the only one)
-**Note:** If the receipt has only one product, set 'product' to its name and 'amount_paid' to its price. If there is a 'price' and an 'amount paid', treat them as the same if they are equal.
 Example output:
 {{
   "name": "Mrs. Genevieve Lopez",
   "date": "12/13/2024",
-  "amount_paid": 579.18,
   "receipt_no": "042085",
-  "product": "Wireless Airpods"
 }}
 Text:
@@ -142,9 +158,32 @@ Text:
     )
     return response.choices[0].message.content
 def save_to_dynamodb(data, table_name="Receipts"):
-    # ... existing code ...
-    # data["products"] is a list of dicts
     table.put_item(Item=data)
 def merge_extractions(regex_fields, llm_fields):
@@ -312,5 +351,19 @@ def main():
         else:
             st.info("Confusion matrix not found.")
 if __name__ == "__main__":
     main()

 import streamlit as st
 import torch
+from transformers import LayoutLMv3Processor, LayoutLMv3ForTokenClassification, TrOCRProcessor, VisionEncoderDecoderModel
 from PIL import Image
 import io
 import json
 import matplotlib.pyplot as plt
 from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
 import matplotlib
+import boto3
+from decimal import Decimal
+import uuid
 matplotlib.use('Agg')
 # Configure logging
     return results
 def extract_products(text):
+    # Pattern to match product lines with quantity, name, and price
+    # Example: "2 PISTACHIO 14.49" or "1076903 PISTACHIO 14.49"
+    product_pattern = r"(?:(\d+)\s+)?([A-Z0-9 ]+)\s+(\d+\.\d{2})"
     matches = re.findall(product_pattern, text)
+    products = []
+    for match in matches:
+        quantity, name, price = match
+        product = {
+            "name": name.strip(),
+            "price": float(price),
+            "quantity": int(quantity) if quantity else 1,
+            "total": float(price) * (int(quantity) if quantity else 1)
+        }
+        products.append(product)
     return products
 def extract_with_perplexity_llm(ocr_text):
     prompt = f"""
 You are an expert at extracting structured data from receipts.
+From the following OCR text, extract these fields and return them as a JSON object with exactly these keys:
 - name (customer name)
 - date (date of purchase)
+- amount_paid (total amount paid)
 - receipt_no (receipt number)
+- products (a list of all products, each with name, price, and quantity if available)
 Example output:
 {{
   "name": "Mrs. Genevieve Lopez",
   "date": "12/13/2024",
+  "amount_paid": 29.69,
   "receipt_no": "042085",
+  "products": [
+    {{"name": "Orange Juice", "price": 2.15, "quantity": 1}},
+    {{"name": "Apples", "price": 3.50, "quantity": 1}}
+  ]
 }}
 Text:
     )
     return response.choices[0].message.content
+def convert_floats_to_decimal(obj):
+    if isinstance(obj, float):
+        return Decimal(str(obj))
+    elif isinstance(obj, dict):
+        return {k: convert_floats_to_decimal(v) for k, v in obj.items()}
+    elif isinstance(obj, list):
+        return [convert_floats_to_decimal(i) for i in obj]
+    else:
+        return obj
 def save_to_dynamodb(data, table_name="Receipts"):
+    dynamodb = boto3.resource('dynamodb')
+    table = dynamodb.Table(table_name)
+    # Calculate total amount if not provided
+    if "products" in data and not data.get("amount_paid"):
+        total = sum(product["total"] for product in data["products"])
+        data["amount_paid"] = total
+    # Convert all float values to Decimal for DynamoDB
+    data = convert_floats_to_decimal(data)
+    # Generate receipt number if not present
+    if not data.get("receipt_no"):
+        data["receipt_no"] = str(uuid.uuid4())
     table.put_item(Item=data)
 def merge_extractions(regex_fields, llm_fields):
         else:
             st.info("Confusion matrix not found.")
+    # Load model and processor
+    processor = TrOCRProcessor.from_pretrained('microsoft/trocr-base-handwritten')
+    model = VisionEncoderDecoderModel.from_pretrained('microsoft/trocr-base-handwritten')
+    # Load your image (crop to handwritten region if possible)
+    image = Image.open('handwritten_sample.jpg').convert("RGB")
+    # Preprocess and predict
+    pixel_values = processor(images=image, return_tensors="pt").pixel_values
+    generated_ids = model.generate(pixel_values)
+    generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
+    print("Handwritten text:", generated_text)
 if __name__ == "__main__":
     main()