Spaces:

PD03
/

talk_to_data

Running

App Files Files Community

PD03 committed 12 days ago

Commit

53c503a

verified ·

1 Parent(s): 4790b2c

Update app.py

Browse files

Files changed (1) hide show

app.py +31 -47

app.py CHANGED Viewed

@@ -3,10 +3,10 @@ import gradio as gr
 import pandas as pd
 from transformers import pipeline
-# 1) Load your data
 df = pd.read_csv("synthetic_profit.csv")
-# 2) Prepare the TAPAS fallback
 tapas = pipeline(
     "table-question-answering",
     model="google/tapas-base-finetuned-wtq",
@@ -15,82 +15,66 @@ tapas = pipeline(
 )
 table = df.astype(str).to_dict(orient="records")
-# 3) Helpers for parsing
-OPERATIONS = {
-    "total": "sum",
-    "sum":   "sum",
-    "average": "mean",
-    "mean":    "mean"
-}
-COLUMNS = {
-    "revenue":     "Revenue",
-    "cost":        "Cost",
-    "profit":      "Profit",
-    "margin":      "ProfitMargin",
-    "profit margin":"ProfitMargin"
-}
 def parse_and_compute(question: str):
     q = question.lower()
-    # 1) detect op
     op = next((OPERATIONS[k] for k in OPERATIONS if k in q), None)
     # 2) detect column
-    col = next((COLUMNS[k] for k in COLUMNS if k in q), None)
-    # 3) detect product (assumes "Product X")
-    m = re.search(r"product\s*([A-Za-z0-9]+)", q)
-    prod = f"Product {m.group(1)}" if m else None
-    # 4) detect region from known values
-    region = next((r for r in df["Region"].unique() if r.lower() in q), None)
-    # 5) detect year
-    y = re.search(r"\b(20\d{2})\b", q)
-    year = int(y.group(1)) if y else None
-    # 6) detect quarter
-    qtr = next((fq for fq in df["FiscalQuarter"].unique() if fq.lower() in q), None)
     if None in (op, col, prod, region, year, qtr):
-        return None  # fallback
-    # filter
     sub = df[
         (df["Product"] == prod) &
         (df["Region"]  == region) &
         (df["FiscalYear"]   == year) &
-        (df["FiscalQuarter"] == qtr)
     ]
-    # compute
     try:
         val = getattr(sub[col], op)()
-    except Exception:
         return None
     return f"{op.capitalize()} {col} for {prod} in {region}, {qtr} {year}: {val:.2f}"
-# 4) Main answer fn
 def answer(question: str) -> str:
-    res = parse_and_compute(question)
-    if res is not None:
-        return res
-    # fallback to TAPAS
     try:
-        out = tapas(table=table, query=question)
-        return out.get("answer", "No answer found.")
     except Exception as e:
         return f"❌ Error: {e}"
-# 5) Gradio UI
 iface = gr.Interface(
     fn=answer,
     inputs=gr.Textbox(lines=2, placeholder="e.g. What is the total revenue for Product A in EMEA in Q1 2024?"),
     outputs=gr.Textbox(lines=3),
     title="SAP Profitability Q&A",
-    description=(
-        "Supports any basic “total”/“average” question by parsing and computing via Pandas.  \n"
-        "Falls back to TAPAS for anything else."
-    ),
     allow_flagging="never",
 )
-if __name__ == "__main__":
     iface.launch(server_name="0.0.0.0", server_port=7860)

 import pandas as pd
 from transformers import pipeline
+# Load data
 df = pd.read_csv("synthetic_profit.csv")
+# Prepare TAPAS fallback
 tapas = pipeline(
     "table-question-answering",
     model="google/tapas-base-finetuned-wtq",
 )
 table = df.astype(str).to_dict(orient="records")
+# Helpers
+OPERATIONS = {"total": "sum", "sum": "sum", "average": "mean", "mean": "mean"}
+COLUMNS    = {"revenue": "Revenue", "cost": "Cost", "profit": "Profit", "margin":"ProfitMargin","profit margin":"ProfitMargin"}
 def parse_and_compute(question: str):
     q = question.lower()
+    # 1) detect operation
     op = next((OPERATIONS[k] for k in OPERATIONS if k in q), None)
     # 2) detect column
+    col = next((COLUMNS[k]   for k in COLUMNS    if k in q), None)
+    # 3) detect product by scanning your actual values
+    prod = next((p for p in df["Product"].unique() if p.lower() in q), None)
+    # 4) region
+    region = next((r for r in df["Region"].unique()  if r.lower() in q), None)
+    # 5) year
+    yr_match = re.search(r"\b(20\d{2})\b", q)
+    year = int(yr_match.group(1)) if yr_match else None
+    # 6) quarter
+    qtr = next((x for x in df["FiscalQuarter"].unique() if x.lower() in q), None)
+    # if any piece is missing, return None so the caller falls back to TAPAS
     if None in (op, col, prod, region, year, qtr):
+        return None
+    # filter & compute
     sub = df[
         (df["Product"] == prod) &
         (df["Region"]  == region) &
         (df["FiscalYear"]   == year) &
+        (df["FiscalQuarter"]== qtr)
     ]
     try:
         val = getattr(sub[col], op)()
+    except:
         return None
     return f"{op.capitalize()} {col} for {prod} in {region}, {qtr} {year}: {val:.2f}"
 def answer(question: str) -> str:
+    out = parse_and_compute(question)
+    if out is not None:
+        return out
+    # fallback
     try:
+        res = tapas(table=table, query=question)
+        return res.get("answer", "No answer found.")
     except Exception as e:
         return f"❌ Error: {e}"
+# Gradio UI
 iface = gr.Interface(
     fn=answer,
     inputs=gr.Textbox(lines=2, placeholder="e.g. What is the total revenue for Product A in EMEA in Q1 2024?"),
     outputs=gr.Textbox(lines=3),
     title="SAP Profitability Q&A",
+    description="Basic total/average queries via Pandas+fallback to TAPAS",
     allow_flagging="never",
 )
+if __name__=="__main__":
     iface.launch(server_name="0.0.0.0", server_port=7860)