PD03 committed on
Commit
4790b2c
·
verified ·
1 Parent(s): 00e6a04

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +73 -30
app.py CHANGED
@@ -1,51 +1,94 @@
1
- # app.py
2
-
3
  import gradio as gr
4
  import pandas as pd
5
  from transformers import pipeline
6
 
7
- # 1) Load your synthetic SAP data
8
  df = pd.read_csv("synthetic_profit.csv")
9
- table = df.astype(str).to_dict(orient="records")
10
 
11
- # 2) Use TAPEX fine-tuned on WikiTableQuestions
12
- qa = pipeline(
13
  "table-question-answering",
14
- model="microsoft/tapex-base-finetuned-wtq",
15
- tokenizer="microsoft/tapex-base-finetuned-wtq",
16
- device=-1 # CPU; switch to 0 if you have GPU
17
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
 
19
- # 3) Few-shot examples teaching “filter + sum”
20
- EXAMPLES = """
21
- Example 1:
22
- Q: What is the total revenue for Product A in EMEA in Q1 2024?
23
- A: Filter Product=A & Region=EMEA & FiscalYear=2024 & FiscalQuarter=Q1, then sum Revenue → 3075162.49
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
 
25
- Example 2:
26
- Q: What is the total cost for Product A in EMEA in Q1 2024?
27
- A: Filter Product=A & Region=EMEA & FiscalYear=2024 & FiscalQuarter=Q1, then sum Cost → 2894321.75
28
 
29
- Example 3:
30
- Q: What is the total margin for Product A in EMEA in Q1 2024?
31
- A: Filter Product=A & Region=EMEA & FiscalYear=2024 & FiscalQuarter=Q1, then sum ProfitMargin → 0.18
32
- """
 
33
 
34
- def answer_question(question: str) -> str:
35
- prompt = EXAMPLES + f"\nQ: {question}\nA:"
36
  try:
37
- out = qa(table=table, query=prompt)
38
  return out.get("answer", "No answer found.")
39
  except Exception as e:
40
  return f"❌ Error: {e}"
41
 
42
- # 4) Gradio UI
43
  iface = gr.Interface(
44
- fn=answer_question,
45
- inputs=gr.Textbox(lines=2, placeholder="e.g. What is the total revenue for Product A in EMEA in Q1 2024?", label="Your question"),
46
- outputs=gr.Textbox(lines=3, label="Answer"),
47
- title="SAP Profitability Q&A (TAPEX)",
48
- description="Ask basic sum questions on SAP data, powered by TAPEX with few-shot examples.",
 
 
 
49
  allow_flagging="never",
50
  )
51
 
 
1
+ import re
 
2
  import gradio as gr
3
  import pandas as pd
4
  from transformers import pipeline
5
 
6
+ # 1) Load your data
7
  df = pd.read_csv("synthetic_profit.csv")
 
8
 
9
+ # 2) Prepare the TAPAS fallback
10
+ tapas = pipeline(
11
  "table-question-answering",
12
+ model="google/tapas-base-finetuned-wtq",
13
+ tokenizer="google/tapas-base-finetuned-wtq",
14
+ device=-1
15
  )
16
+ table = df.astype(str).to_dict(orient="records")
17
+
18
+ # 3) Helpers for parsing
19
+ OPERATIONS = {
20
+ "total": "sum",
21
+ "sum": "sum",
22
+ "average": "mean",
23
+ "mean": "mean"
24
+ }
25
+ COLUMNS = {
26
+ "revenue": "Revenue",
27
+ "cost": "Cost",
28
+ "profit": "Profit",
29
+ "margin": "ProfitMargin",
30
+ "profit margin":"ProfitMargin"
31
+ }
32
 
33
+ def parse_and_compute(question: str):
34
+ q = question.lower()
35
+ # 1) detect op
36
+ op = next((OPERATIONS[k] for k in OPERATIONS if k in q), None)
37
+ # 2) detect column
38
+ col = next((COLUMNS[k] for k in COLUMNS if k in q), None)
39
+ # 3) detect product (assumes "Product X")
40
+ m = re.search(r"product\s*([A-Za-z0-9]+)", q)
41
+ prod = f"Product {m.group(1)}" if m else None
42
+ # 4) detect region from known values
43
+ region = next((r for r in df["Region"].unique() if r.lower() in q), None)
44
+ # 5) detect year
45
+ y = re.search(r"\b(20\d{2})\b", q)
46
+ year = int(y.group(1)) if y else None
47
+ # 6) detect quarter
48
+ qtr = next((fq for fq in df["FiscalQuarter"].unique() if fq.lower() in q), None)
49
+
50
+ if None in (op, col, prod, region, year, qtr):
51
+ return None # fallback
52
+
53
+ # filter
54
+ sub = df[
55
+ (df["Product"] == prod) &
56
+ (df["Region"] == region) &
57
+ (df["FiscalYear"] == year) &
58
+ (df["FiscalQuarter"] == qtr)
59
+ ]
60
+
61
+ # compute
62
+ try:
63
+ val = getattr(sub[col], op)()
64
+ except Exception:
65
+ return None
66
 
67
+ return f"{op.capitalize()} {col} for {prod} in {region}, {qtr} {year}: {val:.2f}"
 
 
68
 
69
+ # 4) Main answer fn
70
+ def answer(question: str) -> str:
71
+ res = parse_and_compute(question)
72
+ if res is not None:
73
+ return res
74
 
75
+ # fallback to TAPAS
 
76
  try:
77
+ out = tapas(table=table, query=question)
78
  return out.get("answer", "No answer found.")
79
  except Exception as e:
80
  return f"❌ Error: {e}"
81
 
82
+ # 5) Gradio UI
83
  iface = gr.Interface(
84
+ fn=answer,
85
+ inputs=gr.Textbox(lines=2, placeholder="e.g. What is the total revenue for Product A in EMEA in Q1 2024?"),
86
+ outputs=gr.Textbox(lines=3),
87
+ title="SAP Profitability Q&A",
88
+ description=(
89
+ "Supports any basic “total”/“average” question by parsing and computing via Pandas. \n"
90
+ "Falls back to TAPAS for anything else."
91
+ ),
92
  allow_flagging="never",
93
  )
94