PD03 committed · verified
Commit b1f2bdd · 1 Parent(s): 53c503a

Update app.py

Files changed (1)
  1. app.py +67 -64
app.py CHANGED
@@ -1,80 +1,83 @@
- import re
  import gradio as gr
  import pandas as pd
- from transformers import pipeline

- # Load data
  df = pd.read_csv("synthetic_profit.csv")

- # Prepare TAPAS fallback
- tapas = pipeline(
-     "table-question-answering",
-     model="google/tapas-base-finetuned-wtq",
-     tokenizer="google/tapas-base-finetuned-wtq",
-     device=-1
  )
- table = df.astype(str).to_dict(orient="records")
-
- # Helpers
- OPERATIONS = {"total": "sum", "sum": "sum", "average": "mean", "mean": "mean"}
- COLUMNS = {"revenue": "Revenue", "cost": "Cost", "profit": "Profit", "margin":"ProfitMargin","profit margin":"ProfitMargin"}
-
- def parse_and_compute(question: str):
-     q = question.lower()
-
-     # 1) detect operation
-     op = next((OPERATIONS[k] for k in OPERATIONS if k in q), None)
-     # 2) detect column
-     col = next((COLUMNS[k] for k in COLUMNS if k in q), None)
-     # 3) detect product by scanning your actual values
-     prod = next((p for p in df["Product"].unique() if p.lower() in q), None)
-     # 4) region
-     region = next((r for r in df["Region"].unique() if r.lower() in q), None)
-     # 5) year
-     yr_match = re.search(r"\b(20\d{2})\b", q)
-     year = int(yr_match.group(1)) if yr_match else None
-     # 6) quarter
-     qtr = next((x for x in df["FiscalQuarter"].unique() if x.lower() in q), None)
-
-     # if any piece missing, we fallback
-     if None in (op, col, prod, region, year, qtr):
-         return None
-
-     # filter & compute
-     sub = df[
-         (df["Product"] == prod) &
-         (df["Region"] == region) &
-         (df["FiscalYear"] == year) &
-         (df["FiscalQuarter"]== qtr)
-     ]
-     try:
-         val = getattr(sub[col], op)()
-     except:
-         return None

-     return f"{op.capitalize()} {col} for {prod} in {region}, {qtr} {year}: {val:.2f}"

- def answer(question: str) -> str:
-     out = parse_and_compute(question)
-     if out is not None:
-         return out

-     # fallback
-     try:
-         res = tapas(table=table, query=question)
-         return res.get("answer", "No answer found.")
-     except Exception as e:
-         return f"❌ Error: {e}"

- # Gradio...
  iface = gr.Interface(
-     fn=answer,
-     inputs=gr.Textbox(lines=2, placeholder="e.g. What is the total revenue for Product A in EMEA in Q1 2024?"),
      outputs=gr.Textbox(lines=3),
-     title="SAP Profitability Q&A",
-     description="Basic total/average queries via Pandas+fallback to TAPAS",
      allow_flagging="never",
  )

- if __name__=="__main__":
      iface.launch(server_name="0.0.0.0", server_port=7860)
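The removed helper dispatches the aggregation by name: getattr(sub[col], op)() maps the keyword matched in OPERATIONS ("total"/"sum" to sum, "average"/"mean" to mean) onto the corresponding pandas Series method. A minimal standalone sketch of that pattern, with illustrative data rather than the app's synthetic_profit.csv:

import pandas as pd

# Illustrative frame; the app first filters synthetic_profit.csv by
# Product, Region, FiscalYear and FiscalQuarter, then aggregates.
sub = pd.DataFrame({"Revenue": [120.0, 80.0, 50.0]})

op = "sum"        # resolved from the OPERATIONS keyword map
col = "Revenue"   # resolved from the COLUMNS keyword map

val = getattr(sub[col], op)()                  # equivalent to sub["Revenue"].sum()
print(f"{op.capitalize()} {col}: {val:.2f}")   # Sum Revenue: 250.00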
 
+ import os
  import gradio as gr
  import pandas as pd
+ import tensorflow as tf
+ from tapas.scripts import prediction_utils
+ from tapas.utils import number_annotation_utils
+ from tapas.protos import interaction_pb2
+
+ # 1) Read CSV and build list-of-lists table
+ import pandas as pd

  df = pd.read_csv("synthetic_profit.csv")
+ # Ensure all values are strings
+ df = df.astype(str)
+ # Build TAPAS-style table: header row + data rows
+ table = [list(df.columns)] + df.values.tolist()

+ # 2) Configure TAPAS conversion with aggregation support
+ from tapas.utils import example_utils as tf_example_utils
+ config = tf_example_utils.ClassifierConversionConfig(
+     vocab_file="tapas_sqa_base/vocab.txt",
+     max_seq_length=512,
+     max_column_id=512,
+     max_row_id=512,
+     strip_column_names=False,         # Keep header names
+     add_aggregation_candidates=True,  # Propose SUM/AVERAGE operations
  )
+ converter = tf_example_utils.ToClassifierTensorflowExample(config)

+ # 3) Helper: convert one interaction to model input
+ def interaction_from_query(question: str):
+     interaction = interaction_pb2.Interaction()
+     # Add question
+     q = interaction.questions.add()
+     q.original_text = question
+     # Add table columns
+     for col in table[0]:
+         interaction.table.columns.add().text = col
+     # Add table rows/cells
+     for row in table[1:]:
+         r = interaction.table.rows.add()
+         for cell in row:
+             r.cells.add().text = cell
+     # Annotate numeric values
+     number_annotation_utils.add_numeric_values(interaction)
+     return interaction

+ # 4) Instantiate TAPAS model and tokenizer
+ from transformers import TFAutoModelForSequenceClassification, AutoTokenizer
+ MODEL = "google/tapas-base-finetuned-wtq"
+ tokenizer = AutoTokenizer.from_pretrained(MODEL)
+ model = TFAutoModelForSequenceClassification.from_pretrained(MODEL)

+ # 5) Prediction helper
+ def predict_answer(question: str):
+     interaction = interaction_from_query(question)
+     # Convert to TensorFlowExample
+     tf_example = converter.convert(interaction)
+     # Run prediction
+     result = model(tf_example.features)
+     # Parse answer coordinates
+     coords = prediction_utils.parse_coordinates(result.logits)
+     # Map coordinates back to table cells
+     answers = []
+     for r, c in coords:
+         answers.append(table[r+1][c])
+     return ", ".join(answers)

+ # 6) Gradio interface
  iface = gr.Interface(
+     fn=predict_answer,
+     inputs=gr.Textbox(lines=2, placeholder="Ask a question…"),
      outputs=gr.Textbox(lines=3),
+     title="SAP Profitability Q&A (TAPAS Low-Level)",
+     description=(
+         "Low-level TAPAS: list-of-lists input, numeric annotations, "
+         "aggregation candidates, and coordinate post-processing."
+     ),
      allow_flagging="never",
  )

+ if __name__ == "__main__":
      iface.launch(server_name="0.0.0.0", server_port=7860)
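For comparison, the same google/tapas-base-finetuned-wtq checkpoint can also be driven through the Transformers TAPAS classes, which perform the numeric-value annotation and coordinate/aggregation decoding that steps 2 through 5 above rebuild by hand. This is a hedged sketch only, using the PyTorch classes (assumes torch is installed; the query string is the example from the previous revision's placeholder), not the TF/tapas.* path this commit takes:

import pandas as pd
from transformers import TapasTokenizer, TapasForQuestionAnswering

model_name = "google/tapas-base-finetuned-wtq"
tokenizer = TapasTokenizer.from_pretrained(model_name)
model = TapasForQuestionAnswering.from_pretrained(model_name)

# Same CSV as the app; TAPAS expects string-typed cells
table = pd.read_csv("synthetic_profit.csv").astype(str)
queries = ["What is the total revenue for Product A in EMEA in Q1 2024?"]

inputs = tokenizer(table=table, queries=queries,
                   padding="max_length", return_tensors="pt")
outputs = model(**inputs)

# Decode predicted cell coordinates and the aggregation operator
# (index into NONE/SUM/AVERAGE/COUNT)
coords, agg_indices = tokenizer.convert_logits_to_predictions(
    inputs, outputs.logits.detach(), outputs.logits_aggregation.detach()
)
cells = [table.iat[row, col] for row, col in coords[0]]
print(agg_indices[0], cells)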