PD03 committed on
Commit
93045b6
·
verified ·
1 Parent(s): 46e32da

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -28
app.py CHANGED
@@ -1,9 +1,7 @@
1
- import os
2
  import gradio as gr
3
  import pandas as pd
4
  import tensorflow as tf
5
 
6
- # TAPAS imports
7
  from tapas.protos import interaction_pb2
8
  from tapas.utils import number_annotation_utils, tf_example_utils, prediction_utils
9
  from tapas.scripts.run_task_main import get_classifier_model, get_task_config
@@ -12,14 +10,11 @@ from tapas.scripts.run_task_main import get_classifier_model, get_task_config
12
  df = pd.read_csv("synthetic_profit.csv")
13
  df = df.astype(str)
14
 
15
- # 2) Build the “list of lists” table
16
- # (header row + all data rows)
17
  table = [list(df.columns)]
18
  table.extend(df.values.tolist())
19
 
20
- # 3) Prepare the TAPAS converter + model
21
- # – add_aggregation_candidates=True to surface SUM/AVG ops
22
- # – strip_column_names=False so your exact headers stay visible
23
  config = tf_example_utils.ClassifierConversionConfig(
24
  vocab_file="tapas_sqa_base/vocab.txt",
25
  max_seq_length=512,
@@ -30,8 +25,7 @@ config = tf_example_utils.ClassifierConversionConfig(
30
  )
31
  converter = tf_example_utils.ToClassifierTensorflowExample(config)
32
 
33
- # 4) Load your pretrained checkpoint
34
- # (uses the same flags as run_task_main.py --mode=predict)
35
  task_config = get_task_config(
36
  task="sqa",
37
  init_checkpoint="tapas_sqa_base/model.ckpt-0",
@@ -41,31 +35,28 @@ task_config = get_task_config(
41
  )
42
  model, tokenizer = get_classifier_model(task_config)
43
 
44
- # 5) Convert a single (table, query) into a TF Example
45
  def make_tf_example(table, query):
46
  interaction = interaction_pb2.Interaction()
47
- # a) question
48
  q = interaction.questions.add()
49
  q.original_text = query
50
- # b) columns
51
  for col in table[0]:
52
  interaction.table.columns.add().text = col
53
- # c) rows
54
  for row_vals in table[1:]:
55
  row = interaction.table.rows.add()
56
  for cell in row_vals:
57
  row.cells.add().text = cell
58
- # d) numeric annotation helps SUM/AVG
59
  number_annotation_utils.add_numeric_values(interaction)
60
- # e) convert to example
61
- serialized = converter.convert(interaction)
62
- return serialized
63
 
64
- # 6) Run TAPAS and parse its coordinate output
65
  def predict_answer(query):
66
- # build TF example
67
  example = make_tf_example(table, query)
68
- # run prediction
69
  input_fn = tf_example_utils.input_fn_builder(
70
  [example],
71
  is_training=False,
@@ -74,13 +65,8 @@ def predict_answer(query):
74
  seq_length=config.max_seq_length,
75
  )
76
  preds = model.predict(input_fn)
77
- # parse answer coordinates
78
  coords = prediction_utils.parse_coordinates(preds[0]["answer_coordinates"])
79
- # map back to table values
80
- answers = []
81
- for (r, c) in coords:
82
- # table[0] is header row, so data starts at index 1
83
- answers.append(table[r+1][c])
84
  return ", ".join(answers) if answers else "No answer found."
85
 
86
  # 7) Gradio interface
@@ -96,8 +82,8 @@ iface = gr.Interface(
96
  outputs=gr.Textbox(label="Answer"),
97
  title="SAP Profitability Q&A (TAPAS Low-Level)",
98
  description=(
99
- "Uses TAPAS’s Interaction + Converter APIs with aggregation candidates "
100
- "and numeric annotations to reliably answer sum/average queries."
101
  ),
102
  allow_flagging="never",
103
  )
 
 
1
  import gradio as gr
2
  import pandas as pd
3
  import tensorflow as tf
4
 
 
5
  from tapas.protos import interaction_pb2
6
  from tapas.utils import number_annotation_utils, tf_example_utils, prediction_utils
7
  from tapas.scripts.run_task_main import get_classifier_model, get_task_config
 
10
  df = pd.read_csv("synthetic_profit.csv")
11
  df = df.astype(str)
12
 
13
+ # 2) Build the “list of lists” table (header + rows)
 
14
  table = [list(df.columns)]
15
  table.extend(df.values.tolist())
16
 
17
+ # 3) Prepare the TAPAS converter with aggregation candidates
 
 
18
  config = tf_example_utils.ClassifierConversionConfig(
19
  vocab_file="tapas_sqa_base/vocab.txt",
20
  max_seq_length=512,
 
25
  )
26
  converter = tf_example_utils.ToClassifierTensorflowExample(config)
27
 
28
+ # 4) Load pretrained TAPAS checkpoint
 
29
  task_config = get_task_config(
30
  task="sqa",
31
  init_checkpoint="tapas_sqa_base/model.ckpt-0",
 
35
  )
36
  model, tokenizer = get_classifier_model(task_config)
37
 
38
+ # 5) Build a TF example from (table, query)
39
  def make_tf_example(table, query):
40
  interaction = interaction_pb2.Interaction()
41
+ # question
42
  q = interaction.questions.add()
43
  q.original_text = query
44
+ # columns
45
  for col in table[0]:
46
  interaction.table.columns.add().text = col
47
+ # rows
48
  for row_vals in table[1:]:
49
  row = interaction.table.rows.add()
50
  for cell in row_vals:
51
  row.cells.add().text = cell
52
+ # numeric annotation for SUM/AVG
53
  number_annotation_utils.add_numeric_values(interaction)
54
+ # convert to serialized Example
55
+ return converter.convert(interaction)
 
56
 
57
+ # 6) Run TAPAS & parse coordinates back to cell values
58
  def predict_answer(query):
 
59
  example = make_tf_example(table, query)
 
60
  input_fn = tf_example_utils.input_fn_builder(
61
  [example],
62
  is_training=False,
 
65
  seq_length=config.max_seq_length,
66
  )
67
  preds = model.predict(input_fn)
 
68
  coords = prediction_utils.parse_coordinates(preds[0]["answer_coordinates"])
69
+ answers = [ table[r+1][c] for (r, c) in coords ] # r+1 because row 0 is header
 
 
 
 
70
  return ", ".join(answers) if answers else "No answer found."
71
 
72
  # 7) Gradio interface
 
82
  outputs=gr.Textbox(label="Answer"),
83
  title="SAP Profitability Q&A (TAPAS Low-Level)",
84
  description=(
85
+ "TAPAS with aggregation candidates & numeric annotations—"
86
+ "robust sums/averages on your SAP data."
87
  ),
88
  allow_flagging="never",
89
  )