Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,9 +1,7 @@
|
|
1 |
-
import os
|
2 |
import gradio as gr
|
3 |
import pandas as pd
|
4 |
import tensorflow as tf
|
5 |
|
6 |
-
# TAPAS imports
|
7 |
from tapas.protos import interaction_pb2
|
8 |
from tapas.utils import number_annotation_utils, tf_example_utils, prediction_utils
|
9 |
from tapas.scripts.run_task_main import get_classifier_model, get_task_config
|
@@ -12,14 +10,11 @@ from tapas.scripts.run_task_main import get_classifier_model, get_task_config
|
|
12 |
df = pd.read_csv("synthetic_profit.csv")
|
13 |
df = df.astype(str)
|
14 |
|
15 |
-
# 2) Build the “list of lists” table
|
16 |
-
# (header row + all data rows)
|
17 |
table = [list(df.columns)]
|
18 |
table.extend(df.values.tolist())
|
19 |
|
20 |
-
# 3) Prepare the TAPAS converter
|
21 |
-
# – add_aggregation_candidates=True to surface SUM/AVG ops
|
22 |
-
# – strip_column_names=False so your exact headers stay visible
|
23 |
config = tf_example_utils.ClassifierConversionConfig(
|
24 |
vocab_file="tapas_sqa_base/vocab.txt",
|
25 |
max_seq_length=512,
|
@@ -30,8 +25,7 @@ config = tf_example_utils.ClassifierConversionConfig(
|
|
30 |
)
|
31 |
converter = tf_example_utils.ToClassifierTensorflowExample(config)
|
32 |
|
33 |
-
# 4) Load
|
34 |
-
# (uses the same flags as run_task_main.py --mode=predict)
|
35 |
task_config = get_task_config(
|
36 |
task="sqa",
|
37 |
init_checkpoint="tapas_sqa_base/model.ckpt-0",
|
@@ -41,31 +35,28 @@ task_config = get_task_config(
|
|
41 |
)
|
42 |
model, tokenizer = get_classifier_model(task_config)
|
43 |
|
44 |
-
# 5)
|
45 |
def make_tf_example(table, query):
|
46 |
interaction = interaction_pb2.Interaction()
|
47 |
-
#
|
48 |
q = interaction.questions.add()
|
49 |
q.original_text = query
|
50 |
-
#
|
51 |
for col in table[0]:
|
52 |
interaction.table.columns.add().text = col
|
53 |
-
#
|
54 |
for row_vals in table[1:]:
|
55 |
row = interaction.table.rows.add()
|
56 |
for cell in row_vals:
|
57 |
row.cells.add().text = cell
|
58 |
-
#
|
59 |
number_annotation_utils.add_numeric_values(interaction)
|
60 |
-
#
|
61 |
-
|
62 |
-
return serialized
|
63 |
|
64 |
-
# 6) Run TAPAS
|
65 |
def predict_answer(query):
|
66 |
-
# build TF example
|
67 |
example = make_tf_example(table, query)
|
68 |
-
# run prediction
|
69 |
input_fn = tf_example_utils.input_fn_builder(
|
70 |
[example],
|
71 |
is_training=False,
|
@@ -74,13 +65,8 @@ def predict_answer(query):
|
|
74 |
seq_length=config.max_seq_length,
|
75 |
)
|
76 |
preds = model.predict(input_fn)
|
77 |
-
# parse answer coordinates
|
78 |
coords = prediction_utils.parse_coordinates(preds[0]["answer_coordinates"])
|
79 |
-
#
|
80 |
-
answers = []
|
81 |
-
for (r, c) in coords:
|
82 |
-
# table[0] is header row, so data starts at index 1
|
83 |
-
answers.append(table[r+1][c])
|
84 |
return ", ".join(answers) if answers else "No answer found."
|
85 |
|
86 |
# 7) Gradio interface
|
@@ -96,8 +82,8 @@ iface = gr.Interface(
|
|
96 |
outputs=gr.Textbox(label="Answer"),
|
97 |
title="SAP Profitability Q&A (TAPAS Low-Level)",
|
98 |
description=(
|
99 |
-
"
|
100 |
-
"
|
101 |
),
|
102 |
allow_flagging="never",
|
103 |
)
|
|
|
|
|
1 |
import gradio as gr
|
2 |
import pandas as pd
|
3 |
import tensorflow as tf
|
4 |
|
|
|
5 |
from tapas.protos import interaction_pb2
|
6 |
from tapas.utils import number_annotation_utils, tf_example_utils, prediction_utils
|
7 |
from tapas.scripts.run_task_main import get_classifier_model, get_task_config
|
|
|
10 |
# Load the profit data and cast every value to str.
df = pd.read_csv("synthetic_profit.csv").astype(str)

# 2) Build the "list of lists" table: header row first, then every data row.
table = [list(df.columns), *df.values.tolist()]
|
16 |
|
17 |
+
# 3) Prepare the TAPAS converter with aggregation candidates
|
|
|
|
|
18 |
config = tf_example_utils.ClassifierConversionConfig(
|
19 |
vocab_file="tapas_sqa_base/vocab.txt",
|
20 |
max_seq_length=512,
|
|
|
25 |
)
|
26 |
converter = tf_example_utils.ToClassifierTensorflowExample(config)
|
27 |
|
28 |
+
# 4) Load pretrained TAPAS checkpoint
|
|
|
29 |
task_config = get_task_config(
|
30 |
task="sqa",
|
31 |
init_checkpoint="tapas_sqa_base/model.ckpt-0",
|
|
|
35 |
)
|
36 |
model, tokenizer = get_classifier_model(task_config)
|
37 |
|
38 |
+
# 5) Build a TF example from (table, query)
|
39 |
def make_tf_example(table, query):
    """Build a TAPAS example for a single question asked over a table.

    Args:
        table: List of rows. ``table[0]`` holds the column headers; each
            following row holds the cell texts for one data row.
        query: Question text, stored as the interaction's only question.

    Returns:
        The result of the module-level ``converter`` converting question 0
        of the interaction (presumably a ``tf.train.Example`` -- confirm
        against the TAPAS version in use).
    """
    interaction = interaction_pb2.Interaction()

    # Single question per interaction.
    question = interaction.questions.add()
    question.original_text = query

    # Header row becomes the table's columns.
    for header in table[0]:
        interaction.table.columns.add().text = header

    # Every remaining row becomes a table row with one cell per value.
    for row_vals in table[1:]:
        row = interaction.table.rows.add()
        for cell in row_vals:
            row.cells.add().text = cell

    # Annotate numeric values on the filled-in interaction before conversion.
    number_annotation_utils.add_numeric_values(interaction)

    # convert() needs the index of the question to encode; omitting it raises
    # TypeError. There is exactly one question, so the index is 0.
    return converter.convert(interaction, 0)
|
|
|
56 |
|
57 |
+
# 6) Run TAPAS & parse coordinates back to cell values
|
58 |
def predict_answer(query):
|
|
|
59 |
example = make_tf_example(table, query)
|
|
|
60 |
input_fn = tf_example_utils.input_fn_builder(
|
61 |
[example],
|
62 |
is_training=False,
|
|
|
65 |
seq_length=config.max_seq_length,
|
66 |
)
|
67 |
preds = model.predict(input_fn)
|
|
|
68 |
coords = prediction_utils.parse_coordinates(preds[0]["answer_coordinates"])
|
69 |
+
answers = [ table[r+1][c] for (r, c) in coords ] # r+1 because row 0 is header
|
|
|
|
|
|
|
|
|
70 |
return ", ".join(answers) if answers else "No answer found."
|
71 |
|
72 |
# 7) Gradio interface
|
|
|
82 |
outputs=gr.Textbox(label="Answer"),
|
83 |
title="SAP Profitability Q&A (TAPAS Low-Level)",
|
84 |
description=(
|
85 |
+
"TAPAS with aggregation candidates & numeric annotations—"
|
86 |
+
"robust sums/averages on your SAP data."
|
87 |
),
|
88 |
allow_flagging="never",
|
89 |
)
|