Spaces:

PD03
/

talk_to_data

Sleeping

App Files Community

PD03 committed 20 days ago

Commit

93045b6

verified ·

1 Parent(s): 46e32da

Update app.py

Browse files

Files changed (1) hide show

app.py +14 -28

app.py CHANGED Viewed

@@ -1,9 +1,7 @@
-import os
 import gradio as gr
 import pandas as pd
 import tensorflow as tf
-# TAPAS imports
 from tapas.protos import interaction_pb2
 from tapas.utils import number_annotation_utils, tf_example_utils, prediction_utils
 from tapas.scripts.run_task_main import get_classifier_model, get_task_config
@@ -12,14 +10,11 @@ from tapas.scripts.run_task_main import get_classifier_model, get_task_config
 df = pd.read_csv("synthetic_profit.csv")
 df = df.astype(str)
-# 2) Build the “list of lists” table
-#    (header row + all data rows)
 table = [list(df.columns)]
 table.extend(df.values.tolist())
-# 3) Prepare the TAPAS converter + model
-#    – add_aggregation_candidates=True to surface SUM/AVG ops
-#    – strip_column_names=False so your exact headers stay visible
 config = tf_example_utils.ClassifierConversionConfig(
     vocab_file="tapas_sqa_base/vocab.txt",
     max_seq_length=512,
@@ -30,8 +25,7 @@ config = tf_example_utils.ClassifierConversionConfig(
 )
 converter = tf_example_utils.ToClassifierTensorflowExample(config)
-# 4) Load your pretrained checkpoint
-#    (uses the same flags as run_task_main.py --mode=predict)
 task_config = get_task_config(
     task="sqa",
     init_checkpoint="tapas_sqa_base/model.ckpt-0",
@@ -41,31 +35,28 @@ task_config = get_task_config(
 )
 model, tokenizer = get_classifier_model(task_config)
-# 5) Convert a single (table, query) into a TF Example
 def make_tf_example(table, query):
     interaction = interaction_pb2.Interaction()
-    # a) question
     q = interaction.questions.add()
     q.original_text = query
-    # b) columns
     for col in table[0]:
         interaction.table.columns.add().text = col
-    # c) rows
     for row_vals in table[1:]:
         row = interaction.table.rows.add()
         for cell in row_vals:
             row.cells.add().text = cell
-    # d) numeric annotation helps SUM/AVG
     number_annotation_utils.add_numeric_values(interaction)
-    # e) convert to example
-    serialized = converter.convert(interaction)
-    return serialized
-# 6) Run TAPAS and parse its coordinate output
 def predict_answer(query):
-    # build TF example
     example = make_tf_example(table, query)
-    # run prediction
     input_fn = tf_example_utils.input_fn_builder(
         [example],
         is_training=False,
@@ -74,13 +65,8 @@ def predict_answer(query):
         seq_length=config.max_seq_length,
     )
     preds = model.predict(input_fn)
-    # parse answer coordinates
     coords = prediction_utils.parse_coordinates(preds[0]["answer_coordinates"])
-    # map back to table values
-    answers = []
-    for (r, c) in coords:
-        # table[0] is header row, so data starts at index 1
-        answers.append(table[r+1][c])
     return ", ".join(answers) if answers else "No answer found."
 # 7) Gradio interface
@@ -96,8 +82,8 @@ iface = gr.Interface(
     outputs=gr.Textbox(label="Answer"),
     title="SAP Profitability Q&A (TAPAS Low-Level)",
     description=(
-        "Uses TAPAS’s Interaction + Converter APIs with aggregation candidates "
-        "and numeric annotations to reliably answer sum/average queries."
     ),
     allow_flagging="never",
 )

 import gradio as gr
 import pandas as pd
 import tensorflow as tf
 from tapas.protos import interaction_pb2
 from tapas.utils import number_annotation_utils, tf_example_utils, prediction_utils
 from tapas.scripts.run_task_main import get_classifier_model, get_task_config
 df = pd.read_csv("synthetic_profit.csv")
 df = df.astype(str)
+# 2) Build the “list of lists” table (header + rows)
 table = [list(df.columns)]
 table.extend(df.values.tolist())
+# 3) Prepare the TAPAS converter with aggregation candidates
 config = tf_example_utils.ClassifierConversionConfig(
     vocab_file="tapas_sqa_base/vocab.txt",
     max_seq_length=512,
 )
 converter = tf_example_utils.ToClassifierTensorflowExample(config)
+# 4) Load pretrained TAPAS checkpoint
 task_config = get_task_config(
     task="sqa",
     init_checkpoint="tapas_sqa_base/model.ckpt-0",
 )
 model, tokenizer = get_classifier_model(task_config)
+# 5) Build a TF example from (table, query)
 def make_tf_example(table, query):
     interaction = interaction_pb2.Interaction()
+    # question
     q = interaction.questions.add()
     q.original_text = query
+    # columns
     for col in table[0]:
         interaction.table.columns.add().text = col
+    # rows
     for row_vals in table[1:]:
         row = interaction.table.rows.add()
         for cell in row_vals:
             row.cells.add().text = cell
+    # numeric annotation for SUM/AVG
     number_annotation_utils.add_numeric_values(interaction)
+    # convert to serialized Example
+    return converter.convert(interaction)
+# 6) Run TAPAS & parse coordinates back to cell values
 def predict_answer(query):
     example = make_tf_example(table, query)
     input_fn = tf_example_utils.input_fn_builder(
         [example],
         is_training=False,
         seq_length=config.max_seq_length,
     )
     preds = model.predict(input_fn)
     coords = prediction_utils.parse_coordinates(preds[0]["answer_coordinates"])
+    answers = [ table[r+1][c] for (r, c) in coords ]  # r+1 because row 0 is header
     return ", ".join(answers) if answers else "No answer found."
 # 7) Gradio interface
     outputs=gr.Textbox(label="Answer"),
     title="SAP Profitability Q&A (TAPAS Low-Level)",
     description=(
+        "TAPAS with aggregation candidates & numeric annotations—"
+        "robust sums/averages on your SAP data."
     ),
     allow_flagging="never",
 )