# talk_to_data / app.py
# Source: Hugging Face Space file view (author: PD03, commit d162c32
# "Update app.py", verified, 3.41 kB).
import os
import gradio as gr
import pandas as pd
import tensorflow as tf
# TAPAS imports
from tapas.protos import interaction_pb2
from tapas.utils import number_annotation_utils, tf_example_utils, prediction_utils
from tapas.scripts.run_task_main import get_classifier_model, get_task_config
# 1) Read the CSV and render every cell as text in one step.
#    NOTE: astype(str) turns missing values into the literal text "nan".
df = pd.read_csv("synthetic_profit.csv").astype(str)

# 2) Assemble the table as a list of lists:
#    the header row first, followed by every data row.
table = [list(df.columns)] + df.values.tolist()
# 3) Set up the TAPAS converter.
#    - add_aggregation_candidates is switched on (intended to surface SUM/AVG ops)
#    - strip_column_names is off so the exact headers are kept
_conversion_settings = {
    "vocab_file": "tapas_sqa_base/vocab.txt",
    "max_seq_length": 512,
    "max_column_id": 512,
    "max_row_id": 512,
    "strip_column_names": False,
    "add_aggregation_candidates": True,
}
config = tf_example_utils.ClassifierConversionConfig(**_conversion_settings)
converter = tf_example_utils.ToClassifierTensorflowExample(config)
# 4) Load the pretrained checkpoint.
#    Per the original author's note, these settings mirror the flags of
#    run_task_main.py --mode=predict. The vocab file and sequence length are
#    taken from the converter config so both stay in sync.
_task_settings = {
    "task": "sqa",
    "init_checkpoint": "tapas_sqa_base/model.ckpt-0",
    "vocab_file": config.vocab_file,
    "bsz": 1,
    "max_seq_length": config.max_seq_length,
}
task_config = get_task_config(**_task_settings)
model, tokenizer = get_classifier_model(task_config)
# 5) Convert a single (table, query) into a TF Example
def make_tf_example(table, query):
    """Build a TAPAS classifier example for one question asked over ``table``.

    Args:
        table: List of rows. ``table[0]`` holds the column headers and every
            following row holds the cell texts (strings).
        query: The natural-language question to ask about the table.

    Returns:
        The value produced by ``converter.convert`` for the interaction's
        first (and only) question.
    """
    interaction = interaction_pb2.Interaction()

    # a) the single question of this interaction
    interaction.questions.add().original_text = query

    # b) column headers
    for header in table[0]:
        interaction.table.columns.add().text = header

    # c) data rows, cell by cell
    for row_values in table[1:]:
        row = interaction.table.rows.add()
        for cell_text in row_values:
            row.cells.add().text = cell_text

    # d) annotate numeric values on the interaction (meant to help SUM/AVG)
    number_annotation_utils.add_numeric_values(interaction)

    # e) convert() works per question and takes the question's index within
    #    the interaction; only one question was added, so that index is 0.
    return converter.convert(interaction, 0)
# 6) Run TAPAS and parse its coordinate output
def predict_answer(query):
    """Run the model on ``query`` against the module-level ``table``.

    Args:
        query: The natural-language question.

    Returns:
        The predicted answer cells joined with ", " in row-major order, or
        "No answer found." when no coordinates were predicted.

    Raises:
        IndexError: If a predicted coordinate falls outside ``table``.
    """
    # Build the model input for this single question.
    example = make_tf_example(table, query)
    input_fn = tf_example_utils.input_fn_builder(
        [example],
        is_training=False,
        drop_remainder=False,
        batch_size=1,
        seq_length=config.max_seq_length,
    )

    # Run prediction and parse the coordinates of the first result.
    preds = model.predict(input_fn)
    coords = prediction_utils.parse_coordinates(preds[0]["answer_coordinates"])

    # Map (row, column) pairs back to cell texts. table[0] is the header row,
    # so data row r lives at index r + 1. sorted() makes the output order
    # deterministic even if the parsed coordinates come back unordered
    # (NOTE(review): upstream parse_coordinates appears to return a set - confirm).
    answers = [table[r + 1][c] for r, c in sorted(coords)]
    return ", ".join(answers) if answers else "No answer found."
# 7) Gradio interface
def answer_fn(question: str) -> str:
    """Gradio callback: answer ``question`` or return a readable message.

    Args:
        question: Text typed by the user; may be empty or ``None``.

    Returns:
        The predicted answer, a prompt to enter a question when the input is
        blank, or an "❌ Error: ..." message when prediction fails.
    """
    import logging

    # Do not send blank input to the model; ask the user for a question.
    if not question or not question.strip():
        return "Please enter a question."

    try:
        return predict_answer(question)
    except Exception as e:
        # UI boundary: keep the app responsive, but log the traceback so the
        # failure is not lost behind the short message shown to the user.
        logging.getLogger(__name__).exception(
            "Failed to answer question %r", question
        )
        return f"❌ Error: {e}"
# Build the Gradio UI: one question box in, one answer box out.
_question_box = gr.Textbox(lines=2, label="Your question")
_answer_box = gr.Textbox(label="Answer")
_description = (
    "Uses TAPAS’s Interaction + Converter APIs with aggregation candidates "
    "and numeric annotations to reliably answer sum/average queries."
)

iface = gr.Interface(
    fn=answer_fn,
    inputs=_question_box,
    outputs=_answer_box,
    title="SAP Profitability Q&A (TAPAS Low-Level)",
    description=_description,
    allow_flagging="never",
)

# Start the server only when executed as a script (host 0.0.0.0, port 7860).
if __name__ == "__main__":
    _launch_options = {"server_name": "0.0.0.0", "server_port": 7860}
    iface.launch(**_launch_options)