import os
import gradio as gr
import pandas as pd
import tensorflow as tf

# TAPAS imports
from tapas.protos import interaction_pb2
from tapas.utils import number_annotation_utils, tf_example_utils
from tapas.scripts import prediction_utils
from tapas.scripts.run_task_main import get_classifier_model, get_task_config
# 1) Load & stringify your CSV
df = pd.read_csv("synthetic_profit.csv")
df = df.astype(str)

# 2) Build the “list of lists” table
#    (header row + all data rows)
table = [list(df.columns)]
table.extend(df.values.tolist())
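# For illustration only (hypothetical column names, not read from
# synthetic_profit.csv), `table` ends up shaped like:
#   [["Region", "Product", "Profit"],    # header row  -> table[0]
#    ["EMEA",   "Widget",  "1200.50"],   # first data row  -> table[1]
#    ["APAC",   "Gadget",  "980.00"]]    # second data row -> table[2]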
# 3) Prepare the TAPAS converter + model
#    – add_aggregation_candidates=True to surface SUM/AVG ops
#    – strip_column_names=False so your exact headers stay visible
config = tf_example_utils.ClassifierConversionConfig(
    vocab_file="tapas_sqa_base/vocab.txt",
    max_seq_length=512,
    max_column_id=512,
    max_row_id=512,
    strip_column_names=False,
    add_aggregation_candidates=True,
)
converter = tf_example_utils.ToClassifierTensorflowExample(config)
# 4) Load your pretrained checkpoint
#    (uses the same flags as run_task_main.py --mode=predict)
task_config = get_task_config(
    task="sqa",
    init_checkpoint="tapas_sqa_base/model.ckpt-0",
    vocab_file=config.vocab_file,
    bsz=1,
    max_seq_length=config.max_seq_length,
)
model, tokenizer = get_classifier_model(task_config)
# 5) Convert a single (table, query) into a TF Example
def make_tf_example(table, query):
    interaction = interaction_pb2.Interaction()
    # a) question (the converter expects an id of the form "<id>-0_<position>")
    q = interaction.questions.add()
    q.original_text = query
    q.id = "q-0_0"
    # b) columns
    for col in table[0]:
        interaction.table.columns.add().text = col
    # c) rows
    for row_vals in table[1:]:
        row = interaction.table.rows.add()
        for cell in row_vals:
            row.cells.add().text = cell
    # d) numeric annotation helps SUM/AVG
    number_annotation_utils.add_numeric_values(interaction)
    # e) convert question 0 of this interaction to a TF example
    serialized = converter.convert(interaction, 0)
    return serialized
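# Quick sanity check (hypothetical question text, illustrative only):
#   example = make_tf_example(table, "What is the total Profit?")
# The converter returns a tf.train.Example for that single question, which
# step 6 feeds to the model one example at a time.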
# 6) Run TAPAS and parse its coordinate output
def predict_answer(query):
    # build the TF example for this question
    example = make_tf_example(table, query)
    # wrap it in an input_fn and run prediction
    input_fn = tf_example_utils.input_fn_builder(
        [example],
        is_training=False,
        drop_remainder=False,
        batch_size=1,
        seq_length=config.max_seq_length,
    )
    # materialize predictions in case predict() yields a generator
    preds = list(model.predict(input_fn))
    # parse answer coordinates
    coords = prediction_utils.parse_coordinates(preds[0]["answer_coordinates"])
    # map coordinates back to table values
    answers = []
    for (r, c) in coords:
        # table[0] is the header row, so data rows start at index 1
        answers.append(table[r + 1][c])
    return ", ".join(answers) if answers else "No answer found."
# 7) Gradio interface
def answer_fn(question: str) -> str:
    try:
        return predict_answer(question)
    except Exception as e:
        return f"❌ Error: {e}"

iface = gr.Interface(
    fn=answer_fn,
    inputs=gr.Textbox(lines=2, label="Your question"),
    outputs=gr.Textbox(label="Answer"),
    title="SAP Profitability Q&A (TAPAS Low-Level)",
    description=(
        "Uses TAPAS’s Interaction + Converter APIs with aggregation candidates "
        "and numeric annotations to answer sum/average queries."
    ),
    allow_flagging="never",
)
if __name__ == "__main__":
    iface.launch(server_name="0.0.0.0", server_port=7860)