Spaces:

PD03
/

talk_to_data

Sleeping

File size: 3,411 Bytes

b1f2bdd
a5ece8b
aa97025
b1f2bdd
 
d162c32
 
 
 
887b999
d162c32
0b8ba87
b1f2bdd
0e84c33
d162c32
 
 
 
 
 
 
 
b1f2bdd
 
 
 
 
d162c32
 
0b8ba87
b1f2bdd
02d55fb
d162c32
 
 
 
 
 
 
 
 
 
 
 
 
b1f2bdd
d162c32
b1f2bdd
d162c32
 
b1f2bdd
 
d162c32
 
 
 
 
 
b1f2bdd
d162c32
 
 
02d55fb
d162c32
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b1f2bdd
d162c32
 
b1f2bdd
d162c32
 
 
 
 
 
 
 
79c9d08
e784f1e
d162c32
 
 
b1f2bdd
 
d162c32
 
b1f2bdd
e784f1e
 
79c9d08
b1f2bdd
0e84c33

import os
import gradio as gr
import pandas as pd
import tensorflow as tf

# TAPAS imports
from tapas.protos import interaction_pb2
from tapas.utils import number_annotation_utils, tf_example_utils, prediction_utils
from tapas.scripts.run_task_main import get_classifier_model, get_task_config

# 1) Read the CSV and cast every column to str so each cell is plain text
df = pd.read_csv("synthetic_profit.csv").astype(str)

# 2) Flatten the frame into a list of rows:
#    table[0] holds the column names, table[1:] holds the data rows
table = [list(df.columns), *df.values.tolist()]

# 3) Set up the TAPAS example converter
#    - sequence length and the row/column id limits are all set to 512
#    - strip_column_names=False: intended to keep the headers as written
#    - add_aggregation_candidates=True: intended to surface SUM/AVG ops
config = tf_example_utils.ClassifierConversionConfig(
    add_aggregation_candidates=True,
    strip_column_names=False,
    max_row_id=512,
    max_column_id=512,
    max_seq_length=512,
    vocab_file="tapas_sqa_base/vocab.txt",
)
converter = tf_example_utils.ToClassifierTensorflowExample(config)

# 4) Describe the pretrained SQA checkpoint and load the classifier.
#    Vocab file and sequence length are read back from `config` so they
#    stay in step with the converter settings.
#    NOTE(review): the original comment says these mirror the flags of
#    run_task_main.py --mode=predict -- confirm against the installed TAPAS.
task_config = get_task_config(
    max_seq_length=config.max_seq_length,
    bsz=1,
    vocab_file=config.vocab_file,
    init_checkpoint="tapas_sqa_base/model.ckpt-0",
    task="sqa",
)
model, tokenizer = get_classifier_model(task_config)

# 5) Convert a single (table, query) into a TF Example
def make_tf_example(table, query):
    """Build a TAPAS classifier example for one question about one table.

    Args:
        table: List of rows. ``table[0]`` is the header row; every following
            row holds the cell texts of one data row.
        query: The natural-language question to ask about the table.

    Returns:
        The value produced by ``converter.convert`` for the single question,
        passed through unchanged.
    """
    interaction = interaction_pb2.Interaction()

    # a) the interaction carries exactly one question
    question = interaction.questions.add()
    question.original_text = query

    # b) header row -> table columns
    for column_name in table[0]:
        interaction.table.columns.add().text = column_name

    # c) remaining rows -> table rows, one cell per value
    for row_values in table[1:]:
        row = interaction.table.rows.add()
        for cell_text in row_values:
            row.cells.add().text = cell_text

    # d) numeric annotation (the original author added this to help SUM/AVG)
    number_annotation_utils.add_numeric_values(interaction)

    # e) convert() works on one question at a time and needs its position in
    #    interaction.questions; there is only one question here, so index 0.
    return converter.convert(interaction, 0)

# 6) Run TAPAS and parse its coordinate output
def predict_answer(query):
    """Answer *query* against the module-level ``table``.

    Args:
        query: The natural-language question.

    Returns:
        The texts of the predicted cells joined with ``", "`` in row-major
        order, or ``"No answer found."`` when there is no prediction or no
        predicted cell.
    """
    example = make_tf_example(table, query)
    input_fn = tf_example_utils.input_fn_builder(
        [example],
        is_training=False,
        drop_remainder=False,
        batch_size=1,
        seq_length=config.max_seq_length,
    )

    # predict() may hand back a lazy iterator instead of a list (TensorFlow
    # Estimators yield predictions), so take the first item without indexing.
    first_prediction = next(iter(model.predict(input_fn)), None)
    if first_prediction is None:
        return "No answer found."

    coords = prediction_utils.parse_coordinates(
        first_prediction["answer_coordinates"]
    )

    # Sort the (row, column) pairs so the answer order does not depend on the
    # iteration order of whatever collection parse_coordinates returns.
    # table[0] is the header row, so data row r lives at table[r + 1].
    answers = [table[r + 1][c] for r, c in sorted(coords)]
    return ", ".join(answers) if answers else "No answer found."

# 7) Gradio interface
def answer_fn(question: str) -> str:
    """Gradio callback: answer *question*, or report the failure as text.

    Any exception raised while predicting is converted into an
    ``"❌ Error: ..."`` string instead of propagating to the caller.
    """
    try:
        reply = predict_answer(question)
    except Exception as exc:
        reply = f"❌ Error: {exc}"
    return reply

# One text box in, one text box out; answer_fn produces the output string.
iface = gr.Interface(
    title="SAP Profitability Q&A (TAPAS Low-Level)",
    description=(
        "Uses TAPAS’s Interaction + Converter APIs with aggregation candidates "
        "and numeric annotations to reliably answer sum/average queries."
    ),
    fn=answer_fn,
    inputs=gr.Textbox(lines=2, label="Your question"),
    outputs=gr.Textbox(label="Answer"),
    allow_flagging="never",
)

if __name__ == "__main__":
    # Start the web server only when run as a script: port 7860, host 0.0.0.0.
    iface.launch(server_port=7860, server_name="0.0.0.0")