import gradio as gr
import pandas as pd
import tensorflow as tf

# TAPAS imports
from tapas.protos import interaction_pb2
from tapas.utils import number_annotation_utils, tf_example_utils
from tapas.scripts import prediction_utils

# NOTE: get_task_config / get_classifier_model are assumed to be local helpers
# that wrap the model-loading logic of run_task_main.py --mode=predict; they
# are not exposed by the published google-research/tapas package.
from tapas.scripts.run_task_main import get_classifier_model, get_task_config

# 1) Load & stringify your CSV
df = pd.read_csv("synthetic_profit.csv")
df = df.astype(str)

# 2) Build the “list of lists” table
#    (header row + all data rows)
table = [list(df.columns)]
table.extend(df.values.tolist())

# 3) Prepare the TAPAS example converter
#    - add_aggregation_candidates=True to surface SUM/AVG ops
#    - strip_column_names=False so your exact headers stay visible
config = tf_example_utils.ClassifierConversionConfig(
    vocab_file="tapas_sqa_base/vocab.txt",
    max_seq_length=512,
    max_column_id=512,
    max_row_id=512,
    strip_column_names=False,
    add_aggregation_candidates=True,
)
converter = tf_example_utils.ToClassifierTensorflowExample(config)

# 4) Load your pretrained checkpoint
#    (uses the same flags as run_task_main.py --mode=predict)
task_config = get_task_config(
    task="sqa",
    init_checkpoint="tapas_sqa_base/model.ckpt-0",
    vocab_file=config.vocab_file,
    bsz=1,
    max_seq_length=config.max_seq_length,
)
model, tokenizer = get_classifier_model(task_config)


# 5) Convert a single (table, query) into a TF Example
def make_tf_example(table, query):
    interaction = interaction_pb2.Interaction()
    # a) question (id format follows the official TAPAS SQA prediction colab)
    q = interaction.questions.add()
    q.original_text = query
    q.id = "0-0_0"
    # b) columns
    for col in table[0]:
        interaction.table.columns.add().text = col
    # c) rows
    for row_vals in table[1:]:
        row = interaction.table.rows.add()
        for cell in row_vals:
            row.cells.add().text = cell
    # d) numeric annotation helps SUM/AVG
    number_annotation_utils.add_numeric_values(interaction)
    # e) convert to example (convert() takes the interaction and a question index)
    return converter.convert(interaction, 0)


# 6) Run TAPAS and parse its coordinate output
def predict_answer(query):
    # build TF example
    example = make_tf_example(table, query)
    # run prediction
    # NOTE: input_fn_builder is assumed to be a local helper that feeds the
    # converted example to the estimator; the published tf_example_utils
    # module does not provide it.
    input_fn = tf_example_utils.input_fn_builder(
        [example],
        is_training=False,
        drop_remainder=False,
        batch_size=1,
        seq_length=config.max_seq_length,
    )
    preds = list(model.predict(input_fn))  # predict() may return a generator
    # parse answer coordinates
    coords = prediction_utils.parse_coordinates(preds[0]["answer_coordinates"])
    # map back to table values
    answers = []
    for (r, c) in coords:
        # coordinates index the data rows; table[0] is the header, so offset by 1
        answers.append(table[r + 1][c])
    return ", ".join(answers) if answers else "No answer found."


# 7) Gradio interface
def answer_fn(question: str) -> str:
    try:
        return predict_answer(question)
    except Exception as e:
        return f"❌ Error: {e}"


iface = gr.Interface(
    fn=answer_fn,
    inputs=gr.Textbox(lines=2, label="Your question"),
    outputs=gr.Textbox(label="Answer"),
    title="SAP Profitability Q&A (TAPAS Low-Level)",
    description=(
        "Uses TAPAS’s Interaction + Converter APIs with aggregation candidates "
        "and numeric annotations to reliably answer sum/average queries."
    ),
    allow_flagging="never",
)

if __name__ == "__main__":
    iface.launch(server_name="0.0.0.0", server_port=7860)
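
# ---------------------------------------------------------------------------
# Alternative prediction route (sketch only, not wired in): since the
# published google-research/tapas repo does not ship in-process helpers like
# get_task_config / get_classifier_model / input_fn_builder, its official SQA
# prediction colab instead serializes the converted examples to a TFRecord and
# shells out to run_task_main.py. A minimal sketch, assuming the same
# tapas_sqa_base checkpoint layout and the colab's default output paths:
#
#   def write_tf_examples(filename, examples):
#       with tf.io.TFRecordWriter(filename) as writer:
#           for ex in examples:
#               writer.write(ex.SerializeToString())
#
#   write_tf_examples("results/sqa/tf_examples/test.tfrecord",
#                     [make_tf_example(table, "your question here")])
#
#   # then, from the shell (flags abridged from the colab):
#   #   python tapas/run_task_main.py \
#   #     --task=SQA \
#   #     --output_dir=results \
#   #     --init_checkpoint=tapas_sqa_base/model.ckpt \
#   #     --bert_config_file=tapas_sqa_base/bert_config.json \
#   #     --mode=predict \
#   #     --noloop_predict \
#   #     --compression_type=
#
# The resulting prediction TSV is then parsed back to cell coordinates with
# prediction_utils.parse_coordinates, exactly as predict_answer() does above.
# ---------------------------------------------------------------------------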