import gradio as gr
import pandas as pd
import tensorflow as tf

# TAPAS imports
from tapas.protos import interaction_pb2
from tapas.utils import number_annotation_utils, tf_example_utils
from tapas.scripts import prediction_utils

# NOTE: get_task_config / get_classifier_model are assumed to be local helpers
# that wrap the model-loading logic of run_task_main.py --mode=predict; they
# are not exposed by the published google-research/tapas package.
from tapas.scripts.run_task_main import get_classifier_model, get_task_config

# 1) Load & stringify your CSV
df = pd.read_csv("synthetic_profit.csv")
df = df.astype(str)

# 2) Build the “list of lists” table
#    (header row + all data rows)
table = [list(df.columns)]
table.extend(df.values.tolist())

# 3) Prepare the TAPAS example converter
#    - add_aggregation_candidates=True to surface SUM/AVG ops
#    - strip_column_names=False so your exact headers stay visible
config = tf_example_utils.ClassifierConversionConfig(
    vocab_file="tapas_sqa_base/vocab.txt",
    max_seq_length=512,
    max_column_id=512,
    max_row_id=512,
    strip_column_names=False,
    add_aggregation_candidates=True,
)
converter = tf_example_utils.ToClassifierTensorflowExample(config)

# 4) Load your pretrained checkpoint
#    (uses the same flags as run_task_main.py --mode=predict)
task_config = get_task_config(
    task="sqa",
    init_checkpoint="tapas_sqa_base/model.ckpt-0",
    vocab_file=config.vocab_file,
    bsz=1,
    max_seq_length=config.max_seq_length,
)
model, tokenizer = get_classifier_model(task_config)


# 5) Convert a single (table, query) into a TF Example
def make_tf_example(table, query):
    interaction = interaction_pb2.Interaction()
    # a) question (id format follows the official TAPAS SQA prediction colab)
    q = interaction.questions.add()
    q.original_text = query
    q.id = "0-0_0"
    # b) columns
    for col in table[0]:
        interaction.table.columns.add().text = col
    # c) rows
    for row_vals in table[1:]:
        row = interaction.table.rows.add()
        for cell in row_vals:
            row.cells.add().text = cell
    # d) numeric annotation helps SUM/AVG
    number_annotation_utils.add_numeric_values(interaction)
    # e) convert to example (convert() takes the interaction and a question index)
    return converter.convert(interaction, 0)


# 6) Run TAPAS and parse its coordinate output
def predict_answer(query):
    # build TF example
    example = make_tf_example(table, query)
    # run prediction
    # NOTE: input_fn_builder is assumed to be a local helper that feeds the
    # converted example to the estimator; the published tf_example_utils
    # module does not provide it.
    input_fn = tf_example_utils.input_fn_builder(
        [example],
        is_training=False,
        drop_remainder=False,
        batch_size=1,
        seq_length=config.max_seq_length,
    )
    preds = list(model.predict(input_fn))  # predict() may return a generator
    # parse answer coordinates
    coords = prediction_utils.parse_coordinates(preds[0]["answer_coordinates"])
    # map back to table values
    answers = []
    for (r, c) in coords:
        # coordinates index the data rows; table[0] is the header, so offset by 1
        answers.append(table[r + 1][c])
    return ", ".join(answers) if answers else "No answer found."


# 7) Gradio interface
def answer_fn(question: str) -> str:
    try:
        return predict_answer(question)
    except Exception as e:
        return f"❌ Error: {e}"


iface = gr.Interface(
    fn=answer_fn,
    inputs=gr.Textbox(lines=2, label="Your question"),
    outputs=gr.Textbox(label="Answer"),
    title="SAP Profitability Q&A (TAPAS Low-Level)",
    description=(
        "Uses TAPAS’s Interaction + Converter APIs with aggregation candidates "
        "and numeric annotations to reliably answer sum/average queries."
    ),
    allow_flagging="never",
)

if __name__ == "__main__":
    iface.launch(server_name="0.0.0.0", server_port=7860)
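
# ---------------------------------------------------------------------------
# Alternative prediction route (sketch only, not wired in): since the
# published google-research/tapas repo does not ship in-process helpers like
# get_task_config / get_classifier_model / input_fn_builder, its official SQA
# prediction colab instead serializes the converted examples to a TFRecord and
# shells out to run_task_main.py. A minimal sketch, assuming the same
# tapas_sqa_base checkpoint layout and the colab's default output paths:
#
#   def write_tf_examples(filename, examples):
#       with tf.io.TFRecordWriter(filename) as writer:
#           for ex in examples:
#               writer.write(ex.SerializeToString())
#
#   write_tf_examples("results/sqa/tf_examples/test.tfrecord",
#                     [make_tf_example(table, "your question here")])
#
#   # then, from the shell (flags abridged from the colab):
#   #   python tapas/run_task_main.py \
#   #     --task=SQA \
#   #     --output_dir=results \
#   #     --init_checkpoint=tapas_sqa_base/model.ckpt \
#   #     --bert_config_file=tapas_sqa_base/bert_config.json \
#   #     --mode=predict \
#   #     --noloop_predict \
#   #     --compression_type=
#
# The resulting prediction TSV is then parsed back to cell coordinates with
# prediction_utils.parse_coordinates, exactly as predict_answer() does above.
# ---------------------------------------------------------------------------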