# Captured from the web file viewer of the Space "PD03/talk_to_data"
# (reported file size: 2,826 bytes). The viewer chrome and the per-line
# commit-hash gutter that preceded the code were scrape residue, not part of
# the program, and have been reduced to this note.

import os
import gradio as gr
import pandas as pd
import tensorflow as tf
from tapas.scripts import prediction_utils
from tapas.utils import number_annotation_utils
from tapas.protos import interaction_pb2

# 1) Read CSV and build list-of-lists table
# Every column is read as text and empty cells are kept as "" instead of NaN.
# Casting to str *after* parsing would turn missing values into the literal
# string "nan" and could reformat numbers (e.g. "10" -> "10.0" in an integer
# column that has gaps). `pd` is the module-level pandas import.
df = pd.read_csv("synthetic_profit.csv", dtype=str, keep_default_na=False)
# Build TAPAS-style table: header row + data rows
table = [list(df.columns)] + df.values.tolist()

# 2) Configure TAPAS conversion with aggregation support
# The conversion classes are provided by `tapas.utils.tf_example_utils`
# (the module the TAPAS prediction notebook imports); `tapas.utils` has no
# `example_utils` module, so the previous import failed at start-up.
from tapas.utils import tf_example_utils

config = tf_example_utils.ClassifierConversionConfig(
    vocab_file="tapas_sqa_base/vocab.txt",
    max_seq_length=512,
    max_column_id=512,
    max_row_id=512,
    strip_column_names=False,             # Keep header names
    add_aggregation_candidates=True,     # Propose SUM/AVERAGE operations
)
# Converter that turns an Interaction proto into a classifier example.
converter = tf_example_utils.ToClassifierTensorflowExample(config)

# 3) Helper: convert one interaction to model input
def interaction_from_query(question: str):
    """Build an ``Interaction`` proto that pairs *question* with the table.

    The first entry of the module-level ``table`` supplies the column
    headers; every following entry supplies the cell texts of one row.
    ``number_annotation_utils.add_numeric_values`` is run on the finished
    proto before it is returned.
    """
    proto = interaction_pb2.Interaction()

    # The interaction carries exactly one question.
    proto.questions.add(original_text=question)

    # Header row -> column descriptors.
    proto_table = proto.table
    for heading in table[0]:
        proto_table.columns.add(text=heading)

    # Remaining rows -> one proto row each, one cell per value.
    for record in table[1:]:
        proto_row = proto_table.rows.add()
        for value in record:
            proto_row.cells.add(text=value)

    number_annotation_utils.add_numeric_values(proto)
    return proto

# 4) Instantiate TAPAS tokenizer and table-QA model
# "google/tapas-base-finetuned-wtq" is a table question-answering checkpoint,
# so it is loaded with the question-answering head (cell-selection logits plus
# aggregation logits) rather than a sequence-classification head.
from transformers import TapasTokenizer, TFTapasForQuestionAnswering

MODEL = "google/tapas-base-finetuned-wtq"
tokenizer = TapasTokenizer.from_pretrained(MODEL)
model = TFTapasForQuestionAnswering.from_pretrained(MODEL)

# Aggregation operator for each class id, in the order used by the
# Transformers TAPAS documentation for WTQ-finetuned checkpoints.
_AGGREGATION_OPERATORS = ("NONE", "SUM", "AVERAGE", "COUNT")


def _aggregate(operator, cells):
    """Combine the selected cell texts according to *operator*.

    Returns the cells joined by ", " for ``NONE`` (or when nothing was
    selected), the number of cells for ``COUNT``, and the numeric result for
    ``SUM``/``AVERAGE``. If a cell is not numeric, the operator and the raw
    cells are returned as ``"<OP> > a, b"`` so nothing is silently dropped.
    """
    joined = ", ".join(cells)
    if operator == "NONE" or not cells:
        return joined
    if operator == "COUNT":
        return str(len(cells))
    try:
        # Tolerate thousands separators such as "1,234.5".
        numbers = [float(cell.replace(",", "")) for cell in cells]
    except ValueError:
        return f"{operator} > {joined}"
    total = sum(numbers)
    value = total if operator == "SUM" else total / len(numbers)
    return f"{value:.10g}"


# 5) Prediction helper
def predict_answer(question: str):
    """Answer *question* against the module-level DataFrame ``df``.

    The table and question are tokenized together, run through the model,
    and the logits are converted to cell coordinates plus an aggregation
    class with ``tokenizer.convert_logits_to_predictions``. The selected
    cells are then combined by ``_aggregate``.

    Args:
        question: Natural-language question typed by the user.

    Returns:
        The answer as a string; ``""`` when the question is blank or no cell
        was selected.
    """
    question = (question or "").strip()
    if not question:
        return ""

    # truncation=True lets the tokenizer drop table rows that do not fit in
    # the model's maximum sequence length instead of raising on large tables.
    inputs = tokenizer(
        table=df,
        queries=[question],
        padding="max_length",
        truncation=True,
        return_tensors="tf",
    )
    outputs = model(**inputs)

    # One entry per query; exactly one query was submitted.
    coordinates, aggregation_ids = tokenizer.convert_logits_to_predictions(
        inputs, outputs.logits, outputs.logits_aggregation
    )
    # Coordinates are (row, column) positions among the data rows of `df`.
    cells = [df.iat[row, col] for row, col in coordinates[0]]
    return _aggregate(_AGGREGATION_OPERATORS[aggregation_ids[0]], cells)

# 6) Gradio interface
# One question text box in, one answer text box out, wired to predict_answer.
iface = gr.Interface(
    title="SAP Profitability Q&A (TAPAS Low-Level)",
    description=(
        "Low-level TAPAS: list-of-lists input, numeric annotations, aggregation "
        "candidates, and coordinate post-processing."
    ),
    fn=predict_answer,
    inputs=gr.Textbox(placeholder="Ask a question…", lines=2),
    outputs=gr.Textbox(lines=3),
    # NOTE(review): newer Gradio releases may deprecate this keyword in favour
    # of `flagging_mode` — confirm against the installed version.
    allow_flagging="never",
)

# Start the web server only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch(server_port=7860, server_name="0.0.0.0")