Spaces:

PD03
/

talk_to_data

Sleeping

App Files Files Community

talk_to_data / app.py

PD03

Update app.py

d162c32 verified 22 days ago

raw

history blame

3.41 kB

	import os
	import gradio as gr
	import pandas as pd
	import tensorflow as tf

	# TAPAS imports
	from tapas.protos import interaction_pb2
	from tapas.utils import number_annotation_utils, tf_example_utils, prediction_utils
	from tapas.scripts.run_task_main import get_classifier_model, get_task_config

	# 1) Read the CSV and turn every cell into text in one step
	df = pd.read_csv("synthetic_profit.csv").astype(str)

	# 2) Assemble the table as a list of lists:
	#    the header row first, followed by every data row
	table = [list(df.columns), *df.values.tolist()]

	# 3) Set up the TAPAS example converter
	#    - add_aggregation_candidates=True: per the original author's note, this
	#      is meant to surface SUM/AVG operations
	#    - strip_column_names=False: per the original author's note, this keeps
	#      the exact headers visible
	config = tf_example_utils.ClassifierConversionConfig(
	    vocab_file="tapas_sqa_base/vocab.txt",
	    max_seq_length=512,
	    max_column_id=512,
	    max_row_id=512,
	    strip_column_names=False,
	    add_aggregation_candidates=True,
	)
	converter = tf_example_utils.ToClassifierTensorflowExample(config)

	# 4) Load the pretrained checkpoint, reusing the converter's vocab file and
	#    sequence length so both sides agree.
	#    NOTE(review): get_task_config / get_classifier_model are imported from
	#    tapas.scripts.run_task_main - confirm they exist in the installed TAPAS.
	task_config = get_task_config(
	    task="sqa",
	    init_checkpoint="tapas_sqa_base/model.ckpt-0",
	    vocab_file=config.vocab_file,
	    bsz=1,
	    max_seq_length=config.max_seq_length,
	)
	model, tokenizer = get_classifier_model(task_config)

	# 5) Convert a single (table, query) into a TF Example
	def make_tf_example(table, query):
	    """Build a one-question TAPAS interaction over ``table`` and convert it.

	    Args:
	        table: List of rows. ``table[0]`` holds the column headers; every
	            later row holds the cell texts of one data row.
	        query: The natural-language question to ask about the table.

	    Returns:
	        The value produced by ``converter.convert`` for the question.
	    """
	    interaction = interaction_pb2.Interaction()

	    # a) the single question carried by this interaction
	    interaction.questions.add().original_text = query

	    # b) one column entry per header
	    for header in table[0]:
	        interaction.table.columns.add().text = header

	    # c) one row entry per data row, one cell per value
	    for row_vals in table[1:]:
	        row = interaction.table.rows.add()
	        for cell in row_vals:
	            row.cells.add().text = cell

	    # d) annotate numeric values on the interaction (the original author
	    #    notes this helps SUM/AVG)
	    number_annotation_utils.add_numeric_values(interaction)

	    # e) convert the question at index 0. The classifier converter takes the
	    #    position of the question to convert as its second argument; only one
	    #    question was added above, so that position is always 0.
	    return converter.convert(interaction, 0)

	# 6) Run TAPAS and parse its coordinate output
	def predict_answer(query):
	    """Answer ``query`` against the module-level ``table``.

	    Args:
	        query: The natural-language question.

	    Returns:
	        The selected cell texts joined by ``", "``, or
	        ``"No answer found."`` when the model yields no prediction or no
	        answer coordinates.
	    """
	    example = make_tf_example(table, query)
	    input_fn = tf_example_utils.input_fn_builder(
	        [example],
	        is_training=False,
	        drop_remainder=False,
	        batch_size=1,
	        seq_length=config.max_seq_length,
	    )

	    # ``predict(input_fn)`` follows the Estimator calling convention, where
	    # predictions are yielded lazily. Indexing such a result with [0] fails,
	    # so take the first item through the iterator protocol, which works for
	    # generators and lists alike.
	    first_prediction = next(iter(model.predict(input_fn)), None)
	    if first_prediction is None:
	        return "No answer found."

	    coords = prediction_utils.parse_coordinates(
	        first_prediction["answer_coordinates"]
	    )

	    # The coordinates may arrive as an unordered collection; sort them so
	    # the answers always come out in table reading order.
	    # table[0] is the header row, so data row r lives at index r + 1.
	    answers = [table[r + 1][c] for r, c in sorted(coords)]
	    return ", ".join(answers) if answers else "No answer found."

	# 7) Gradio interface
	def answer_fn(question: str) -> str:
	    """Answer a question for the UI, never letting an exception escape.

	    Args:
	        question: Text typed by the user.

	    Returns:
	        The answer from ``predict_answer``; a short prompt when the question
	        is empty or whitespace-only; or ``"❌ Error: ..."`` describing the
	        failure when prediction raises.
	    """
	    # Nothing to ask: skip the model call entirely.
	    if not question or not question.strip():
	        return "Please enter a question."

	    try:
	        return predict_answer(question)
	    except Exception as e:
	        # This is the UI boundary, so the error is shown to the user instead
	        # of crashing the request - but the traceback is also logged so the
	        # failure can still be diagnosed from the server logs.
	        import logging

	        logging.getLogger(__name__).exception(
	            "Failed to answer question %r", question
	        )
	        return f"❌ Error: {e}"

	# Widgets and descriptive text for the web UI: one question box in, one
	# answer box out.
	_question_box = gr.Textbox(lines=2, label="Your question")
	_answer_box = gr.Textbox(label="Answer")
	_description = (
	    "Uses TAPAS’s Interaction + Converter APIs with aggregation candidates "
	    "and numeric annotations to reliably answer sum/average queries."
	)

	# Wire answer_fn to the widgets; flagging is turned off.
	iface = gr.Interface(
	    fn=answer_fn,
	    inputs=_question_box,
	    outputs=_answer_box,
	    title="SAP Profitability Q&A (TAPAS Low-Level)",
	    description=_description,
	    allow_flagging="never",
	)

	# Start the server only when this file is executed as a script,
	# binding to 0.0.0.0 on port 7860.
	if __name__ == "__main__":
	    iface.launch(server_name="0.0.0.0", server_port=7860)