Spaces:

PD03
/

talk_to_data

Sleeping

App Files Files Community

talk_to_data / app.py

PD03

Update app.py

2abeceb verified 3 months ago

raw

history blame

2.92 kB

	import os
	import gradio as gr
	import pandas as pd
	import duckdb
	import openai

	# 1) Load your OpenAI key from the Space’s Secrets
	OPENAI_KEY = os.getenv("OPENAI_API_KEY")
	if not OPENAI_KEY:
	raise RuntimeError("Missing OPENAI_API_KEY secret in your Space settings")
	openai.api_key = OPENAI_KEY

	# 2) Load your synthetic data into DuckDB
	df = pd.read_csv('synthetic_profit.csv')
	conn = duckdb.connect(':memory:')
	conn.register('sap', df)

	# 3) Build a one-line schema description for prompts
	schema = ", ".join(df.columns)

	# 4) Function to generate SQL via OpenAI
	def generate_sql(question: str) -> str:
	system_prompt = (
	f"You are an expert SQL generator for a DuckDB table named `sap` "
	f"with columns: {schema}. "
	"Translate the user's question into a valid SQL query and return ONLY the SQL."
	)
	try:
	resp = openai.ChatCompletion.create(
	model="gpt-3.5-turbo",
	messages=[
	{"role": "system", "content": system_prompt},
	{"role": "user", "content": question},
	],
	temperature=0.0,
	max_tokens=150,
	)
	except Exception as e:
	# Catch network/auth errors
	raise RuntimeError(f"OpenAI API error: {e}")

	sql = resp.choices[0].message.content.strip()
	# strip triple-backticks if present
	if sql.startswith("```") and sql.endswith("```"):
	sql = "\n".join(sql.splitlines()[1:-1])
	return sql

	# 5) Core Q&A function: NL → SQL → execute → format
	def answer_profitability(question: str) -> str:
	# a) turn the question into SQL
	try:
	sql = generate_sql(question)
	except Exception as e:
	return f"❌ OpenAI Error\n{e}"

	# b) try to run it
	try:
	result_df = conn.execute(sql).df()
	except Exception as e:
	return (
	f"❌ SQL Execution Error\n{e}\n\n"
	f"Generated SQL\n```sql\n{sql}\n```"
	)

	# c) format the result
	if result_df.empty:
	return f"No rows returned.\n\n```sql\n{sql}\n```"

	# single-cell → scalar
	if result_df.shape == (1,1):
	return str(result_df.iat[0,0])

	# multi-cell → markdown table
	return result_df.to_markdown(index=False)

	# 6) Gradio interface with explicit inputs & outputs
	iface = gr.Interface(
	fn=answer_profitability,
	inputs=gr.Textbox(lines=2, placeholder="Ask a question about profitability…", label="Question"),
	outputs=gr.Textbox(lines=8, placeholder="Answer will appear here", label="Answer"),
	title="SAP Profitability Q&A (OpenAI → SQL → DuckDB)",
	description=(
	"Uses OpenAI’s GPT-3.5-Turbo to translate your question into SQL, "
	"executes it against the `sap` table in DuckDB, and returns the result."
	),
	allow_flagging="never",
	)

	if __name__ == "__main__":
	iface.launch(server_name="0.0.0.0", server_port=7860)