Spaces:

PD03
/

talk_to_data

Sleeping

App Files Files Community

talk_to_data / app.py

PD03

Update app.py

7704539 verified 3 months ago

raw

history blame

3.1 kB

	import os
	import gradio as gr
	import pandas as pd
	import duckdb
	import openai

	# ─── 1) Load your OpenAI key from the Space’s Secrets ────────────────────────
	openai.api_key = os.getenv("OPENAI_API_KEY")
	if not openai.api_key:
	raise RuntimeError("Missing OPENAI_API_KEY secret in your Space settings")

	# ─── 2) Load your CSV into DuckDB ───────────────────────────────────────────
	df = pd.read_csv("synthetic_profit.csv")
	conn = duckdb.connect(":memory:")
	conn.register("sap", df)

	# ─── 3) Build a one-line schema string for prompting ────────────────────────
	schema = ", ".join(df.columns) # e.g. "Region,Product,FiscalYear, ..."

	# ─── 4) Function to generate SQL via OpenAI’s new chat API ──────────────────
	def generate_sql(question: str) -> str:
	system = (
	f"You are an expert SQL generator for a DuckDB table named `sap` "
	f"with columns: {schema}. "
	"Translate the user's question into a valid SQL query and return ONLY the SQL."
	)
	messages = [
	{"role": "system", "content": system},
	{"role": "user", "content": question},
	]
	resp = openai.chat.completions.create(
	model="gpt-3.5-turbo",
	messages=messages,
	temperature=0.0,
	max_tokens=150,
	)
	sql = resp.choices[0].message.content.strip()
	# strip ``` if the model wrapped it
	if sql.startswith("```") and sql.endswith("```"):
	sql = "\n".join(sql.splitlines()[1:-1])
	return sql

	# ─── 5) Core Q&A function: NL → SQL → execute → format ─────────────────────
	def answer_profitability(question: str) -> str:
	# a) generate SQL
	try:
	sql = generate_sql(question)
	except Exception as e:
	return f"❌ OpenAI error:\n{e}"

	# b) execute it in DuckDB
	try:
	df_out = conn.execute(sql).df()
	except Exception as e:
	return (
	f"❌ SQL error:\n{e}\n\n"
	f"Generated SQL:\n```sql\n{sql}\n```"
	)

	# c) format the result
	if df_out.empty:
	return f"No results.\n\nSQL was:\n```sql\n{sql}\n```"
	if df_out.shape == (1,1):
	return str(df_out.iat[0,0])
	return df_out.to_markdown(index=False)

	# ─── 6) Gradio interface with explicit outputs ──────────────────────────────
	iface = gr.Interface(
	fn=answer_profitability,
	inputs=gr.Textbox(lines=2, placeholder="Ask a question…", label="Question"),
	outputs=gr.Textbox(lines=8, placeholder="Answer appears here", label="Answer"),
	title="SAP Profitability Q&A (OpenAI→SQL→DuckDB)",
	description="Enter a natural-language question and get back the numeric result or table.",
	allow_flagging="never",
	)

	if __name__ == "__main__":
	iface.launch(server_name="0.0.0.0", server_port=7860)