talk_to_data / app.py
PD03's picture
Update app.py
7704539 verified
raw
history blame
3.1 kB
import os
import gradio as gr
import pandas as pd
import duckdb
import openai
# ─── 1) Load your OpenAI key from the Space’s Secrets ────────────────────────
openai.api_key = os.getenv("OPENAI_API_KEY")
if not openai.api_key:
raise RuntimeError("Missing OPENAI_API_KEY secret in your Space settings")
# ─── 2) Load your CSV into DuckDB ───────────────────────────────────────────
df = pd.read_csv("synthetic_profit.csv")
conn = duckdb.connect(":memory:")
conn.register("sap", df)
# ─── 3) Build a one-line schema string for prompting ────────────────────────
schema = ", ".join(df.columns) # e.g. "Region,Product,FiscalYear, ..."
# ─── 4) Function to generate SQL via OpenAI’s new chat API ──────────────────
def generate_sql(question: str) -> str:
system = (
f"You are an expert SQL generator for a DuckDB table named `sap` "
f"with columns: {schema}. "
"Translate the user's question into a valid SQL query and return ONLY the SQL."
)
messages = [
{"role": "system", "content": system},
{"role": "user", "content": question},
]
resp = openai.chat.completions.create(
model="gpt-3.5-turbo",
messages=messages,
temperature=0.0,
max_tokens=150,
)
sql = resp.choices[0].message.content.strip()
# strip ``` if the model wrapped it
if sql.startswith("```") and sql.endswith("```"):
sql = "\n".join(sql.splitlines()[1:-1])
return sql
# ─── 5) Core Q&A function: NL β†’ SQL β†’ execute β†’ format ─────────────────────
def answer_profitability(question: str) -> str:
# a) generate SQL
try:
sql = generate_sql(question)
except Exception as e:
return f"❌ OpenAI error:\n{e}"
# b) execute it in DuckDB
try:
df_out = conn.execute(sql).df()
except Exception as e:
return (
f"❌ SQL error:\n{e}\n\n"
f"Generated SQL:\n```sql\n{sql}\n```"
)
# c) format the result
if df_out.empty:
return f"No results.\n\nSQL was:\n```sql\n{sql}\n```"
if df_out.shape == (1,1):
return str(df_out.iat[0,0])
return df_out.to_markdown(index=False)
# ─── 6) Gradio interface with explicit outputs ──────────────────────────────
iface = gr.Interface(
fn=answer_profitability,
inputs=gr.Textbox(lines=2, placeholder="Ask a question…", label="Question"),
outputs=gr.Textbox(lines=8, placeholder="Answer appears here", label="Answer"),
title="SAP Profitability Q&A (OpenAI→SQL→DuckDB)",
description="Enter a natural-language question and get back the numeric result or table.",
allow_flagging="never",
)
if __name__ == "__main__":
iface.launch(server_name="0.0.0.0", server_port=7860)