talk_to_data / app.py
PD03's picture
Update app.py
2abeceb verified
raw
history blame
2.92 kB
import os
import gradio as gr
import pandas as pd
import duckdb
import openai
# 1) Load your OpenAI key from the Space’s Secrets
OPENAI_KEY = os.getenv("OPENAI_API_KEY")
if not OPENAI_KEY:
raise RuntimeError("Missing OPENAI_API_KEY secret in your Space settings")
openai.api_key = OPENAI_KEY
# 2) Load your synthetic data into DuckDB
df = pd.read_csv('synthetic_profit.csv')
conn = duckdb.connect(':memory:')
conn.register('sap', df)
# 3) Build a one-line schema description for prompts
schema = ", ".join(df.columns)
# 4) Function to generate SQL via OpenAI
def generate_sql(question: str) -> str:
system_prompt = (
f"You are an expert SQL generator for a DuckDB table named `sap` "
f"with columns: {schema}. "
"Translate the user's question into a valid SQL query and return ONLY the SQL."
)
try:
resp = openai.ChatCompletion.create(
model="gpt-3.5-turbo",
messages=[
{"role": "system", "content": system_prompt},
{"role": "user", "content": question},
],
temperature=0.0,
max_tokens=150,
)
except Exception as e:
# Catch network/auth errors
raise RuntimeError(f"OpenAI API error: {e}")
sql = resp.choices[0].message.content.strip()
# strip triple-backticks if present
if sql.startswith("```") and sql.endswith("```"):
sql = "\n".join(sql.splitlines()[1:-1])
return sql
# 5) Core Q&A function: NL → SQL → execute → format
def answer_profitability(question: str) -> str:
# a) turn the question into SQL
try:
sql = generate_sql(question)
except Exception as e:
return f"❌ **OpenAI Error**\n{e}"
# b) try to run it
try:
result_df = conn.execute(sql).df()
except Exception as e:
return (
f"❌ **SQL Execution Error**\n{e}\n\n"
f"**Generated SQL**\n```sql\n{sql}\n```"
)
# c) format the result
if result_df.empty:
return f"No rows returned.\n\n```sql\n{sql}\n```"
# single-cell → scalar
if result_df.shape == (1,1):
return str(result_df.iat[0,0])
# multi-cell → markdown table
return result_df.to_markdown(index=False)
# 6) Gradio interface with explicit inputs & outputs
iface = gr.Interface(
fn=answer_profitability,
inputs=gr.Textbox(lines=2, placeholder="Ask a question about profitability…", label="Question"),
outputs=gr.Textbox(lines=8, placeholder="Answer will appear here", label="Answer"),
title="SAP Profitability Q&A (OpenAI → SQL → DuckDB)",
description=(
"Uses OpenAI’s GPT-3.5-Turbo to translate your question into SQL, "
"executes it against the `sap` table in DuckDB, and returns the result."
),
allow_flagging="never",
)
if __name__ == "__main__":
iface.launch(server_name="0.0.0.0", server_port=7860)