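"""Hugging Face Space app: natural-language Q&A over a synthetic SAP-style
profitability dataset. OpenAI's GPT-3.5-Turbo translates the user's question
into SQL, DuckDB executes it against the in-memory `sap` table, and the result
is returned through a Gradio interface."""
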
import os
import gradio as gr
import pandas as pd
import duckdb
import openai

# 1) Load your OpenAI key from the Space’s Secrets
OPENAI_KEY = os.getenv("OPENAI_API_KEY")
if not OPENAI_KEY:
    raise RuntimeError("Missing OPENAI_API_KEY secret in your Space settings")
openai.api_key = OPENAI_KEY

# 2) Load your synthetic data into DuckDB
df = pd.read_csv('synthetic_profit.csv')
conn = duckdb.connect(':memory:')
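# `register` exposes the DataFrame as a virtual table (a view named `sap`),
# so SQL queries run directly over the in-memory pandas data without copying it.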
conn.register('sap', df)

# 3) Build a one-line schema description for prompts
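# e.g. "Region, Product, FiscalYear, Revenue, Profit" (illustrative only;
# the actual column names come from synthetic_profit.csv)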
schema = ", ".join(df.columns)

# 4) Function to generate SQL via OpenAI
def generate_sql(question: str) -> str:
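    """Translate a natural-language question into a DuckDB SQL query.

    Sends the table schema plus the question to the chat model and returns
    the model's SQL string, with any surrounding code fence stripped.
    """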
    system_prompt = (
        f"You are an expert SQL generator for a DuckDB table named `sap` "
        f"with columns: {schema}. "
        "Translate the user's question into a valid SQL query and return ONLY the SQL."
    )
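    # NOTE: `openai.ChatCompletion` is the pre-1.0 openai SDK interface;
    # with openai>=1.0 the equivalent call is `client.chat.completions.create(...)`.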
    try:
        resp = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system",  "content": system_prompt},
                {"role": "user",    "content": question},
            ],
            temperature=0.0,
            max_tokens=150,
        )
    except Exception as e:
        # Catch network/auth errors
        raise RuntimeError(f"OpenAI API error: {e}")

    sql = resp.choices[0].message.content.strip()
    # strip triple-backticks if present
    if sql.startswith("```") and sql.endswith("```"):
        sql = "\n".join(sql.splitlines()[1:-1])
    return sql

# 5) Core Q&A function: NL → SQL → execute → format
def answer_profitability(question: str) -> str:
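    """End-to-end pipeline: question → SQL (via OpenAI) → DuckDB execution →
    answer formatted as a scalar, a markdown table, or an error message.
    """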
    # a) turn the question into SQL
    try:
        sql = generate_sql(question)
    except Exception as e:
        return f"❌ **OpenAI Error**\n{e}"

    # b) try to run it
    try:
        result_df = conn.execute(sql).df()
    except Exception as e:
        return (
            f"❌ **SQL Execution Error**\n{e}\n\n"
            f"**Generated SQL**\n```sql\n{sql}\n```"
        )

    # c) format the result
    if result_df.empty:
        return f"No rows returned.\n\n```sql\n{sql}\n```"

    # single-cell → scalar
    if result_df.shape == (1, 1):
        return str(result_df.iat[0, 0])

    # multi-cell → markdown table
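    # (pandas' DataFrame.to_markdown requires the `tabulate` package at runtime)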
    return result_df.to_markdown(index=False)

# 6) Gradio interface with explicit inputs & outputs
iface = gr.Interface(
    fn=answer_profitability,
    inputs=gr.Textbox(lines=2, placeholder="Ask a question about profitability…", label="Question"),
    outputs=gr.Textbox(lines=8, placeholder="Answer will appear here", label="Answer"),
    title="SAP Profitability Q&A (OpenAI → SQL → DuckDB)",
    description=(
        "Uses OpenAI’s GPT-3.5-Turbo to translate your question into SQL, "
        "executes it against the `sap` table in DuckDB, and returns the result."
    ),
    allow_flagging="never",
)

if __name__ == "__main__":
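    # Bind to 0.0.0.0:7860 so the app is reachable from outside the Space's container.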
    iface.launch(server_name="0.0.0.0", server_port=7860)