File size: 3,100 Bytes
e784f1e
a5ece8b
aa97025
519d64c
e784f1e
887b999
7704539
08892e7
 
2abeceb
e784f1e
7704539
08892e7
 
 
60fddfe
7704539
 
e784f1e
7704539
e784f1e
08892e7
7704539
 
 
08892e7
7704539
 
 
 
 
08892e7
7704539
08892e7
 
e784f1e
 
7704539
ca3ae8d
e784f1e
 
8cc354c
7704539
e784f1e
7704539
2abeceb
 
 
08892e7
2abeceb
7704539
67fc297
7704539
67fc297
7704539
 
 
 
2abeceb
7704539
 
08892e7
7704539
 
 
79c9d08
7704539
e784f1e
 
08892e7
 
 
 
e784f1e
 
79c9d08
e784f1e
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
import os
import gradio as gr
import pandas as pd
import duckdb
import openai

# ─── 1) Load your OpenAI key from the Space’s Secrets ────────────────────────
openai.api_key = os.getenv("OPENAI_API_KEY")
if not openai.api_key:
    raise RuntimeError("Missing OPENAI_API_KEY secret in your Space settings")

# ─── 2) Load your CSV into DuckDB ───────────────────────────────────────────
df = pd.read_csv("synthetic_profit.csv")
conn = duckdb.connect(":memory:")
conn.register("sap", df)

# ─── 3) Build a one-line schema string for prompting ────────────────────────
schema = ", ".join(df.columns)  # e.g. "Region,Product,FiscalYear, ..."

# ─── 4) Function to generate SQL via OpenAI’s new chat API ──────────────────
def generate_sql(question: str) -> str:
    system = (
        f"You are an expert SQL generator for a DuckDB table named `sap` "
        f"with columns: {schema}. "
        "Translate the user's question into a valid SQL query and return ONLY the SQL."
    )
    messages = [
        {"role": "system", "content": system},
        {"role": "user",   "content": question},
    ]
    resp = openai.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=messages,
        temperature=0.0,
        max_tokens=150,
    )
    sql = resp.choices[0].message.content.strip()
    # strip ``` if the model wrapped it
    if sql.startswith("```") and sql.endswith("```"):
        sql = "\n".join(sql.splitlines()[1:-1])
    return sql

# ─── 5) Core Q&A function: NL β†’ SQL β†’ execute β†’ format ─────────────────────
def answer_profitability(question: str) -> str:
    # a) generate SQL
    try:
        sql = generate_sql(question)
    except Exception as e:
        return f"❌ OpenAI error:\n{e}"

    # b) execute it in DuckDB
    try:
        df_out = conn.execute(sql).df()
    except Exception as e:
        return (
            f"❌ SQL error:\n{e}\n\n"
            f"Generated SQL:\n```sql\n{sql}\n```"
        )

    # c) format the result
    if df_out.empty:
        return f"No results.\n\nSQL was:\n```sql\n{sql}\n```"
    if df_out.shape == (1,1):
        return str(df_out.iat[0,0])
    return df_out.to_markdown(index=False)

# ─── 6) Gradio interface with explicit outputs ──────────────────────────────
iface = gr.Interface(
    fn=answer_profitability,
    inputs=gr.Textbox(lines=2, placeholder="Ask a question…", label="Question"),
    outputs=gr.Textbox(lines=8, placeholder="Answer appears here", label="Answer"),
    title="SAP Profitability Q&A (OpenAI→SQL→DuckDB)",
    description="Enter a natural-language question and get back the numeric result or table.",
    allow_flagging="never",
)

if __name__ == "__main__":
    iface.launch(server_name="0.0.0.0", server_port=7860)