PD03 committed on
Commit
ba55f08
·
verified ·
1 Parent(s): 65a4fc0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +48 -53
app.py CHANGED
@@ -3,76 +3,71 @@ import gradio as gr
3
  import pandas as pd
4
  import duckdb
5
  import openai
 
 
6
 
7
- # ─── 1) Load your OpenAI key from the Space’s Secrets ────────────────────────
8
  openai.api_key = os.getenv("OPENAI_API_KEY")
9
- if not openai.api_key:
10
- raise RuntimeError("Missing OPENAI_API_KEY secret in your Space settings")
11
 
12
- # ─── 2) Load your CSV into DuckDB ───────────────────────────────────────────
13
- df = pd.read_csv("synthetic_profit.csv")
14
- conn = duckdb.connect(":memory:")
15
- conn.register("sap", df)
16
 
17
- # ─── 3) Build a one-line schema string for prompting ────────────────────────
18
- schema = ", ".join(df.columns) # e.g. "Region,Product,FiscalYear, ..."
 
 
 
19
 
20
- # ─── 4) Function to generate SQL via OpenAI’s new chat API ──────────────────
21
  def generate_sql(question: str) -> str:
22
- system = (
23
- f"You are an expert SQL generator for a DuckDB table named `sap` "
24
- f"with columns: {schema}. "
25
- "Translate the user's question into a valid SQL query and return ONLY the SQL."
26
  )
27
- messages = [
28
- {"role": "system", "content": system},
29
- {"role": "user", "content": question},
30
- ]
31
- resp = openai.chat.completions.create(
32
- model="gpt-3.5-turbo",
33
- messages=messages,
34
- temperature=0.0,
35
- max_tokens=150,
36
- )
37
- sql = resp.choices[0].message.content.strip()
38
- # strip ``` if the model wrapped it
 
 
 
 
 
 
 
39
  if sql.startswith("```") and sql.endswith("```"):
40
  sql = "\n".join(sql.splitlines()[1:-1])
41
  return sql
42
 
43
- # ─── 5) Core Q&A function: NL → SQL → execute → format ─────────────────────
44
  def answer_profitability(question: str) -> str:
45
- # a) generate SQL
46
- try:
47
- sql = generate_sql(question)
48
- except Exception as e:
49
- return f"❌ OpenAI error:\n{e}"
50
-
51
- # b) execute it in DuckDB
52
  try:
53
- df_out = conn.execute(sql).df()
54
  except Exception as e:
55
- return (
56
- f"❌ SQL error:\n{e}\n\n"
57
- f"Generated SQL:\n```sql\n{sql}\n```"
58
- )
59
-
60
- # c) format the result
61
- if df_out.empty:
62
- return f"No results.\n\nSQL was:\n```sql\n{sql}\n```"
63
- if df_out.shape == (1,1):
64
- return str(df_out.iat[0,0])
65
- return df_out.to_markdown(index=False)
66
 
67
- # ─── 6) Gradio interface with explicit outputs ──────────────────────────────
68
  iface = gr.Interface(
69
  fn=answer_profitability,
70
- inputs=gr.Textbox(lines=2, placeholder="Ask a question…", label="Question"),
71
- outputs=gr.Textbox(lines=8, placeholder="Answer appears here", label="Answer"),
72
- title="SAP Profitability Q&A (OpenAI→SQL→DuckDB)",
73
- description="Enter a natural-language question and get back the numeric result or table.",
74
  allow_flagging="never",
75
  )
76
 
77
- if __name__ == "__main__":
78
- iface.launch(server_name="0.0.0.0", server_port=7860)
 
3
  import pandas as pd
4
  import duckdb
5
  import openai
6
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
7
+ import openai.error
8
 
# --- OpenAI credentials -----------------------------------------------------
# The key is read from the environment; it is None when the variable is unset.
openai.api_key = os.environ.get("OPENAI_API_KEY")

# --- DuckDB: expose the CSV as an in-memory table named `sap` ---------------
df = pd.read_csv("synthetic_profit.csv")
conn = duckdb.connect(":memory:")
conn.register("sap", df)
# Comma-separated column names, interpolated into the model prompts.
schema = ", ".join(df.columns)

# --- Hugging Face fallback generator ----------------------------------------
# Built once at import time so individual requests do not reload the model.
HF_MODEL = "google/flan-t5-small"
hf_tok = AutoTokenizer.from_pretrained(HF_MODEL)
hf_mod = AutoModelForSeq2SeqLM.from_pretrained(HF_MODEL)
hf_gen = pipeline(
    "text2text-generation",
    model=hf_mod,
    tokenizer=hf_tok,
    device=-1,  # -1 selects the CPU in transformers pipelines
)
22
 
 
def _strip_code_fence(text: str) -> str:
    """Return *text* without a surrounding Markdown ``` fence, if it has one."""
    text = text.strip()
    if not (text.startswith("```") and text.endswith("```")):
        return text
    inner = text[3:-3]
    first_line, newline, rest = inner.partition("\n")
    # A first line of at most one word is the fence's language tag ("sql");
    # anything longer is already part of the query and must be kept.
    if newline and len(first_line.split()) <= 1:
        inner = rest
    return inner.strip()


def generate_sql(question: str) -> str:
    """Translate a natural-language question into SQL over the `sap` table.

    The OpenAI chat completions API is tried first. If it raises a
    rate-limit error, the question is sent to the local Hugging Face
    pipeline (`hf_gen`) instead. Any other error from the OpenAI call
    propagates to the caller.

    Args:
        question: The user's question in plain language.

    Returns:
        The generated SQL text, with surrounding whitespace and any Markdown
        code fence removed.
    """
    prompt = (
        f"You are an expert SQL generator for DuckDB table `sap` with columns: {schema}.\n"
        f"Translate the user’s question into a valid SQL query. Return ONLY the SQL."
    )

    # openai>=1.0 (the line that provides `openai.chat.completions`) exposes
    # its exception classes at package top level; `openai.error` exists only
    # in pre-1.0 releases, so it is consulted second.
    # NOTE(review): the module-level `import openai.error` has the same version
    # mismatch and will fail at import time on openai>=1.0 -- confirm the
    # pinned version and drop or guard that import.
    rate_limit_error = getattr(openai, "RateLimitError", None)
    if rate_limit_error is None:
        rate_limit_error = openai.error.RateLimitError

    try:
        resp = openai.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": prompt},
                {"role": "user", "content": question},
            ],
            temperature=0.0,
            max_tokens=150,
        )
        sql = resp.choices[0].message.content
    except rate_limit_error:
        # HTTP 429 from OpenAI: fall back to the local seq2seq model.
        fallback_prompt = f"Translate to SQL over `sap({schema})`:\n{question}"
        sql = hf_gen(fallback_prompt, max_length=128)[0]["generated_text"]

    # Both backends may wrap the query in a fence or pad it with whitespace.
    return _strip_code_fence(sql)
50
 
 
def answer_profitability(question: str) -> str:
    """Answer a question by generating SQL and running it against DuckDB.

    Args:
        question: The user's question in plain language.

    Returns:
        One of:
        - an error message if SQL generation or execution fails,
        - a "No results." message (with the SQL) for an empty result,
        - the bare value as a string for a 1x1 result,
        - otherwise the result rendered as a Markdown table.
    """
    # This is the UI callback, so a generation failure (auth, network, fallback
    # model) is reported as text instead of escaping as an unhandled exception.
    try:
        sql = generate_sql(question)
    except Exception as e:
        return f"❌ SQL generation error:\n{e}"

    # NOTE(review): `sql` is model output derived from untrusted user text and
    # is executed unchecked. DuckDB SQL can read local files and run non-SELECT
    # statements -- consider restricting what is allowed before executing.
    try:
        out_df = conn.execute(sql).df()
    except Exception as e:
        return f"❌ SQL error:\n{e}\n\n```sql\n{sql}\n```"

    if out_df.empty:
        return f"No results.\n\n```sql\n{sql}\n```"
    # A single cell is returned as a plain value rather than a one-cell table.
    if out_df.shape == (1, 1):
        return str(out_df.iat[0, 0])
    return out_df.to_markdown(index=False)
 
 
 
 
 
62
 
 
# Gradio UI: one question textbox in, one answer textbox out, with the
# flagging button disabled.
iface = gr.Interface(
    fn=answer_profitability,
    inputs=gr.Textbox(lines=2, label="Question"),
    outputs=gr.Textbox(lines=8, label="Answer"),
    title="SAP Profitability Q&A",
    # The arrow was mis-encoded ("β†’") in this user-facing text.
    description="Uses OpenAI → DuckDB, falling back to Flan-T5-Small on 429s.",
    allow_flagging="never",
)

if __name__ == "__main__":
    # Listen on all interfaces, port 7860.
    iface.launch(server_name="0.0.0.0", server_port=7860)