PD03 committed on
Commit
e784f1e
·
verified ·
1 Parent(s): d34c686

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +69 -46
app.py CHANGED
@@ -1,62 +1,85 @@
 
1
  import gradio as gr
2
  import pandas as pd
3
- import torch
4
  import duckdb
5
- from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
6
 
7
- # Load data into DuckDB
 
 
 
 
 
 
 
8
  df = pd.read_csv('synthetic_profit.csv')
9
- con = duckdb.connect(':memory:')
10
- con.register('sap', df)
11
-
12
- # One-line schema for prompts
13
- schema = ", ".join(df.columns)
14
-
15
- # Load TAPEX for SQL generation
16
- MODEL_ID = "microsoft/tapex-base-finetuned-wikisql"
17
- device = 0 if torch.cuda.is_available() else -1
18
-
19
- tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
20
- model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_ID)
21
-
22
- sql_gen = pipeline(
23
- "text2text-generation",
24
- model=model,
25
- tokenizer=tokenizer,
26
- framework="pt",
27
- device=device,
28
- max_length=128,
29
- )
30
 
31
- def answer_profitability(question: str) -> str:
32
- # 1) Generate SQL
33
- prompt = (
34
- f"-- Translate to SQL for table `sap` ({schema})\n"
35
- f"Question: {question}\n"
36
- "SQL:"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
  )
38
- sql = sql_gen(prompt)[0]['generated_text'].strip()
 
 
 
 
39
 
40
- # 2) Try to execute it
 
 
 
 
 
41
  try:
42
- df_out = con.execute(sql).df()
43
  except Exception as e:
44
- # Use a normal f-string with explicit \n for newlines
45
  return (
46
- f"❌ **SQL Error**\n"
47
  f"```\n{e}\n```\n\n"
48
- f"**Generated SQL**\n"
49
- f"```sql\n{sql}\n```"
50
- )
51
-
52
- # 3) Format successful result
53
- if df_out.empty:
54
- return (
55
- "No rows returned.\n\n"
56
  f"**Generated SQL**\n```sql\n{sql}\n```"
57
  )
58
 
59
- if df_out.shape == (1,1):
60
- return str(df_out.iat[0,0])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
 
62
- return df_out.to_markdown(index=False)
 
 
1
+ import os
2
  import gradio as gr
3
  import pandas as pd
 
4
  import duckdb
5
+ import openai
6
 
7
# ─── 1) Set your OpenAI key via the SECRET: OPENAI_API_KEY ───────────────────
# os.getenv returns None when the secret is not set; the OpenAI call made in
# generate_sql needs a key, so configure the secret before launching.
openai.api_key = os.getenv("OPENAI_API_KEY")

# ─── 2) Load your synthetic data into DuckDB ─────────────────────────────────
# The CSV is read once at import time and exposed to SQL as the view `sap`
# on an in-memory DuckDB connection shared by every request.
df = pd.read_csv('synthetic_profit.csv')
conn = duckdb.connect(':memory:')
conn.register('sap', df)

# ─── 3) One-line schema description for prompting ─────────────────────────────
# Comma-separated column names, embedded verbatim in the system prompt.
schema = ", ".join(df.columns)
# e.g. "Region, Product, FiscalYear, FiscalQuarter, Revenue, Profit, ProfitMargin"
22
+
23
# ─── 4) Function to call OpenAI and generate SQL ──────────────────────────────
def _strip_code_fence(text: str) -> str:
    """Return *text* without a leading Markdown code fence, if it has one.

    Handles multi-line fences (with or without a language tag), single-line
    fences such as "```sql SELECT 1```", and replies where extra text follows
    the closing fence. Text that does not start with a fence, or whose fence
    is never closed, is returned stripped but otherwise unchanged.
    """
    text = text.strip()
    if not text.startswith("```"):
        return text

    closing = text.find("```", 3)
    if closing == -1:
        # Unterminated fence: leave the reply alone rather than guess.
        return text

    inner = text[3:closing]
    first_line, newline, rest = inner.partition("\n")
    if newline and len(first_line.split()) <= 1:
        # An empty or single-word first line is the fence's language tag.
        inner = rest
    elif inner[:3].lower() == "sql" and inner[3:4].isspace():
        # Tag and query share a line, e.g. "```sql SELECT 1```".
        inner = inner[3:]
    return inner.strip()


def generate_sql(question: str) -> str:
    """Ask the OpenAI chat model to translate *question* into a SQL query.

    The system prompt names the DuckDB table `sap` and lists its columns from
    the module-level ``schema`` string. The model's reply is returned with
    surrounding whitespace and any Markdown code fence removed.

    Args:
        question: Natural-language question from the user.

    Returns:
        The SQL text produced by the model (an empty string if the reply has
        no content).

    Raises:
        Whatever the OpenAI client raises on request failure; errors are not
        caught here.
    """
    system = (
        f"You are an expert SQL generator for a DuckDB table named `sap` "
        f"with columns: {schema}. "
        "Generate a valid SQL query that returns exactly what the user is asking. "
        "Only return the SQL query, without any explanation."
    )
    messages = [
        {"role": "system", "content": system},
        {"role": "user", "content": question},
    ]
    # NOTE(review): openai.ChatCompletion is the pre-1.0 SDK interface —
    # confirm the pinned openai version before upgrading the dependency.
    resp = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=messages,
        temperature=0.0,  # deterministic output for SQL generation
        max_tokens=150,
    )
    content = resp.choices[0].message.content or ""
    return _strip_code_fence(content)
46
 
47
# ─── 5) Core QA function: NL → SQL → execute → format result ─────────────────
def answer_profitability(question: str) -> str:
    """Answer a natural-language question by running generated SQL on `sap`.

    The question is turned into SQL by ``generate_sql``, executed on the
    module-level DuckDB connection, and the outcome is rendered as text:

    * execution failure -> an error report containing the exception text and
      the generated SQL;
    * empty result      -> a "No rows returned." notice plus the SQL;
    * 1x1 result        -> the single value as a plain string;
    * anything else     -> a Markdown table without the index column.
    """
    query = generate_sql(question)

    try:
        result = conn.execute(query).df()
    except Exception as exc:
        # Surface the failure to the user together with the SQL that caused it.
        return (
            "❌ **Error executing SQL**\n\n"
            f"```\n{exc}\n```\n\n"
            f"**Generated SQL**\n```sql\n{query}\n```"
        )

    if result.empty:
        return f"No rows returned.\n\n**SQL**\n```sql\n{query}\n```"

    n_rows, n_cols = result.shape
    if n_rows == 1 and n_cols == 1:
        # Single-cell result: show the scalar instead of a one-cell table.
        return str(result.iat[0, 0])

    return result.to_markdown(index=False)
70
+
71
# ─── 6) Gradio UI ─────────────────────────────────────────────────────────────
# One text box in, Markdown out; answer_profitability does all the work.
iface = gr.Interface(
    fn=answer_profitability,
    inputs=gr.Textbox(lines=2, placeholder="Ask a question about profitability…"),
    outputs=gr.Markdown(),
    title="SAP Profitability Q&A (OpenAI → SQL → DuckDB)",
    description=(
        "Uses OpenAI’s GPT-3.5-Turbo to translate your question into SQL, "
        "executes it on the `sap` table in DuckDB, and returns the result."
    ),
    allow_flagging="never",
)

if __name__ == "__main__":
    # Listen on all interfaces at port 7860.
    iface.launch(server_name="0.0.0.0", server_port=7860)