Spaces:

PD03
/

talk_to_data

Sleeping

App Files Files Community

PD03 committed 19 days ago

Commit

ba55f08

verified ·

1 Parent(s): 65a4fc0

Update app.py

Browse files

Files changed (1) hide show

app.py +48 -53

app.py CHANGED Viewed

@@ -3,76 +3,71 @@ import gradio as gr
 import pandas as pd
 import duckdb
 import openai
-# ─── 1) Load your OpenAI key from the Space’s Secrets ────────────────────────
 openai.api_key = os.getenv("OPENAI_API_KEY")
-if not openai.api_key:
-    raise RuntimeError("Missing OPENAI_API_KEY secret in your Space settings")
-# ─── 2) Load your CSV into DuckDB ───────────────────────────────────────────
-df = pd.read_csv("synthetic_profit.csv")
-conn = duckdb.connect(":memory:")
-conn.register("sap", df)
-# ─── 3) Build a one-line schema string for prompting ────────────────────────
-schema = ", ".join(df.columns)  # e.g. "Region,Product,FiscalYear, ..."
-# ─── 4) Function to generate SQL via OpenAI’s new chat API ──────────────────
 def generate_sql(question: str) -> str:
-    system = (
-        f"You are an expert SQL generator for a DuckDB table named `sap` "
-        f"with columns: {schema}. "
-        "Translate the user's question into a valid SQL query and return ONLY the SQL."
     )
-    messages = [
-        {"role": "system", "content": system},
-        {"role": "user",   "content": question},
-    ]
-    resp = openai.chat.completions.create(
-        model="gpt-3.5-turbo",
-        messages=messages,
-        temperature=0.0,
-        max_tokens=150,
-    )
-    sql = resp.choices[0].message.content.strip()
-    # strip ``` if the model wrapped it
     if sql.startswith("```") and sql.endswith("```"):
         sql = "\n".join(sql.splitlines()[1:-1])
     return sql
-# ─── 5) Core Q&A function: NL → SQL → execute → format ─────────────────────
 def answer_profitability(question: str) -> str:
-    # a) generate SQL
-    try:
-        sql = generate_sql(question)
-    except Exception as e:
-        return f"❌ OpenAI error:\n{e}"
-    # b) execute it in DuckDB
     try:
-        df_out = conn.execute(sql).df()
     except Exception as e:
-        return (
-            f"❌ SQL error:\n{e}\n\n"
-            f"Generated SQL:\n```sql\n{sql}\n```"
-        )
-    # c) format the result
-    if df_out.empty:
-        return f"No results.\n\nSQL was:\n```sql\n{sql}\n```"
-    if df_out.shape == (1,1):
-        return str(df_out.iat[0,0])
-    return df_out.to_markdown(index=False)
-# ─── 6) Gradio interface with explicit outputs ──────────────────────────────
 iface = gr.Interface(
     fn=answer_profitability,
-    inputs=gr.Textbox(lines=2, placeholder="Ask a question…", label="Question"),
-    outputs=gr.Textbox(lines=8, placeholder="Answer appears here", label="Answer"),
-    title="SAP Profitability Q&A (OpenAI→SQL→DuckDB)",
-    description="Enter a natural-language question and get back the numeric result or table.",
     allow_flagging="never",
 )
-if __name__ == "__main__":
-    iface.launch(server_name="0.0.0.0", server_port=7860)

 import pandas as pd
 import duckdb
 import openai
+from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
+import openai.error
+# — Load OpenAI key —
 openai.api_key = os.getenv("OPENAI_API_KEY")  # None when the variable is unset

 # Load the CSV and expose it to DuckDB as the in-memory table `sap`.
 df = pd.read_csv("synthetic_profit.csv")
 conn = duckdb.connect(":memory:")
 conn.register("sap", df)

 # Comma-separated column names, embedded in the prompts sent to the models.
 schema = ", ".join(df.columns)

 # Build the Hugging Face fallback pipeline once, at import time, so each
 # request does not reload the tokenizer and model.
 HF_MODEL = "google/flan-t5-small"
 hf_tok = AutoTokenizer.from_pretrained(HF_MODEL)
 hf_mod = AutoModelForSeq2SeqLM.from_pretrained(HF_MODEL)
 hf_gen = pipeline(
     "text2text-generation",
     model=hf_mod,
     tokenizer=hf_tok,
     device=-1,  # -1 selects CPU in transformers pipelines
 )
 def generate_sql(question: str) -> str:
     """Translate a natural-language question into SQL for the `sap` table.

     The question is first sent to OpenAI's chat completions endpoint. If
     OpenAI answers with a rate-limit/quota error (HTTP 429), the module-level
     Hugging Face pipeline ``hf_gen`` generates the SQL instead.

     Args:
         question: The user's question in plain language.

     Returns:
         The generated SQL with surrounding whitespace removed and, when the
         model wrapped it in a Markdown code fence, without the fence lines.

     Raises:
         Any OpenAI error other than ``openai.RateLimitError`` propagates to
         the caller unchanged, as do errors raised by the fallback pipeline.
     """
     prompt = (
         f"You are an expert SQL generator for DuckDB table `sap` with columns: {schema}.\n"
         f"Translate the user’s question into a valid SQL query. Return ONLY the SQL."
     )
     try:
         resp = openai.chat.completions.create(
             model="gpt-3.5-turbo",
             messages=[
                 {"role": "system", "content": prompt},
                 {"role": "user", "content": question},
             ],
             temperature=0.0,
             max_tokens=150,
         )
         sql = resp.choices[0].message.content.strip()
     except openai.RateLimitError:
         # The `openai.chat.completions` API belongs to openai>=1.0, where the
         # exception classes live at package top level; the legacy
         # `openai.error` module no longer exists there.
         # HTTP 429 (rate limit / quota): fall back to the local model.
         fallback_prompt = f"Translate to SQL over `sap({schema})`:\n{question}"
         sql = hf_gen(fallback_prompt, max_length=128)[0]["generated_text"].strip()
     # Models sometimes wrap the query in a Markdown fence; drop the opening
     # and closing fence lines and keep what is between them.
     if sql.startswith("```") and sql.endswith("```"):
         sql = "\n".join(sql.splitlines()[1:-1])
     return sql
 def answer_profitability(question: str) -> str:
     """Answer a natural-language question by generating SQL and running it.

     The question is turned into SQL by ``generate_sql`` and executed on the
     module-level DuckDB connection ``conn``.

     Args:
         question: The user's question in plain language.

     Returns:
         - the single value as a string when the result is exactly 1x1;
         - a Markdown table for any other non-empty result;
         - a "No results." message (with the SQL) for an empty result;
         - an error message when SQL generation or SQL execution fails.
     """
     # This function is the UI callback, so failures are reported as text
     # rather than raised; generation failures are handled the same way as
     # execution failures below.
     try:
         sql = generate_sql(question)
     except Exception as e:
         return f"❌ SQL generation error:\n{e}"
     # NOTE(review): the SQL is model-generated from user input and executed
     # verbatim; consider restricting it to read-only SELECT statements.
     try:
         out_df = conn.execute(sql).df()
     except Exception as e:
         return f"❌ SQL error:\n{e}\n\n```sql\n{sql}\n```"
     if out_df.empty:
         return f"No results.\n\n```sql\n{sql}\n```"
     if out_df.shape == (1, 1):
         # A lone cell is shown as a bare value instead of a one-cell table.
         return str(out_df.iat[0, 0])
     return out_df.to_markdown(index=False)
 # Single-function Gradio UI: one question box in, one answer box out.
 iface = gr.Interface(
     fn=answer_profitability,
     inputs=gr.Textbox(label="Question", lines=2),
     outputs=gr.Textbox(label="Answer", lines=8),
     title="SAP Profitability Q&A",
     description="Uses OpenAI → DuckDB, falling back to Flan-T5-Small on 429s.",
     allow_flagging="never",
 )

 # Start the web server only when this file is run as a script.
 if __name__ == "__main__":
     iface.launch(server_port=7860, server_name="0.0.0.0")