Spaces:

PD03
/

talk_to_data

Running

File size: 2,954 Bytes

dfe31fe
 
5edc373
a5ece8b
aa97025
25e4074
887b999
5edc373
0b8ba87
25e4074
5edc373
 
25e4074
 
 
e2b2add
0b8ba87
5edc373
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
02d55fb
5edc373
dfe31fe
5edc373
 
dfe31fe
 
 
 
e784f1e
5edc373
 
dfe31fe
25e4074
dfe31fe
5edc373
 
dfe31fe
e784f1e
 
79c9d08
b1f2bdd
dfe31fe

# app.py

import re
import gradio as gr
import pandas as pd
from transformers import pipeline

# 1) Read the synthetic SAP profitability extract into a DataFrame
df = pd.read_csv("synthetic_profit.csv")

# 2) TAPAS table-QA pipeline, kept as the fallback for questions the
#    rule-based parser cannot handle. The same checkpoint supplies both the
#    model weights and the tokenizer; device=-1 is the transformers
#    convention for running on CPU.
_TAPAS_CHECKPOINT = "google/tapas-base-finetuned-wtq"
tapas = pipeline(
    task="table-question-answering",
    model=_TAPAS_CHECKPOINT,
    tokenizer=_TAPAS_CHECKPOINT,
    device=-1,
)

# Every cell is cast to str before the rows are handed to TAPAS as records
_stringified = df.astype(str)
table = _stringified.to_dict(orient="records")

# 3) Question vocabulary: keyword -> pandas aggregation method name
OPERATIONS = {
    **dict.fromkeys(("total", "sum"), "sum"),
    **dict.fromkeys(("average", "mean"), "mean"),
}

# Question vocabulary: keyword -> DataFrame column name.
# Order matters: parse_and_compute takes the first keyword found in the
# question, so "profit margin" has to be listed ahead of the shorter "profit".
COLUMNS = dict(
    [
        ("revenue", "Revenue"),
        ("cost", "Cost"),
        ("profit margin", "ProfitMargin"),
        ("profit", "Profit"),
        ("margin", "ProfitMargin"),
    ]
)

def _longest_mention(values, text: str) -> str | None:
    """Return the longest string in *values* that occurs in *text*.

    *text* must already be lower-cased; each candidate is lower-cased before
    the substring test.  Non-string entries (for example NaN from blank CSV
    cells) and empty strings are skipped.  Preferring the longest hit keeps a
    value such as "North" from shadowing "Northeast".  Ties keep the earliest
    value, and None is returned when nothing matches.
    """
    best = None
    for value in values:
        if not isinstance(value, str) or not value:
            continue
        if value.lower() in text and (best is None or len(value) > len(best)):
            best = value
    return best


def parse_and_compute(question: str) -> str | None:
    """Answer a "sum/mean of <column> for <product> in <quarter> <year>" question.

    The question is scanned (case-insensitively) for:
      * an operation keyword from OPERATIONS,
      * a column keyword from COLUMNS,
      * a product, an optional region and a fiscal quarter, each matched
        against the distinct values present in ``df``,
      * a four-digit year of the form 20xx.

    Args:
        question: Free-text question typed by the user.

    Returns:
        A formatted result line when the aggregate could be computed, a
        "No data found ..." line when the question parsed but no non-missing
        rows match the filters, or None when a required part is missing or
        the aggregation fails (the caller then falls back to TAPAS).
    """
    q = (question or "").lower()

    # 1) What operation?  (first keyword found wins)
    op = next((OPERATIONS[k] for k in OPERATIONS if k in q), None)
    # 2) Which column?  (first keyword found wins; COLUMNS is ordered for this)
    col = next((COLUMNS[k] for k in COLUMNS if k in q), None)
    # 3) Which product?
    prod = _longest_mention(df["Product"].unique(), q)
    # 4) Which region? (optional)
    region = _longest_mention(df["Region"].unique(), q)
    # 5) Which year?
    m_y = re.search(r"\b(20\d{2})\b", q)
    year = int(m_y.group(1)) if m_y else None
    # 6) Which quarter?
    qtr = _longest_mention(df["FiscalQuarter"].unique(), q)

    # Must have at least: op, col, prod, year, qtr
    if None in (op, col, prod, year, qtr):
        return None

    # Build the row filter; region only narrows it when one was mentioned
    mask = (
        (df["Product"] == prod) &
        (df["FiscalYear"] == year) &
        (df["FiscalQuarter"] == qtr)
    )
    if region is not None:
        mask = mask & (df["Region"] == region)

    region_part = f" in {region}" if region is not None else ""
    scope = f"{prod}{region_part}, {qtr} {year}"

    try:
        series = df.loc[mask, col]
        # An empty (or all-missing) selection would otherwise be reported as
        # "0.00" for sum or "nan" for mean, which reads like a real figure.
        if series.count() == 0:
            return f"No data found for {scope}."
        # float() makes a non-numeric aggregate fail here, inside the guard,
        # instead of later in the format spec.
        result = float(getattr(series, op)())
    except (KeyError, TypeError, ValueError):
        # Missing column or non-numeric data: let the caller use its fallback
        return None

    return f"{op.capitalize()} {col} for {scope}: {result:.2f}"

def answer(question: str) -> str:
    """Answer a natural-language question about the profitability data.

    The rule-based pandas parser is tried first; anything it cannot handle
    is passed to the TAPAS table-question-answering pipeline.

    Args:
        question: Text entered by the user.

    Returns:
        The computed answer, a prompt to enter a question when the input is
        blank, "No answer found." when TAPAS returns nothing, or a
        "Pipeline error" message when the TAPAS call raises.
    """
    # A blank question cannot be answered by either path; say so directly
    # rather than surfacing it as a pipeline error.
    if not question or not question.strip():
        return "Please enter a question."

    # 1) Try the generic parser + Pandas
    out = parse_and_compute(question)
    if out is not None:
        return out

    # 2) Fallback to TAPAS for anything else
    try:
        res = tapas(table=table, query=question)
        # `or` also covers an "answer" key that is present but empty
        return res.get("answer") or "No answer found."
    except Exception as e:
        # UI boundary: report the failure as text instead of raising
        return f"❌ Pipeline error:\n{e}"

# 4) Gradio UI: a two-line question box in, a two-line answer box out,
#    both wired to answer()
question_box = gr.Textbox(
    lines=2,
    placeholder="e.g. What is the total revenue for Product A in Q1 2024?",
)
answer_box = gr.Textbox(lines=2)
app_description = (
    "Generic sum/mean parsing via Pandas (region optional), "
    "falling back to TAPAS only if the question doesn't match."
)

iface = gr.Interface(
    fn=answer,
    inputs=question_box,
    outputs=answer_box,
    title="SAP Profitability Q&A",
    description=app_description,
    allow_flagging="never",
)

# Launch the server only when executed as a script (not on import),
# listening on 0.0.0.0:7860
if __name__ == "__main__":
    iface.launch(server_port=7860, server_name="0.0.0.0")