Spaces:
Running
Running
File size: 2,954 Bytes
dfe31fe 5edc373 a5ece8b aa97025 25e4074 887b999 5edc373 0b8ba87 25e4074 5edc373 25e4074 e2b2add 0b8ba87 5edc373 02d55fb 5edc373 dfe31fe 5edc373 dfe31fe e784f1e 5edc373 dfe31fe 25e4074 dfe31fe 5edc373 dfe31fe e784f1e 79c9d08 b1f2bdd dfe31fe |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 |
# app.py
import re
import gradio as gr
import pandas as pd
from transformers import pipeline
# 1) Load your synthetic SAP data
df = pd.read_csv("synthetic_profit.csv")
# 2) Prepare TAPAS as a fallback (optional)
tapas = pipeline(
"table-question-answering",
model="google/tapas-base-finetuned-wtq",
tokenizer="google/tapas-base-finetuned-wtq",
device=-1
)
table = df.astype(str).to_dict(orient="records")
# 3) Mapping words β pandas methods and columns
OPERATIONS = {
"total": "sum",
"sum": "sum",
"average": "mean",
"mean": "mean"
}
COLUMNS = {
"revenue": "Revenue",
"cost": "Cost",
"profit margin": "ProfitMargin",
"profit": "Profit",
"margin": "ProfitMargin"
}
def parse_and_compute(question: str) -> str | None:
q = question.lower()
# 1) What operation?
op = next((OPERATIONS[k] for k in OPERATIONS if k in q), None)
# 2) Which column?
col = next((COLUMNS[k] for k in COLUMNS if k in q), None)
# 3) Which product?
prod = next((p for p in df["Product"].unique() if p.lower() in q), None)
# 4) Which region? (optional)
region = next((r for r in df["Region"].unique() if r.lower() in q), None)
# 5) Which year?
m_y = re.search(r"\b(20\d{2})\b", q)
year = int(m_y.group(1)) if m_y else None
# 6) Which quarter?
qtr = next((fq for fq in df["FiscalQuarter"].unique() if fq.lower() in q), None)
# Must have at least: op, col, prod, year, qtr
if None in (op, col, prod, year, qtr):
return None
# Build the mask
mask = (
(df["Product"] == prod) &
(df["FiscalYear"] == year) &
(df["FiscalQuarter"] == qtr)
)
if region:
mask &= (df["Region"] == region)
# Compute
try:
series = df.loc[mask, col]
result = getattr(series, op)()
except Exception:
return None
# Friendly formatting
region_part = f" in {region}" if region else ""
return f"{op.capitalize()} {col} for {prod}{region_part}, {qtr} {year}: {result:.2f}"
def answer(question: str) -> str:
# 1) Try the generic parser + Pandas
out = parse_and_compute(question)
if out is not None:
return out
# 2) Fallback to TAPAS for anything else
try:
res = tapas(table=table, query=question)
return res.get("answer", "No answer found.")
except Exception as e:
return f"β Pipeline error:\n{e}"
# 4) Gradio UI
iface = gr.Interface(
fn=answer,
inputs=gr.Textbox(lines=2, placeholder="e.g. What is the total revenue for Product A in Q1 2024?"),
outputs=gr.Textbox(lines=2),
title="SAP Profitability Q&A",
description=(
"Generic sum/mean parsing via Pandas (region optional), "
"falling back to TAPAS only if the question doesn't match."
),
allow_flagging="never",
)
if __name__ == "__main__":
iface.launch(server_name="0.0.0.0", server_port=7860) |