Spaces:
Running
Running
# app.py | |
import re | |
import gradio as gr | |
import pandas as pd | |
from transformers import pipeline | |
# 1) Load the synthetic SAP profitability data once at import time.
df = pd.read_csv("synthetic_profit.csv")

# 2) TAPAS table-QA pipeline, used only as a fallback (optional).
#    Model and tokenizer come from the same checkpoint; device=-1 is the
#    transformers convention for running on CPU.
_TAPAS_CHECKPOINT = "google/tapas-base-finetuned-wtq"
tapas = pipeline(
    task="table-question-answering",
    model=_TAPAS_CHECKPOINT,
    tokenizer=_TAPAS_CHECKPOINT,
    device=-1,
)

# Every cell is cast to str and the frame is flattened to a list of row
# dicts; this is the table handed to the TAPAS pipeline on each query.
table = df.astype(str).to_dict(orient="records")
# 3) Mapping words → pandas methods and columns

# Aggregation keyword -> name of the pandas Series method to call.
OPERATIONS = {
    **dict.fromkeys(("total", "sum"), "sum"),
    **dict.fromkeys(("average", "mean"), "mean"),
}

# Column keyword -> DataFrame column name.
# Insertion order matters: keys are tried in order and the first hit wins,
# so "profit margin" must come before the shorter "profit".
COLUMNS = dict(
    [
        ("revenue", "Revenue"),
        ("cost", "Cost"),
        ("profit margin", "ProfitMargin"),
        ("profit", "Profit"),
        ("margin", "ProfitMargin"),
    ]
)
def parse_and_compute(question: str) -> str | None: | |
q = question.lower() | |
# 1) What operation? | |
op = next((OPERATIONS[k] for k in OPERATIONS if k in q), None) | |
# 2) Which column? | |
col = next((COLUMNS[k] for k in COLUMNS if k in q), None) | |
# 3) Which product? | |
prod = next((p for p in df["Product"].unique() if p.lower() in q), None) | |
# 4) Which region? (optional) | |
region = next((r for r in df["Region"].unique() if r.lower() in q), None) | |
# 5) Which year? | |
m_y = re.search(r"\b(20\d{2})\b", q) | |
year = int(m_y.group(1)) if m_y else None | |
# 6) Which quarter? | |
qtr = next((fq for fq in df["FiscalQuarter"].unique() if fq.lower() in q), None) | |
# Must have at least: op, col, prod, year, qtr | |
if None in (op, col, prod, year, qtr): | |
return None | |
# Build the mask | |
mask = ( | |
(df["Product"] == prod) & | |
(df["FiscalYear"] == year) & | |
(df["FiscalQuarter"] == qtr) | |
) | |
if region: | |
mask &= (df["Region"] == region) | |
# Compute | |
try: | |
series = df.loc[mask, col] | |
result = getattr(series, op)() | |
except Exception: | |
return None | |
# Friendly formatting | |
region_part = f" in {region}" if region else "" | |
return f"{op.capitalize()} {col} for {prod}{region_part}, {qtr} {year}: {result:.2f}" | |
def answer(question: str) -> str:
    """Answer a user question about the profitability table.

    The rule-based pandas parser (``parse_and_compute``) is tried first;
    if it returns ``None`` the question is passed to the TAPAS pipeline
    together with the module-level ``table``.

    Args:
        question: Text entered by the user.

    Returns:
        The parser's formatted result, the TAPAS ``"answer"`` field,
        "No answer found." when TAPAS yields no answer text, a prompt
        when the question is blank, or a "Pipeline error" message if the
        TAPAS call raises.
    """
    # Nothing to parse or query: skip both the parser and the model.
    if not question or not question.strip():
        return "Please enter a question."

    # 1) Try the generic parser + Pandas
    out = parse_and_compute(question)
    if out is not None:
        return out

    # 2) Fallback to TAPAS for anything else
    try:
        res = tapas(table=table, query=question)
        # `or` also covers an empty-string answer, which would otherwise
        # be shown as a blank output box.
        return res.get("answer") or "No answer found."
    except Exception as e:
        # UI boundary: show the failure to the user instead of crashing.
        return f"❌ Pipeline error:\n{e}"
# 4) Gradio UI: one text box in, one text box out, wired to answer().
_EXAMPLE_QUESTION = "e.g. What is the total revenue for Product A in Q1 2024?"
_DESCRIPTION = (
    "Generic sum/mean parsing via Pandas (region optional), "
    "falling back to TAPAS only if the question doesn't match."
)

iface = gr.Interface(
    fn=answer,
    inputs=gr.Textbox(lines=2, placeholder=_EXAMPLE_QUESTION),
    outputs=gr.Textbox(lines=2),
    title="SAP Profitability Q&A",
    description=_DESCRIPTION,
    allow_flagging="never",
)

if __name__ == "__main__":
    # Start the web server only when run as a script, not on import.
    iface.launch(server_name="0.0.0.0", server_port=7860)