# app.py
"""Gradio Q&A demo over the synthetic profitability table.

Questions that name an operation, a column, a product, a fiscal year and a
fiscal quarter are answered directly with pandas (region is an optional extra
filter). Anything else is handed to a TAPAS table-question-answering pipeline.
"""
import re

import gradio as gr
import pandas as pd
from transformers import pipeline

# 1) Load your synthetic SAP data
df = pd.read_csv("synthetic_profit.csv")

# 2) Prepare TAPAS as a fallback (optional)
tapas = pipeline(
    "table-question-answering",
    model="google/tapas-base-finetuned-wtq",
    tokenizer="google/tapas-base-finetuned-wtq",
    device=-1,
)
# The fallback receives an all-string copy of the table as a list of records.
table = df.astype(str).to_dict(orient="records")

# 3) Mapping words → pandas methods and columns
OPERATIONS = {
    "total": "sum",
    "sum": "sum",
    "average": "mean",
    "mean": "mean",
}

# Lookup is first-match in insertion order, so "profit margin" must stay ahead
# of the shorter "profit" and "margin" keys it contains.
COLUMNS = {
    "revenue": "Revenue",
    "cost": "Cost",
    "profit margin": "ProfitMargin",
    "profit": "Profit",
    "margin": "ProfitMargin",
}

# Four-digit year starting with "20", compiled once instead of per question.
_YEAR_RE = re.compile(r"\b(20\d{2})\b")


def _longest_mention(values, text: str):
    """Return the longest string in *values* whose lowercase form is in *text*.

    Non-string entries (such as NaN from blank CSV cells) and empty strings
    are skipped. Preferring the longest hit keeps a short value from shadowing
    a longer one that contains it (e.g. "Product A" vs "Product AB"); ties go
    to the value that appears first. Returns None when nothing matches.
    """
    hits = [v for v in values if isinstance(v, str) and v and v.lower() in text]
    return max(hits, key=len, default=None)


def parse_and_compute(question: str) -> str | None:
    """Answer a sum/mean question with pandas, or return None if it can't.

    The question is lowercased and scanned for an operation keyword
    (OPERATIONS), a column keyword (COLUMNS), a product, an optional region,
    a 20xx year and a fiscal quarter, the last four taken from the values
    present in ``df``.

    Returns:
        A formatted answer string; a "No data found" message when the question
        parses but no row (or no non-missing value) matches the filters; or
        None when a required part is missing or the aggregation fails, so the
        caller can fall back to another strategy.
    """
    q = question.lower()

    # Operation and column keep first-match-in-dict-order semantics.
    op = next((method for word, method in OPERATIONS.items() if word in q), None)
    col = next((name for word, name in COLUMNS.items() if word in q), None)

    # Data-driven values: take the most specific (longest) mention.
    prod = _longest_mention(df["Product"].unique(), q)
    region = _longest_mention(df["Region"].unique(), q)  # optional
    qtr = _longest_mention(df["FiscalQuarter"].unique(), q)

    year_match = _YEAR_RE.search(q)
    year = int(year_match.group(1)) if year_match else None

    # Must have at least: op, col, prod, year, qtr
    if None in (op, col, prod, year, qtr):
        return None

    # Build the mask
    mask = (
        (df["Product"] == prod)
        & (df["FiscalYear"] == year)
        & (df["FiscalQuarter"] == qtr)
    )
    if region:
        mask &= df["Region"] == region

    region_part = f" in {region}" if region else ""
    scope = f"{prod}{region_part}, {qtr} {year}"
    no_data = f"No data found for {scope}."

    # Deliberately broad: any failure here (missing column, non-numeric data,
    # unformattable result) means "let the fallback try", not a crash.
    try:
        series = df.loc[mask, col]
        if series.empty:
            # An empty selection would otherwise report sum 0.00 / mean nan
            # as though it were a real figure.
            return no_data
        result = getattr(series, op)()
        if pd.isna(result):
            return no_data
        formatted = f"{result:.2f}"
    except Exception:
        return None

    # Friendly formatting
    return f"{op.capitalize()} {col} for {scope}: {formatted}"


def answer(question: str) -> str:
    """Answer *question*, trying the pandas parser first and TAPAS second.

    Blank input gets a short prompt instead of being sent to the model. Errors
    raised by the TAPAS pipeline are returned as text so the UI shows them.
    """
    if not question or not question.strip():
        return "Please enter a question."

    # 1) Try the generic parser + Pandas
    out = parse_and_compute(question)
    if out is not None:
        return out

    # 2) Fallback to TAPAS for anything else
    try:
        res = tapas(table=table, query=question)
        return res.get("answer", "No answer found.")
    except Exception as e:
        return f"❌ Pipeline error:\n{e}"


# 4) Gradio UI
# NOTE(review): newer Gradio releases appear to rename `allow_flagging` to
# `flagging_mode` — confirm against the Gradio version this app is pinned to.
iface = gr.Interface(
    fn=answer,
    inputs=gr.Textbox(
        lines=2,
        placeholder="e.g. What is the total revenue for Product A in Q1 2024?",
    ),
    outputs=gr.Textbox(lines=2),
    title="SAP Profitability Q&A",
    description=(
        "Generic sum/mean parsing via Pandas (region optional), "
        "falling back to TAPAS only if the question doesn't match."
    ),
    allow_flagging="never",
)

if __name__ == "__main__":
    iface.launch(server_name="0.0.0.0", server_port=7860)