PD03 committed on
Commit
aa97025
·
verified ·
1 Parent(s): 60fddfe

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +31 -41
app.py CHANGED
@@ -1,55 +1,45 @@
1
- import pandas as pd
2
  import gradio as gr
3
- from transformers import pipeline
4
- from langchain_community.llms import HuggingFacePipeline
5
- from langchain_experimental.agents import create_pandas_dataframe_agent
6
- from langchain.agents.agent_types import AgentType
7
 
8
- # Load data
9
- df = pd.read_csv("synthetic_profit.csv")
10
 
11
- # Lightweight Hugging Face pipeline (Flan-T5-base)
12
- hf_pipeline = pipeline(
13
- task="text2text-generation",
14
- model="google/flan-t5-base",
15
- device=-1 # CPU
16
- )
17
 
18
- # LangChain LLM
19
- llm = HuggingFacePipeline(pipeline=hf_pipeline)
20
 
21
- # Create LangChain agent with explicit parsing-error handling
22
- agent = create_pandas_dataframe_agent(
23
- llm,
24
- df,
25
- verbose=True,
26
- agent_type=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
27
- handle_parsing_errors=True,
28
- allow_dangerous_code=True
29
  )
30
 
31
- # Answer query function with clearer prompts
32
- def answer(query: str) -> str:
33
  try:
34
- prompt = (
35
- f"Answer this clearly and numerically without scientific notation: {query}. "
36
- "If multiple numbers, provide their total sum clearly."
37
- )
38
- response = agent.run(prompt)
39
- return f"📊 {response}"
40
  except Exception as e:
41
- return f"Error: {str(e)}"
42
 
43
  # Gradio interface
44
- demo = gr.Interface(
45
- fn=answer,
46
- inputs=gr.Textbox(
47
- lines=2,
48
- placeholder="E.g., 'Total revenue for Product B in EMEA during Q2 2024'"
49
- ),
50
  outputs="text",
51
- title="🟢 SAP Profitability Data Chat (Flan-T5 + Pandas)",
52
- description="Ask clearly numeric questions about synthetic SAP profitability data. Results are precise and human-readable."
 
 
 
53
  )
54
 
55
- demo.launch()
 
 
1
+ # app.py
2
  import gradio as gr
3
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
4
+ import pandas as pd
 
 
5
 
6
+ # Load your synthetic profitability dataset
7
+ df = pd.read_csv('synthetic_profit.csv')
8
 
9
+ # Initialize the TAPEX small model fine-tuned on WikiSQL
10
+ MODEL_ID = "microsoft/tapex-small-finetuned-wikisql"
 
 
 
 
11
 
12
+ tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
13
+ model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_ID)
14
 
15
+ # Build a table-QA pipeline
16
+ table_qa = pipeline(
17
+ "table-question-answering",
18
+ model=model,
19
+ tokenizer=tokenizer,
20
+ framework="pt",
21
+ device=-1 # set to 0 if you enable GPU in your Space
 
22
  )
23
 
24
+ def answer_profitability(question):
25
+ table = df.to_dict(orient="records")
26
  try:
27
+ out = table_qa(table=table, query=question)
28
+ return out.get("answer", "No answer found.")
 
 
 
 
29
  except Exception as e:
30
+ return f"Error: {e}"
31
 
32
  # Gradio interface
33
+ iface = gr.Interface(
34
+ fn=answer_profitability,
35
+ inputs=gr.Textbox(lines=2, placeholder="Ask a question about profitability…"),
 
 
 
36
  outputs="text",
37
+ title="SAP Profitability Q&A (TAPEX-Small)",
38
+ description="""
39
+ Ask free-form questions on the synthetic profitability dataset.
40
+ Powered end-to-end by microsoft/tapex-small-finetuned-wikisql.
41
+ """
42
  )
43
 
44
+ if __name__ == "__main__":
45
+ iface.launch(server_name="0.0.0.0", server_port=7860)