import streamlit as st
import pandas as pd
import os
import google.generativeai as genai

# Configure Gemini from the GEMINI_API_KEY environment variable
GEMINI_API_KEY = os.environ.get('GEMINI_API_KEY')
if not GEMINI_API_KEY:
    st.error("GEMINI_API_KEY environment variable is not set.")
    st.stop()
genai.configure(api_key=GEMINI_API_KEY)
model = genai.GenerativeModel('gemini-pro')

class GeminiDataChatbot:
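    """Streamlit chatbot that answers questions about an uploaded CSV using the Gemini API."""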
    def __init__(self):
        if 'uploaded_df' not in st.session_state:
            st.session_state.uploaded_df = None
        if 'chat_history' not in st.session_state:
            st.session_state.chat_history = []
    
    def render_interface(self):
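        """Render the file uploader and, once data is loaded, the chat window."""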
        st.title("📊 Data Analysis Chatbot")
        st.write("Upload your CSV file and ask questions about your data")
        
        # File upload section
        uploaded_file = st.file_uploader("Choose a CSV file", type="csv")
        
        if uploaded_file is not None:
            self._process_uploaded_file(uploaded_file)
        
        # Chat interface
        if st.session_state.uploaded_df is not None:
            self._render_chat_window()
    
    def _process_uploaded_file(self, uploaded_file):
        """Load the CSV into session state and run a one-time initial Gemini analysis."""
        try:
            df = pd.read_csv(uploaded_file)
            st.session_state.uploaded_df = df
            st.success("Data successfully loaded!")
            
            with st.expander("View Data Preview"):
                st.dataframe(df.head())
            
            # Streamlit reruns this method on every interaction, so only run
            # the initial Gemini analysis once per uploaded file.
            if st.session_state.get("analyzed_file_name") == uploaded_file.name:
                return
            st.session_state.analyzed_file_name = uploaded_file.name
            
            # Initial analysis prompt
            initial_prompt = f"""
            I have uploaded a dataset with {len(df)} rows and {len(df.columns)} columns.
            Columns: {', '.join(df.columns)}.
            First give a very brief (2-3 sentence) overview of what this data might contain.
            Then suggest 3 specific questions I could ask about this data.
            """
            
            with st.spinner("Analyzing your data..."):
                response = self._generate_gemini_response(initial_prompt, df)
                st.session_state.chat_history.append({
                    "role": "assistant", 
                    "content": response
                })
                
        except Exception as e:
            st.error(f"Error processing file: {str(e)}")
    
    def _render_chat_window(self):
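        """Display the chat history and handle new user questions about the data."""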
        st.subheader("Chat About Your Data")
        
        # Display chat history
        for message in st.session_state.chat_history:
            with st.chat_message(message["role"]):
                st.markdown(message["content"])
        
        # User input
        if prompt := st.chat_input("Ask about your data..."):
            # Add user message to chat history
            st.session_state.chat_history.append({"role": "user", "content": prompt})
            
            # Display user message
            with st.chat_message("user"):
                st.markdown(prompt)
            
            # Generate and display assistant response
            with st.chat_message("assistant"):
                with st.spinner("Thinking..."):
                    response = self._generate_gemini_response(prompt, st.session_state.uploaded_df)
                    st.markdown(response)
            
            # Add assistant response to chat history
            st.session_state.chat_history.append({"role": "assistant", "content": response})
    
    def _generate_gemini_response(self, prompt: str, df: pd.DataFrame) -> str:
        """Generate response using Gemini API with data context"""
        try:
            # Create data summary for context
            data_summary = f"""
            Data Summary:
            - Shape: {df.shape}
            - Columns: {', '.join(df.columns)}
            - First 5 rows:
            {df.head().to_markdown()}
            """
            
            # Create prompt with context
            full_prompt = f"""
            You are a data analysis assistant. The user has uploaded a dataset with the following characteristics:
            {data_summary}
            
            User Question: {prompt}
            
            Provide a detailed response answering their question about the data. If appropriate, include:
            - Relevant statistics
            - Potential visualizations that would help
            - Any data quality issues to consider
            - Business insights if applicable
            """
            
            response = model.generate_content(full_prompt)
            return response.text
        
        except Exception as e:
            return f"Sorry, I encountered an error processing your request: {str(e)}"

# Initialize and run the chatbot
if __name__ == "__main__":
    chatbot = GeminiDataChatbot()
    chatbot.render_interface()
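
# A minimal way to run this app locally (assuming the file is saved as app.py):
#   pip install streamlit pandas tabulate google-generativeai
#   export GEMINI_API_KEY="your-api-key"
#   streamlit run app.py
# tabulate is needed because the data summary uses DataFrame.to_markdown().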