from typing import Any, Dict, List, Optional

import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import streamlit as st


class DataProcessor:
    """Load a CSV into a DataFrame, classify its columns and build charts.

    Attributes are populated by :meth:`load_data`; until it succeeds,
    ``data`` is ``None`` and the three column lists are empty.
    """

    def __init__(self):
        """Create an empty processor with no data loaded."""
        self.data: Optional[pd.DataFrame] = None
        self.numeric_columns: List[str] = []
        self.categorical_columns: List[str] = []
        self.date_columns: List[str] = []

    def load_data(self, file) -> bool:
        """Read *file* with ``pd.read_csv`` and classify the resulting columns.

        Args:
            file: Anything ``pd.read_csv`` accepts (path, buffer, upload).

        Returns:
            ``True`` on success. On any failure the error is reported through
            ``st.error`` and ``False`` is returned instead of raising.
        """
        try:
            self.data = pd.read_csv(file)
            self._classify_columns()
        except Exception as e:
            # UI boundary: surface the problem to the user rather than crash.
            st.error(f"Error loading data: {e}")
            return False
        return True

    def _classify_columns(self):
        """Sort the columns of ``self.data`` into numeric, date and categorical.

        A column is numeric if its dtype is numeric, a date column if its
        dtype is datetime or every value parses with ``pd.to_datetime``, and
        categorical otherwise. The column data itself is left unconverted.
        """
        # Start from scratch so loading a second file does not accumulate
        # (or keep stale) column names from the previous one.
        self.numeric_columns = []
        self.categorical_columns = []
        self.date_columns = []

        for col in self.data.columns:
            series = self.data[col]
            if pd.api.types.is_numeric_dtype(series):
                self.numeric_columns.append(col)
            elif pd.api.types.is_datetime64_any_dtype(series):
                self.date_columns.append(col)
            else:
                try:
                    # Probe only: the parsed result is deliberately discarded.
                    pd.to_datetime(series)
                except (ValueError, TypeError, OverflowError):
                    # Not parseable as dates -> treat as categorical.
                    self.categorical_columns.append(col)
                else:
                    self.date_columns.append(col)

    def get_basic_stats(self) -> Dict[str, Any]:
        """Return summary statistics for the loaded data.

        Returns:
            An empty dict when no data is loaded; otherwise a dict with keys
            ``'summary'`` (``describe()`` of the numeric columns, or an empty
            DataFrame when there are none), ``'missing_values'`` (null count
            per column), ``'row_count'`` and ``'column_count'``.
        """
        if self.data is None:
            return {}

        if self.numeric_columns:
            summary = self.data[self.numeric_columns].describe()
        else:
            # describe() raises ValueError on a frame with no columns.
            summary = pd.DataFrame()

        return {
            'summary': summary,
            'missing_values': self.data.isnull().sum(),
            'row_count': len(self.data),
            'column_count': len(self.data.columns),
        }

    def create_visualization(
        self,
        chart_type: str,
        x_col: str,
        y_col: str,
        color_col: Optional[str] = None,
    ) -> go.Figure:
        """Build a Plotly Express figure from the loaded data.

        Args:
            chart_type: One of ``"Line Plot"``, ``"Bar Plot"``,
                ``"Scatter Plot"`` or ``"Box Plot"``. Any other value
                produces a histogram of *x_col*.
            x_col: Column plotted on the x axis.
            y_col: Column plotted on the y axis (ignored for the histogram).
            color_col: Optional column used to colour the marks.

        Returns:
            The figure created by the matching ``plotly.express`` function.

        Raises:
            ValueError: If no data has been loaded yet.
        """
        if self.data is None:
            raise ValueError("No data loaded; call load_data() first.")

        xy_plotters = {
            "Line Plot": px.line,
            "Bar Plot": px.bar,
            "Scatter Plot": px.scatter,
            "Box Plot": px.box,
        }
        plotter = xy_plotters.get(chart_type)
        if plotter is None:
            # Fallback for unrecognised chart types: histogram uses x only.
            return px.histogram(self.data, x=x_col, color=color_col)
        return plotter(self.data, x=x_col, y=y_col, color=color_col)