# Spaces: Running
from typing import Any, Dict, List, Optional

import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import streamlit as st
class DataProcessor:
    """Load a CSV into a DataFrame and expose summary stats and Plotly charts.

    ``data`` is ``None`` and the three column lists are empty until
    ``load_data`` succeeds.
    """

    def __init__(self):
        # DataFrame from the most recent successful load, or None before that.
        self.data = None
        # Column names of ``data``, partitioned by ``_classify_columns``.
        self.numeric_columns = []
        self.categorical_columns = []
        self.date_columns = []

    def load_data(self, file) -> bool:
        """Read a CSV from *file* and classify its columns.

        Args:
            file: Whatever ``pandas.read_csv`` accepts as its first argument.

        Returns:
            ``True`` on success. ``False`` if reading or classifying raised;
            the error text is then reported through ``st.error``.
        """
        # Broad catch is deliberate: this is the UI boundary, and any failure
        # is surfaced to the user instead of crashing the app.
        try:
            self.data = pd.read_csv(file)
            self._classify_columns()
        except Exception as e:
            st.error(f"Error loading data: {e}")
            return False
        return True

    def _classify_columns(self):
        """Sort every column of ``self.data`` into numeric, date or categorical.

        A column is numeric if its dtype is numeric, a date column if its
        dtype is datetime64 or ``pd.to_datetime`` can convert it, and
        categorical otherwise.
        """
        # Start from scratch so a second load does not accumulate duplicates
        # or keep column names that belonged to the previous file.
        self.numeric_columns = []
        self.categorical_columns = []
        self.date_columns = []

        for col in self.data.columns:
            series = self.data[col]
            if pd.api.types.is_numeric_dtype(series):
                self.numeric_columns.append(col)
            elif pd.api.types.is_datetime64_any_dtype(series):
                self.date_columns.append(col)
            else:
                # Probe only: the converted values are discarded, the column
                # itself is left as loaded.
                try:
                    pd.to_datetime(series)
                except Exception:
                    # Not parseable as dates -> treat as categorical.
                    self.categorical_columns.append(col)
                else:
                    self.date_columns.append(col)

    def get_basic_stats(self) -> Dict[str, Any]:
        """Return basic descriptive statistics for the loaded data.

        Returns:
            An empty dict when no data is loaded. Otherwise a dict with:
            ``'summary'`` (``describe()`` of the numeric columns, or an empty
            DataFrame when there are none), ``'missing_values'`` (per-column
            null counts), ``'row_count'`` and ``'column_count'``.
        """
        if self.data is None:
            return {}

        if self.numeric_columns:
            summary = self.data[self.numeric_columns].describe()
        else:
            # describe() raises ValueError on a frame without columns.
            summary = pd.DataFrame()

        return {
            'summary': summary,
            'missing_values': self.data.isnull().sum(),
            'row_count': len(self.data),
            'column_count': len(self.data.columns),
        }

    def create_visualization(
        self,
        chart_type: str,
        x_col: str,
        y_col: str,
        color_col: Optional[str] = None,
    ) -> "go.Figure":
        """Build a Plotly Express figure from the loaded data.

        Args:
            chart_type: ``"Line Plot"``, ``"Bar Plot"``, ``"Scatter Plot"`` or
                ``"Box Plot"``. Any other value produces a histogram of
                *x_col*.
            x_col: Column passed as ``x``.
            y_col: Column passed as ``y``; not used for the histogram.
            color_col: Optional column passed as ``color``.

        Returns:
            The figure returned by the selected Plotly Express function.

        Raises:
            ValueError: If no data has been loaded yet.
        """
        if self.data is None:
            raise ValueError("No data loaded; call load_data() first")

        xy_plotters = {
            "Line Plot": px.line,
            "Bar Plot": px.bar,
            "Scatter Plot": px.scatter,
            "Box Plot": px.box,
        }
        plotter = xy_plotters.get(chart_type)
        if plotter is None:
            # Fallback for every unrecognised chart type.
            return px.histogram(self.data, x=x_col, color=color_col)
        return plotter(self.data, x=x_col, y=y_col, color=color_col)