Prospea / data_processor.py
Pranav0111's picture
Update data_processor.py
c1c9488 verified
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
import streamlit as st
from typing import Dict, List, Any
class DataProcessor:
    """Load a CSV into a DataFrame, classify its columns, and build charts.

    Attributes are populated by ``load_data``:
      * ``data`` -- the loaded ``pandas.DataFrame`` (``None`` until a load
        succeeds).
      * ``numeric_columns`` / ``date_columns`` / ``categorical_columns`` --
        column names bucketed by ``_classify_columns``.
    """

    def __init__(self):
        self.data = None
        self.numeric_columns = []
        self.categorical_columns = []
        self.date_columns = []

    def load_data(self, file) -> bool:
        """Read *file* with ``pd.read_csv`` and classify the resulting columns.

        Args:
            file: Anything ``pd.read_csv`` accepts (path, buffer, upload).

        Returns:
            ``True`` on success. On any failure the error is reported through
            ``st.error`` and ``False`` is returned instead of raising.
        """
        try:
            self.data = pd.read_csv(file)
            self._classify_columns()
        except Exception as e:
            # UI boundary: surface the problem to the user rather than crash.
            st.error(f"Error loading data: {e}")
            return False
        return True

    def _classify_columns(self):
        """Bucket every column of ``self.data`` as numeric, date or categorical.

        The three lists are rebuilt from scratch on every call so that loading
        a second file does not accumulate duplicate or stale column names.
        """
        numeric, dates, categorical = [], [], []
        for col in self.data.columns:
            series = self.data[col]
            if pd.api.types.is_numeric_dtype(series):
                numeric.append(col)
            elif pd.api.types.is_datetime64_any_dtype(series):
                dates.append(col)
            elif self._parses_as_datetime(series):
                # Not a datetime dtype, but the values convert cleanly.
                dates.append(col)
            else:
                categorical.append(col)
        # Assign only after the full pass so the lists always describe one frame.
        self.numeric_columns = numeric
        self.date_columns = dates
        self.categorical_columns = categorical

    @staticmethod
    def _parses_as_datetime(series) -> bool:
        """Return True when ``pd.to_datetime`` converts *series* without raising."""
        try:
            pd.to_datetime(series)
        except (ValueError, TypeError, OverflowError):
            return False
        return True

    def get_basic_stats(self) -> Dict[str, Any]:
        """Return summary statistics for the loaded data.

        Returns:
            An empty dict when nothing is loaded; otherwise a dict with
            ``'summary'`` (``describe()`` of the numeric columns, or an empty
            DataFrame when there are none), ``'missing_values'`` (per-column
            null counts), ``'row_count'`` and ``'column_count'``.
        """
        if self.data is None:
            return {}
        if self.numeric_columns:
            summary = self.data[self.numeric_columns].describe()
        else:
            # describe() raises ValueError on a frame that has no columns.
            summary = pd.DataFrame()
        return {
            'summary': summary,
            'missing_values': self.data.isnull().sum(),
            'row_count': len(self.data),
            'column_count': len(self.data.columns),
        }

    def create_visualization(self, chart_type: str, x_col: str, y_col: str, color_col: str = None) -> "go.Figure":
        """Build a plotly express figure from ``self.data``.

        Args:
            chart_type: One of "Line Plot", "Bar Plot", "Scatter Plot" or
                "Box Plot"; any other value produces a histogram.
            x_col: Column passed as ``x``.
            y_col: Column passed as ``y`` (not used for the histogram).
            color_col: Optional column passed as ``color``.

        Returns:
            The figure created by the matching ``plotly.express`` function.
        """
        plotters = {
            "Line Plot": px.line,
            "Bar Plot": px.bar,
            "Scatter Plot": px.scatter,
            "Box Plot": px.box,
        }
        plotter = plotters.get(chart_type)
        if plotter is None:
            # Fallback for unrecognised chart types: histogram takes x only.
            return px.histogram(self.data, x=x_col, color=color_col)
        return plotter(self.data, x=x_col, y=y_col, color=color_col)