Pranav0111 committed on
Commit
c1c9488
·
verified ·
1 Parent(s): 74daaf8

Update data_processor.py

Browse files
Files changed (1) hide show
  1. data_processor.py +6 -84
data_processor.py CHANGED
@@ -1,8 +1,10 @@
1
  import pandas as pd
 
2
  import plotly.express as px
3
  import plotly.graph_objects as go
4
- from typing import List, Dict, Any
5
  import streamlit as st
 
 
6
 
7
  class DataProcessor:
8
  def __init__(self):
@@ -12,7 +14,6 @@ class DataProcessor:
12
  self.date_columns = []
13
 
14
  def load_data(self, file) -> bool:
15
- """Load and validate CSV data"""
16
  try:
17
  self.data = pd.read_csv(file)
18
  self._classify_columns()
@@ -22,7 +23,6 @@ class DataProcessor:
22
  return False
23
 
24
  def _classify_columns(self):
25
- """Classify columns into numeric, categorical, and date types"""
26
  for col in self.data.columns:
27
  if pd.api.types.is_numeric_dtype(self.data[col]):
28
  self.numeric_columns.append(col)
@@ -34,9 +34,8 @@ class DataProcessor:
34
  self.date_columns.append(col)
35
  except:
36
  self.categorical_columns.append(col)
37
-
38
  def get_basic_stats(self) -> Dict[str, Any]:
39
- """Calculate basic statistics for numeric columns"""
40
  if self.data is None:
41
  return {}
42
 
@@ -47,9 +46,8 @@ class DataProcessor:
47
  'column_count': len(self.data.columns)
48
  }
49
  return stats
50
-
51
  def create_visualization(self, chart_type: str, x_col: str, y_col: str, color_col: str = None) -> go.Figure:
52
- """Create different types of visualizations based on user selection"""
53
  if chart_type == "Line Plot":
54
  fig = px.line(self.data, x=x_col, y=y_col, color=color_col)
55
  elif chart_type == "Bar Plot":
@@ -61,80 +59,4 @@ class DataProcessor:
61
  else:
62
  fig = px.histogram(self.data, x=x_col, color=color_col)
63
 
64
- return fig
65
-
66
- def calculate_metrics(self, column: str) -> Dict[str, float]:
67
- """Calculate key metrics for a selected column"""
68
- if column not in self.numeric_columns:
69
- return {}
70
-
71
- metrics = {
72
- 'mean': self.data[column].mean(),
73
- 'median': self.data[column].median(),
74
- 'std': self.data[column].std(),
75
- 'min': self.data[column].min(),
76
- 'max': self.data[column].max(),
77
- 'skew': self.data[column].skew()
78
- }
79
- return metrics
80
-
81
- def render_analytics_page():
82
- st.title("Data Analytics Dashboard")
83
-
84
- # Initialize data processor
85
- processor = DataProcessor()
86
-
87
- # File upload
88
- uploaded_file = st.file_uploader("Upload your CSV data", type=['csv'])
89
- if uploaded_file is not None:
90
- if processor.load_data(uploaded_file):
91
- st.success("Data loaded successfully!")
92
-
93
- # Data Preview
94
- st.subheader("Data Preview")
95
- st.dataframe(processor.data.head())
96
-
97
- # Basic Stats
98
- st.subheader("Basic Statistics")
99
- stats = processor.get_basic_stats()
100
- st.write(stats['summary'])
101
-
102
- # Visualization Section
103
- st.subheader("Create Visualization")
104
- col1, col2, col3 = st.columns(3)
105
-
106
- with col1:
107
- chart_type = st.selectbox(
108
- "Select Chart Type",
109
- ["Line Plot", "Bar Plot", "Scatter Plot", "Box Plot", "Histogram"]
110
- )
111
-
112
- with col2:
113
- x_col = st.selectbox("Select X-axis", processor.data.columns)
114
-
115
- with col3:
116
- y_col = st.selectbox("Select Y-axis", processor.numeric_columns) if chart_type != "Histogram" else None
117
-
118
- color_col = st.selectbox("Select Color Variable (optional)",
119
- ['None'] + processor.categorical_columns)
120
- color_col = None if color_col == 'None' else color_col
121
-
122
- # Generate and display visualization
123
- fig = processor.create_visualization(
124
- chart_type,
125
- x_col,
126
- y_col if y_col else x_col,
127
- color_col
128
- )
129
- st.plotly_chart(fig, use_container_width=True)
130
-
131
- # Metrics Calculator
132
- st.subheader("Metric Calculator")
133
- metric_col = st.selectbox("Select column for metrics", processor.numeric_columns)
134
- metrics = processor.calculate_metrics(metric_col)
135
-
136
- # Display metrics in columns
137
- cols = st.columns(3)
138
- for i, (metric, value) in enumerate(metrics.items()):
139
- with cols[i % 3]:
140
- st.metric(label=metric.capitalize(), value=f"{value:.2f}")
 
1
  import pandas as pd
2
+ import numpy as np
3
  import plotly.express as px
4
  import plotly.graph_objects as go
 
5
  import streamlit as st
6
+ from typing import Dict, List, Any
7
+
8
 
9
  class DataProcessor:
10
  def __init__(self):
 
14
  self.date_columns = []
15
 
16
  def load_data(self, file) -> bool:
 
17
  try:
18
  self.data = pd.read_csv(file)
19
  self._classify_columns()
 
23
  return False
24
 
25
  def _classify_columns(self):
 
26
  for col in self.data.columns:
27
  if pd.api.types.is_numeric_dtype(self.data[col]):
28
  self.numeric_columns.append(col)
 
34
  self.date_columns.append(col)
35
  except:
36
  self.categorical_columns.append(col)
37
+
38
  def get_basic_stats(self) -> Dict[str, Any]:
 
39
  if self.data is None:
40
  return {}
41
 
 
46
  'column_count': len(self.data.columns)
47
  }
48
  return stats
49
+
50
  def create_visualization(self, chart_type: str, x_col: str, y_col: str, color_col: str = None) -> go.Figure:
 
51
  if chart_type == "Line Plot":
52
  fig = px.line(self.data, x=x_col, y=y_col, color=color_col)
53
  elif chart_type == "Bar Plot":
 
59
  else:
60
  fig = px.histogram(self.data, x=x_col, color=color_col)
61
 
62
+ return fig