|
import pandas as pd |
|
import numpy as np |
|
from sklearn.ensemble import RandomForestClassifier |
|
from sklearn.model_selection import train_test_split |
|
from sklearn.metrics import classification_report |
|
import io |
|
|
|
def predict_engagement(file):
    """
    Train a random forest on an uploaded student-log CSV and report how well
    it predicts engagement on a held-out 20% of the rows.

    The CSV must contain a binary 'Engaged' column: 1 = Engaged,
    0 = Not Engaged. Rows whose 'Engaged' value is missing are ignored.
    Every other numeric column is used as a feature; non-numeric columns
    are dropped.

    Parameters:
        file (str, path-like or file-like): CSV source, passed straight to
            ``pandas.read_csv``.

    Returns:
        str: A classification report for the hold-out rows, or a
            human-readable message explaining why no report could be
            produced. Exceptions are never propagated to the caller; they
            are converted into an "Error during prediction: ..." string.
    """
    # NOTE(review): the leading 'β' (and the 'π' in the report header below)
    # look like mis-encoded emoji. They are left unchanged because callers
    # may match on the exact text - confirm the intended characters.
    bad_label_message = "β CSV must include a binary column named 'Engaged' (1 or 0)."

    try:
        df = pd.read_csv(file)

        if 'Engaged' not in df.columns:
            return bad_label_message

        # Rows without a label can be neither trained on nor scored.
        df = df.dropna(subset=['Engaged'])
        if df.empty:
            return "Error during prediction: CSV contains no labelled rows."

        y = df['Engaged']
        # Reject anything that is not strictly 0/1 up front; otherwise the
        # two fixed class names in the report would be wrong or the failure
        # would surface as an obscure scikit-learn error.
        if not y.isin([0, 1]).all():
            return bad_label_message
        y = y.astype(int)

        # Only numeric columns can be fed to the classifier.
        X = df.drop(columns=['Engaged']).select_dtypes(include=[np.number])
        if X.shape[1] == 0:
            return "Error during prediction: CSV has no numeric feature columns to train on."

        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2, random_state=42
        )

        # Seed the forest as well as the split so the same upload always
        # yields the same report.
        model = RandomForestClassifier(random_state=42)
        model.fit(X_train, y_train)

        predictions = model.predict(X_test)

        # Pin labels so the two target names always line up, even when the
        # hold-out set happens to contain only one class; zero_division=0
        # keeps metrics for an absent class at 0 instead of emitting warnings.
        report = classification_report(
            y_test,
            predictions,
            labels=[0, 1],
            target_names=["Not Engaged", "Engaged"],
            zero_division=0,
        )

        return "π Engagement Prediction Report:\n\n" + report

    except Exception as e:
        # Deliberate catch-all: this function is the boundary to the caller,
        # which expects a string in every case.
        return f"Error during prediction: {str(e)}"
|
|