import io

import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split


def predict_engagement(file) -> str:
    """Train a random forest on a student-log CSV and report hold-out metrics.

    The CSV must contain a binary ``Engaged`` column (1 = Engaged,
    0 = Not Engaged). Every other *numeric* column is used as a feature;
    non-numeric columns are ignored. Rows whose label is missing are dropped.
    The data is split 80/20 (fixed seed), a ``RandomForestClassifier`` is fit
    on the training part, and a classification report for the held-out part
    is returned.

    Parameters:
        file: Path or file-like object accepted by ``pandas.read_csv``.

    Returns:
        str: The classification report on success, or a human-readable
        message describing why the prediction could not be made. This
        function never raises; all failures are reported in the return value.
    """
    label_column = "Engaged"
    invalid_label_msg = (
        "❌ CSV must include a binary column named 'Engaged' (1 or 0)."
    )

    # The whole pipeline is wrapped because callers expect a string in every
    # case (parse errors, too few rows to split, NaNs in features, ...).
    try:
        df = pd.read_csv(file)

        if label_column not in df.columns:
            return invalid_label_msg

        # Rows without a label cannot be used for training or evaluation.
        df = df.dropna(subset=[label_column])

        y = df[label_column]
        if not y.isin([0, 1]).all():
            return invalid_label_msg
        # After dropping NaNs the column may still be float (1.0 / 0.0).
        y = y.astype(int)

        # Keep numeric feature columns only; the model cannot consume text.
        X = df.drop(columns=[label_column]).select_dtypes(include=[np.number])
        if X.shape[1] == 0:
            return "❌ CSV must include at least one numeric feature column."

        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2, random_state=42
        )

        # Seed the forest as well as the split so the same file always
        # yields the same report.
        model = RandomForestClassifier(random_state=42)
        model.fit(X_train, y_train)

        predictions = model.predict(X_test)

        # Explicit labels keep target_names aligned even when the test split
        # happens to contain a single class; zero_division=0 reports 0.0 for
        # the undefined metrics instead of emitting warnings.
        report = classification_report(
            y_test,
            predictions,
            labels=[0, 1],
            target_names=["Not Engaged", "Engaged"],
            zero_division=0,
        )
        return "📊 Engagement Prediction Report:\n\n" + report

    except Exception as e:
        return f"Error during prediction: {str(e)}"