Spaces:
Runtime error
Runtime error
# Train a RandomForestRegressor on the water-requirement dataset and save the
# fitted model to disk with joblib.

# Import libraries
import warnings

import joblib
import numpy as np
import pandas as pd

# Silence all warnings before the scikit-learn imports below, as the
# original script did.
warnings.filterwarnings('ignore')

from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler

# Load the dataset
df = pd.read_csv('clean_water_requirement_data.csv')
print(df.shape)

# Find the categorical and numerical columns
categorical_columns = df.select_dtypes(include=['object', 'category']).columns
numerical_columns = df.select_dtypes(include=['float64', 'int64']).columns

# Encode the categorical variables as integer codes.
# NOTE: the same encoder instance is refit for every column, so once the
# loop finishes it only holds the classes of the last encoded column.
label_encoder = LabelEncoder()
for col in categorical_columns:
    df[col] = label_encoder.fit_transform(df[col])

# Separate input and target features. The target is selected by name, the
# same way it is dropped from X, so the split stays correct even when
# 'water_requirement' is not the last column of the CSV.
X = df.drop(columns=['water_requirement'])
y = df['water_requirement']

# Hold out 20% of the rows for evaluation (fixed seed for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Apply StandardScaler: fit on the training split only, then reuse the
# fitted statistics to transform the test split.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Initialize the model with the provided best parameters
model = RandomForestRegressor(max_depth=20, n_estimators=200, random_state=42)

# Fit the model
model.fit(X_train, y_train)

# Predictions on the held-out split
y_pred = model.predict(X_test)

# Calculate performance metrics
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

# Save the model as a .pkl file using joblib
model_filename = 'random_forest_regressor_model.pkl'
joblib.dump(model, model_filename)