Spaces:

adityamanwatkar
/

optimal_water_usage_prediction

Runtime error

App Files Files Community

optimal_water_usage_prediction / main.py

adityamanwatkar

Upload 5 files

a246dee verified 10 months ago

raw

history blame

1.72 kB

	# Import libraries
	import warnings

	import joblib
	import numpy as np
	import pandas as pd

	# Suppress all warnings for the rest of the script
	warnings.filterwarnings('ignore')

	from sklearn.ensemble import RandomForestRegressor
	from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
	from sklearn.model_selection import train_test_split, GridSearchCV
	from sklearn.preprocessing import LabelEncoder, StandardScaler
	# Training script: load the water-requirement dataset, label-encode the
	# categorical columns, standardize the features, fit a random forest
	# regressor, report hold-out metrics and save the fitted model to disk.

	# Load the dataset (path is resolved relative to the working directory)
	df = pd.read_csv('clean_water_requirement_data.csv')
	print(df.shape)

	# Split the column names by dtype; the object/category columns are the
	# ones encoded below
	categorical_columns = df.select_dtypes(include=['object', 'category']).columns
	numerical_columns = df.select_dtypes(include=['float64', 'int64']).columns

	# Replace each categorical column with integer labels.
	# NOTE: the same encoder instance is refit for every column, so after the
	# loop it only holds the mapping of the last column processed.
	label_encoder = LabelEncoder()
	for col in categorical_columns:
	    df[col] = label_encoder.fit_transform(df[col])

	# Separate input features from the target. The target is selected by name
	# so it always matches the column dropped from X, wherever that column
	# sits in the file.
	X = df.drop(columns=['water_requirement'])
	y = df['water_requirement']

	# Hold out 20% of the rows for evaluation; fixed seed for reproducibility
	X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

	# Standardize features: fit the scaler on the training split only, then
	# apply the same transform to the test split
	scaler = StandardScaler()
	X_train = scaler.fit_transform(X_train)
	X_test = scaler.transform(X_test)

	# Initialize the model with the provided best parameters
	model = RandomForestRegressor(max_depth=20, n_estimators=200, random_state=42)

	# Fit the model
	model.fit(X_train, y_train)

	# Predictions on the held-out split
	y_pred = model.predict(X_test)

	# Calculate and report performance metrics
	mse = mean_squared_error(y_test, y_pred)
	mae = mean_absolute_error(y_test, y_pred)
	r2 = r2_score(y_test, y_pred)
	print(f"MSE: {mse:.4f}, MAE: {mae:.4f}, R2: {r2:.4f}")

	# Save the model as a .pkl file using joblib.
	# NOTE(review): only the model is persisted; the fitted scaler and label
	# encoder are not saved, so any inference code has to recreate the same
	# preprocessing itself - confirm that the app does so.
	model_filename = 'random_forest_regressor_model.pkl'
	joblib.dump(model, model_filename)