# Finished Model Evaluation Metrics
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split
import numpy as np
import tests2 as t
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error


# NOTE(review): load_boston was removed in scikit-learn 1.2, so this script
# requires an older scikit-learn release -- confirm the pinned version.
boston = load_boston()
X, y = boston.data, boston.target

# Hold out a third of the data for evaluation; fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=42)
# One regressor from each family, all with default hyperparameters.
dec_tree = DecisionTreeRegressor()
ran_for = RandomForestRegressor()
ada = AdaBoostRegressor()
lin_reg = LinearRegression()

# Train every model on the same training split (same order as declared).
for model in (dec_tree, ran_for, ada, lin_reg):
    model.fit(X_train, y_train)

# Predict on the held-out test set, one prediction array per model.
dec_pred = dec_tree.predict(X_test)
ran_pred = ran_for.predict(X_test)
ada_pred = ada.predict(X_test)
lin_pred = lin_reg.predict(X_test)
# potential model options
a = 'regression'
b = 'classification'
c = 'both regression and classification'

# Quiz answer: which problem type each metric applies to.
metrics_dict = {
    'precision': b,
    'recall': b,
    'accuracy': b,
    'r2_score': a,
    'mean_squared_error': a,
    'area_under_curve': b,
    'mean_absolute_area': a,
}

# checks your answer, no need to change this code
t.q6_check(metrics_dict)
print()

# Test-set predictions keyed by a short label, plus the sklearn metrics
# we will compare our hand-rolled implementations against.
models = {
    'dec_pred': dec_pred,
    'ran_pred': ran_pred,
    'ada_pred': ada_pred,
    'lin_pred': lin_pred,
}
metrics = [r2_score, mean_squared_error, mean_absolute_error]
# Check r2
def r2(actual, preds):
    '''
    INPUT:
    actual - numpy array or pd series of actual y values
    preds - numpy array or pd series of predicted y values
    OUTPUT:
    returns the r-squared score as a float

    Computed as 1 - SSE/SST, i.e. the fraction of variance explained.
    '''
    residual_ss = np.sum(np.square(actual - preds))
    total_ss = np.sum(np.square(actual - np.mean(actual)))
    return 1 - residual_ss / total_ss
# Print manual vs sklearn r2 for each model's predictions.
for name, preds in models.items():
    print(f'r2 manual for {name} is {r2(y_test, preds):.4f}')
    print(f'r2 sklearn for {name} is {r2_score(y_test, preds):.4f}')
    print()
# Check solution matches sklearn
def mse(actual, preds):
    '''
    INPUT:
    actual - numpy array or pd series of actual y values
    preds - numpy array or pd series of predicted y values
    OUTPUT:
    returns the mean squared error as a float
    '''
    # Mean of squared residuals; equivalent to sum(sq errors)/len(actual).
    return np.mean(np.square(actual - preds))
# Check your solution matches sklearn
for name, preds in models.items():
    print(f'mse manual for {name} is {mse(y_test, preds):.4f}')
    print(f'mse sklearn for {name} is'
          f' {mean_squared_error(y_test, preds):.4f}')
    print()
def mae(actual, preds):
    '''
    INPUT:
    actual - numpy array or pd series of actual y values
    preds - numpy array or pd series of predicted y values
    OUTPUT:
    returns the mean absolute error as a float
    '''
    # Mean of absolute residuals; equivalent to sum(|errors|)/len(actual).
    return np.mean(np.abs(actual - preds))
# Check your solution matches sklearn
for name, preds in models.items():
    print(f'mae manual for {name} is {mae(y_test, preds):.4f}')
    print(f'mae sklearn for {name} is'
          f' {mean_absolute_error(y_test, preds):.4f}')
    print()
print('=================')
print('Comparison of all models:\n')
# Print every sklearn metric for every model's predictions.
# Iterate the metric callables directly instead of range(len(metrics)):
# same output, idiomatic iteration.
for name, preds in models.items():
    for metric in metrics:
        print(f'{metric.__name__} for '
              f'{name} {metric(y_test, preds):.4f}')
    print()
# match each metric to the model that performed best on it
a = 'decision tree'
b = 'random forest'
c = 'adaptive boosting'
d = 'linear regression'

# Random forest wins on every metric for this split.
best_fit = {
    'mse': b,
    'r2': b,
    'mae': b,
}

# Tests your answer - don't change this code
t.check_ten(best_fit)