Finished Model Evaluation Metrics #1
@@ -0,0 +1,239 @@
# Import our libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, precision_score, recall_score
from sklearn.metrics import f1_score, fbeta_score
from sklearn.ensemble import BaggingClassifier, RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.svm import SVC
import matplotlib.pyplot as plt
from itertools import cycle
from sklearn.metrics import roc_curve, auc, roc_auc_score
from scipy import interp
# import tests as t

# Read in our dataset (the SMS Spam Collection file is tab-separated)
df = pd.read_csv('smsspamcollection_SMSSpamCollection',
                 sep='\t',
                 header=None,
                 names=['label', 'sms_message'])

# Fix our response value
df['label'] = df.label.map({'ham': 0, 'spam': 1})

# Split our dataset into training and testing data
X_train, X_test, y_train, y_test = train_test_split(df['sms_message'],
                                                     df['label'],
                                                     random_state=1)

# Instantiate the CountVectorizer method
count_vector = CountVectorizer()

# Fit the training data and then return the matrix
training_data = count_vector.fit_transform(X_train)

# Transform testing data and return the matrix. Note we are not fitting the
# testing data into the CountVectorizer()
testing_data = count_vector.transform(X_test)

# Instantiate a number of our models
naive_bayes = MultinomialNB()
bag_mod = BaggingClassifier(n_estimators=200)
rf_mod = RandomForestClassifier(n_estimators=200)
ada_mod = AdaBoostClassifier(n_estimators=300, learning_rate=0.2)
svm_mod = SVC()

# Fit each of the 4 models
# This might take some time to run
naive_bayes.fit(training_data, y_train)
bag_mod.fit(training_data, y_train)
rf_mod.fit(training_data, y_train)
ada_mod.fit(training_data, y_train)
svm_mod.fit(training_data, y_train)


# Make predictions using each of your models
nb = naive_bayes.predict(testing_data)
bag_pred = bag_mod.predict(testing_data)
rf_pred = rf_mod.predict(testing_data)
ada_pred = ada_mod.predict(testing_data)
svm_pred = svm_mod.predict(testing_data)


# accuracy is the total correct divided by the total to predict
def accuracy(actual, preds):
    '''
    INPUT
    preds - predictions as a numpy array or pandas series
    actual - actual values as a numpy array or pandas series

    OUTPUT:
    returns the accuracy as a float
    '''
    return np.sum(preds == actual) / len(actual)


print(accuracy(y_test, nb))
print(accuracy_score(y_test, nb))
print("Since these match, we correctly calculated our metric!")


# precision is the true positives over the predicted positive values
def precision(actual, preds):
    '''
    INPUT
    (assumes positive = 1 and negative = 0)
    preds - predictions as a numpy array or pandas series
    actual - actual values as a numpy array or pandas series

    OUTPUT:
    returns the precision as a float
    '''
    TP = np.sum((preds == actual) & (preds > 0))
    FP = np.sum((preds == 1) & (actual == 0))
    return TP / (TP + FP)


print(precision(y_test, nb))
print(precision_score(y_test, nb))
print("If the above match, you got it!")


# recall is true positives over all actual positive values
def recall(actual, preds):
    '''
    INPUT
    preds - predictions as a numpy array or pandas series
    actual - actual values as a numpy array or pandas series

    OUTPUT:
    returns the recall as a float
    '''
    TP = np.sum((preds == actual) & (preds > 0))
    FN = np.sum((preds == 0) & (actual == 1))
    return TP / (TP + FN)


print(recall(y_test, nb))
print(recall_score(y_test, nb))
print("If the above match, you got it!")


# f1_score is 2*(precision*recall)/(precision+recall)
def f1(actual, preds):
    '''
    INPUT
    preds - predictions as a numpy array or pandas series
    actual - actual values as a numpy array or pandas series

    OUTPUT:
    returns the f1 score as a float
    '''
    prec = precision(actual, preds)
    rec = recall(actual, preds)
    return 2 * ((prec * rec) / (prec + rec))


print(f1(y_test, nb))
print(f1_score(y_test, nb))
print("If the above match, you got it!")

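# (Added note, not part of the original solution.) As a cross-check of the
# hand-rolled metrics above, the same counts can be read off sklearn's
# confusion matrix; for binary labels [0, 1] it is laid out [[TN, FP], [FN, TP]].
from sklearn.metrics import confusion_matrix

tn, fp, fn, tp = confusion_matrix(y_test, nb).ravel()
print(tp / (tp + fp), precision_score(y_test, nb))  # precision two ways
print(tp / (tp + fn), recall_score(y_test, nb))     # recall two ways
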
# add the letter of the most appropriate metric to each statement
# in the dictionary
a = "recall"
b = "precision"
c = "accuracy"
d = 'f1-score'


seven_sol = {
    'We have imbalanced classes, which metric do we definitely not want to'
    ' use?': c,
    'We really want to make sure the positive cases are all caught even if'
    ' that means we identify some negatives as positives': a,
    'When we identify something as positive, we want to be sure it is truly'
    ' positive': b,
    'We care equally about identifying positive and negative cases': d
}

# This gives: That's right! It isn't really necessary to memorize these in
# practice, but it is important to know they exist and know why you might use
# one metric over another for a particular situation.


models = {'nb': nb,
          'bag_pred': bag_pred,
          'rf_pred': rf_pred,
          'ada_pred': ada_pred,
          'svm_pred': svm_pred}
metrics = [accuracy_score, precision_score, recall_score, f1_score]

for i in models:
    for j in range(len(metrics)):
        print(f'{metrics[j].__name__} for '
              f'{i} {metrics[j](y_test, models[i]):.4f}')
    print()

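# (Added note, not part of the original solution.) classification_report
# prints the same precision/recall/f1 numbers per class in a single call,
# which is a handy cross-check of the loop above.
from sklearn.metrics import classification_report

print(classification_report(y_test, nb))
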
beta = 1

print(f1_score(y_test, nb))
print(fbeta_score(y_test, nb, beta=beta))

for i in models:
    print(f'fbeta_score for {i} {fbeta_score(y_test, models[i], beta=beta)}')
    print(f'f1_score for {i} {f1_score(y_test, models[i])}')
    print()

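# (Added note, not part of the original solution.) beta weights recall
# relative to precision: values below 1 lean towards precision, values above
# 1 lean towards recall, and beta=1 reproduces the f1 scores above.
print(fbeta_score(y_test, nb, beta=0.5))  # favours precision
print(fbeta_score(y_test, nb, beta=2))    # favours recall
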
# Function for calculating auc and roc

def build_roc_auc(model, X_train, X_test, y_train, y_test):
    '''
    INPUT:
    model - an sklearn instantiated model
    X_train - the training data
    y_train - the training response values (must be categorical)
    X_test - the test data
    y_test - the test response values (must be categorical)
    OUTPUT:
    auc - returns auc as a float
    prints the roc curve
    '''
    y_preds = model.fit(X_train, y_train).predict_proba(X_test)
    # Compute ROC curve and ROC area for each class
    fpr = dict()
    tpr = dict()
    roc_auc = dict()
    for i in range(len(y_test)):
        fpr[i], tpr[i], _ = roc_curve(y_test, y_preds[:, 1])
        roc_auc[i] = auc(fpr[i], tpr[i])

    # Compute micro-average ROC curve and ROC area
    fpr["micro"], tpr["micro"], _ = roc_curve(y_test.ravel(),
                                              y_preds[:, 1].ravel())
    roc_auc["micro"] = auc(fpr["micro"], tpr["micro"])

    plt.plot(fpr[2], tpr[2], color='darkorange',
             lw=2, label='ROC curve (area = %0.2f)' % roc_auc[2])
    plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('Receiver operating characteristic example')
    plt.show()

    return roc_auc_score(y_test, np.round(y_preds[:, 1]))


instantiated_models = [naive_bayes, bag_mod, rf_mod]

for i in instantiated_models:
    build_roc_auc(i, training_data, testing_data, y_train, y_test)

print(build_roc_auc(instantiated_models[0], training_data, testing_data,
                    y_train, y_test))
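# (Added note, not part of the original solution.) build_roc_auc rounds the
# predicted probabilities before scoring, so the returned value is the AUC of
# the hard 0/1 predictions. Scoring the raw class-1 probabilities instead
# gives the area under the plotted ROC curve:
nb_probs = naive_bayes.predict_proba(testing_data)[:, 1]
print(roc_auc_score(y_test, nb_probs))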
@@ -0,0 +1,96 @@
0.24539,0.81725,0
0.21774,0.76462,0
0.20161,0.69737,0
0.20161,0.58041,0
0.2477,0.49561,0
0.32834,0.44883,0
0.39516,0.48099,0
0.39286,0.57164,0
0.33525,0.62135,0
0.33986,0.71199,0
0.34447,0.81433,0
0.28226,0.82602,0
0.26613,0.75,0
0.26613,0.63596,0
0.32604,0.54825,0
0.28917,0.65643,0
0.80069,0.71491,0
0.80069,0.64181,0
0.80069,0.50146,0
0.79839,0.36988,0
0.73157,0.25,0
0.63249,0.18275,0
0.60023,0.27047,0
0.66014,0.34649,0
0.70161,0.42251,0
0.70853,0.53947,0
0.71544,0.63304,0
0.74309,0.72076,0
0.75,0.63596,0
0.75,0.46345,0
0.72235,0.35526,0
0.66935,0.28509,0
0.20622,0.94298,1
0.26613,0.8962,1
0.38134,0.8962,1
0.42051,0.94591,1
0.49885,0.86404,1
0.31452,0.93421,1
0.53111,0.72076,1
0.45276,0.74415,1
0.53571,0.6038,1
0.60484,0.71491,1
0.60945,0.58333,1
0.51267,0.47807,1
0.50806,0.59211,1
0.46198,0.30556,1
0.5288,0.41082,1
0.38594,0.35819,1
0.31682,0.31433,1
0.29608,0.20906,1
0.36982,0.27632,1
0.42972,0.18275,1
0.51498,0.10965,1
0.53111,0.20906,1
0.59793,0.095029,1
0.73848,0.086257,1
0.83065,0.18275,1
0.8629,0.10965,1
0.88364,0.27924,1
0.93433,0.30848,1
0.93433,0.19444,1
0.92512,0.43421,1
0.87903,0.43421,1
0.87903,0.58626,1
0.9182,0.71491,1
0.85138,0.8348,1
0.85599,0.94006,1
0.70853,0.94298,1
0.70853,0.87281,1
0.59793,0.93129,1
0.61175,0.83187,1
0.78226,0.82895,1
0.78917,0.8962,1
0.90668,0.89912,1
0.14862,0.92251,1
0.15092,0.85819,1
0.097926,0.85819,1
0.079493,0.91374,1
0.079493,0.77632,1
0.10945,0.79678,1
0.12327,0.67982,1
0.077189,0.6886,1
0.081797,0.58626,1
0.14862,0.58041,1
0.14862,0.5307,1
0.14171,0.41959,1
0.08871,0.49269,1
0.095622,0.36696,1
0.24539,0.3962,1
0.1947,0.29678,1
0.16935,0.22368,1
0.15553,0.13596,1
0.23848,0.12427,1
0.33065,0.12427,1
0.095622,0.2617,1
0.091014,0.20322,1
@@ -0,0 +1,35 @@
# Import statements
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

# Import the train test split
# http://scikit-learn.org/0.16/modules/generated/sklearn.cross_validation.train_test_split.html


# Read in the data.
data = np.asarray(pd.read_csv('data.csv', header=None))
# Assign the features to the variable X, and the labels to the variable y.
X = data[:, 0:2]
y = data[:, 2]

# Use train test split to split your data
# Use a test size of 25% and a random state of 42
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25,
                                                    random_state=42)

# Instantiate your decision tree model
model = DecisionTreeClassifier()

# TODO: Fit the model to the training data.
model.fit(X_train, y_train)

# TODO: Make predictions on the test data
y_pred = model.predict(X_test)

# TODO: Calculate the accuracy and assign it to the variable acc on the test
# data.
acc = accuracy_score(y_test, y_pred)
print(acc)
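# (Added check, not part of the original quiz.) Comparing training accuracy
# with the test accuracy printed above is a quick way to spot overfitting of
# the unconstrained decision tree.
print(accuracy_score(y_train, model.predict(X_train)))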
@@ -0,0 +1,98 @@
def test_one(mod_arg):
    '''
    INPUT:
    mod_arg - a set of the strings pertaining to the objects that were passed in the fitting of our models

    OUTPUT:
    prints correctness of the set
    nothing returned
    '''
    a = 'X_train'
    b = 'X_test'
    c = 'y_train'
    d = 'y_test'
    e = 'training_data'
    f = 'testing_data'
    if mod_arg == {c, e}:
        print("That's right! You need to fit on both parts of the data pertaining to training data!")
    else:
        print("Oops! That doesn't look quite right! Remember you only want to fit your model to the training data! Notice that X_train hasn't had the data cleaned yet, so that won't work to pass to our fit method. Hint - there are two items you should be passing to your fit method.")


def test_two(mod_arg):
    '''
    INPUT:
    mod_arg - a set of the strings pertaining to the objects that were passed in the predicting step

    OUTPUT:
    prints correctness of the set
    nothing returned
    '''
    a = 'X_train'
    b = 'X_test'
    c = 'y_train'
    d = 'y_test'
    e = 'training_data'
    f = 'testing_data'
    if mod_arg == {f}:
        print("That's right! To see how well our models perform in a new setting, you will want to predict on the test set of data.")
    else:
        print("Oops! That doesn't look quite right! Remember you will want to predict on test data to know how well your model will do in a new situation. Hint - there is only one item that should be passed to the predict method of your model. Also notice that X_test has not been cleaned yet, so this cannot be passed to the predict method!")


def sol_seven(seven_sol):
    '''
    INPUT: dictionary with correct matching of metrics
    OUTPUT: nothing returned - prints statement related to correctness of dictionary
    '''

    a = "recall"
    b = "precision"
    c = "accuracy"
    d = 'f1-score'

    seven_sol_1 = {
        'We have imbalanced classes, which metric do we definitely not want to use?': c,
        'We really want to make sure the positive cases are all caught even if that means we identify some negatives as positives': a,
        'When we identify something as positive, we want to be sure it is truly positive': b,
        'We care equally about identifying positive and negative cases': d
    }

    if seven_sol == seven_sol_1:
        print("That's right! It isn't really necessary to memorize these in practice, but it is important to know they exist and know why you might use one metric over another for a particular situation.")

    if seven_sol['We have imbalanced classes, which metric do we definitely not want to use?'] != seven_sol_1['We have imbalanced classes, which metric do we definitely not want to use?']:
        print("Oops! The first one isn't right. If we do not have balanced classes, we probably want to stay away from using accuracy.")

    if seven_sol['We really want to make sure the positive cases are all caught even if that means we identify some negatives as positives'] != seven_sol_1['We really want to make sure the positive cases are all caught even if that means we identify some negatives as positives']:
        print("Oops! The second one isn't right. If we really want to be sure about catching positive cases, we should be closely watching recall, which has all of the positive classes in the denominator - so we are monitoring how many of them we get right with recall.")

    if seven_sol['When we identify something as positive, we want to be sure it is truly positive'] != seven_sol_1['When we identify something as positive, we want to be sure it is truly positive']:
        print("Oops! The third one isn't right. Using precision, we have the predicted positives in the denominator. Therefore, this will help us be sure the items we identify as positive are actually positive.")

    if seven_sol['We care equally about identifying positive and negative cases'] != seven_sol_1['We care equally about identifying positive and negative cases']:
        print("Oops! The last one isn't right. If we care equally about precision and recall, we should use f1 score.")


def sol_eight(eight_sol):
    '''
    INPUT: dictionary with correct matching of metrics
    OUTPUT: nothing returned - prints statement related to correctness of dictionary
    '''
    a = "naive-bayes"
    b = "bagging"
    c = "random-forest"
    d = 'ada-boost'
    e = "svm"

    eight_sol_1 = {
        'We have imbalanced classes, which metric do we definitely not want to use?': a,
        'We really want to make sure the positive cases are all caught even if that means we identify some negatives as positives': a,
        'When we identify something as positive, we want to be sure it is truly positive': c,
        'We care equally about identifying positive and negative cases': a
    }

    if eight_sol_1 == eight_sol:
        print("That's right! Naive Bayes was the best model for all of our metrics except precision!")
    else:
        print("Oops! That doesn't look right. Make sure you are performing your predictions and matching on the test data. Hint: The naive bayes model actually performs best on all of the metrics except one. Try again!")
@@ -0,0 +1,487 @@
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Our Mission\n",
    "\n",
    "In this lesson you gained some insight into a number of techniques used to understand how well our model is performing. This notebook is aimed at giving you some practice with the metrics specifically related to classification problems. With that in mind, we will again be looking at the spam dataset from the earlier lessons.\n",
    "\n",
    "First, run the cell below to prepare the data and instantiate a number of different models."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Import our libraries\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.feature_extraction.text import CountVectorizer\n",
    "from sklearn.naive_bayes import MultinomialNB\n",
    "from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score\n",
    "from sklearn.ensemble import BaggingClassifier, RandomForestClassifier, AdaBoostClassifier\n",
    "from sklearn.svm import SVC\n",
    "import tests as t\n",
    "\n",
    "# Read in our dataset\n",
    "df = pd.read_table('smsspamcollection/SMSSpamCollection',\n",
    "                   sep='\\t',\n",
    "                   header=None,\n",
    "                   names=['label', 'sms_message'])\n",
    "\n",
    "# Fix our response value\n",
    "df['label'] = df.label.map({'ham':0, 'spam':1})\n",
    "\n",
    "# Split our dataset into training and testing data\n",
    "X_train, X_test, y_train, y_test = train_test_split(df['sms_message'],\n",
    "                                                    df['label'],\n",
    "                                                    random_state=1)\n",
    "\n",
    "# Instantiate the CountVectorizer method\n",
    "count_vector = CountVectorizer()\n",
    "\n",
    "# Fit the training data and then return the matrix\n",
    "training_data = count_vector.fit_transform(X_train)\n",
    "\n",
    "# Transform testing data and return the matrix. Note we are not fitting the testing data into the CountVectorizer()\n",
    "testing_data = count_vector.transform(X_test)\n",
    "\n",
    "# Instantiate a number of our models\n",
    "naive_bayes = MultinomialNB()\n",
    "bag_mod = BaggingClassifier(n_estimators=200)\n",
    "rf_mod = RandomForestClassifier(n_estimators=200)\n",
    "ada_mod = AdaBoostClassifier(n_estimators=300, learning_rate=0.2)\n",
    "svm_mod = SVC()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "> **Step 1**: Now, fit each of the above models to the appropriate data. Answer the following question to assure that you fit the models correctly."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Fit each of the 4 models\n",
    "# This might take some time to run\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# The models you fit above were fit on which data?\n",
    "\n",
    "a = 'X_train'\n",
    "b = 'X_test'\n",
    "c = 'y_train'\n",
    "d = 'y_test'\n",
    "e = 'training_data'\n",
    "f = 'testing_data'\n",
    "\n",
    "# Change models_fit_on to only contain the correct string names\n",
    "# of values that you passed to the above models\n",
    "\n",
    "models_fit_on = {a, b, c, d, e, f}  # update this to only contain correct letters\n",
    "\n",
    "# Checks your solution - don't change this\n",
    "t.test_one(models_fit_on)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "> **Step 2**: Now make predictions for each of your models on the data that will allow you to understand how well our model will extend to new data. Then correctly add the strings to the set in the following cell."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Make predictions using each of your models\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Which data was used in the predict method to see how well your\n",
    "# model would work on new data?\n",
    "\n",
    "a = 'X_train'\n",
    "b = 'X_test'\n",
    "c = 'y_train'\n",
    "d = 'y_test'\n",
    "e = 'training_data'\n",
    "f = 'testing_data'\n",
    "\n",
    "# Change models_predict_on to only contain the correct string names\n",
    "# of values that you passed to the above models\n",
    "\n",
    "models_predict_on = {a, b, c, d, e, f}  # update this to only contain correct letters\n",
    "\n",
    "# Checks your solution - don't change this\n",
    "t.test_two(models_predict_on)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Now that you have set up all your predictions, let's get to topics addressed in this lesson - measuring how well each of your models performed. First, we will focus on how each metric was calculated for a single model, and then in the final part of this notebook, you will choose models that are best based on a particular metric.\n",
    "\n",
    "You will be writing functions to calculate a number of metrics and then comparing the values to what you get from sklearn. This will help you build intuition for how each metric is calculated.\n",
    "\n",
    "> **Step 3**: As an example of how this will work for the upcoming questions, run the cell below. Fill in the function below to calculate accuracy, and then compare your answer to the built-in to assure you are correct."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# accuracy is the total correct divided by the total to predict\n",
    "def accuracy(actual, preds):\n",
    "    '''\n",
    "    INPUT\n",
    "    preds - predictions as a numpy array or pandas series\n",
    "    actual - actual values as a numpy array or pandas series\n",
    "\n",
    "    OUTPUT:\n",
    "    returns the accuracy as a float\n",
    "    '''\n",
    "    return np.sum(preds == actual)/len(actual)\n",
    "\n",
    "\n",
    "print(accuracy(y_test, preds_nb))\n",
    "print(accuracy_score(y_test, preds_nb))\n",
    "print(\"Since these match, we correctly calculated our metric!\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "> **Step 4**: Fill in the function below to calculate precision, and then compare your answer to the built-in to assure you are correct."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# precision is the true positives over the predicted positive values\n",
    "def precision(actual, preds):\n",
    "    '''\n",
    "    INPUT\n",
    "    (assumes positive = 1 and negative = 0)\n",
    "    preds - predictions as a numpy array or pandas series\n",
    "    actual - actual values as a numpy array or pandas series\n",
    "\n",
    "    OUTPUT:\n",
    "    returns the precision as a float\n",
    "    '''\n",
    "\n",
    "    return None  # calculate precision here\n",
    "\n",
    "\n",
    "print(precision(y_test, preds_nb))\n",
    "print(precision_score(y_test, preds_nb))\n",
    "print(\"If the above match, you got it!\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "> **Step 5**: Fill in the function below to calculate recall, and then compare your answer to the built-in to assure you are correct."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# recall is true positives over all actual positive values\n",
    "def recall(actual, preds):\n",
    "    '''\n",
    "    INPUT\n",
    "    preds - predictions as a numpy array or pandas series\n",
    "    actual - actual values as a numpy array or pandas series\n",
    "\n",
    "    OUTPUT:\n",
    "    returns the recall as a float\n",
    "    '''\n",
    "\n",
    "    return None  # calculate recall here\n",
    "\n",
    "\n",
    "print(recall(y_test, preds_nb))\n",
    "print(recall_score(y_test, preds_nb))\n",
    "print(\"If the above match, you got it!\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "> **Step 6**: Fill in the function below to calculate f1-score, and then compare your answer to the built-in to assure you are correct."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# f1_score is 2*(precision*recall)/(precision+recall)\n",
    "def f1(actual, preds):\n",
    "    '''\n",
    "    INPUT\n",
    "    preds - predictions as a numpy array or pandas series\n",
    "    actual - actual values as a numpy array or pandas series\n",
    "\n",
    "    OUTPUT:\n",
    "    returns the f1 score as a float\n",
    "    '''\n",
    "\n",
    "    return None  # calculate f1-score here\n",
    "\n",
    "\n",
    "print(f1(y_test, preds_nb))\n",
    "print(f1_score(y_test, preds_nb))\n",
    "print(\"If the above match, you got it!\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "> **Step 7:** Now that you have calculated a number of different metrics, let's tie that to when we might use one versus another. Use the dictionary below to match a metric to each statement that identifies when you would want to use that metric."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# add the letter of the most appropriate metric to each statement\n",
    "# in the dictionary\n",
    "a = \"recall\"\n",
    "b = \"precision\"\n",
    "c = \"accuracy\"\n",
    "d = 'f1-score'\n",
    "\n",
    "\n",
    "seven_sol = {\n",
    "    'We have imbalanced classes, which metric do we definitely not want to use?': None,  # letter here\n",
    "    'We really want to make sure the positive cases are all caught even if that means we identify some negatives as positives': None,  # letter here\n",
    "    'When we identify something as positive, we want to be sure it is truly positive': None,  # letter here\n",
    "    'We care equally about identifying positive and negative cases': None  # letter here\n",
    "}\n",
    "\n",
    "t.sol_seven(seven_sol)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "> **Step 8:** Given what you know about the metrics now, use this information to correctly match the appropriate model to when it would be best to use each in the dictionary below."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# use the answers you found to the previous question, then match the model that did best for each metric\n",
    "a = \"naive-bayes\"\n",
    "b = \"bagging\"\n",
    "c = \"random-forest\"\n",
    "d = 'ada-boost'\n",
    "e = \"svm\"\n",
    "\n",
    "\n",
    "eight_sol = {\n",
    "    'We have imbalanced classes, which metric do we definitely not want to use?': None,  # letter here\n",
    "    'We really want to make sure the positive cases are all caught even if that means we identify some negatives as positives': None,  # letter here\n",
    "    'When we identify something as positive, we want to be sure it is truly positive': None,  # letter here\n",
    "    'We care equally about identifying positive and negative cases': None  # letter here\n",
    "}\n",
    "\n",
    "t.sol_eight(eight_sol)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# cells for work"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# If you get stuck, also notice there is a solution available by hitting the orange button in the top left"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "As a final step in this workbook, let's take a look at the last three metrics you saw: f-beta scores, ROC curves, and AUC.\n",
    "\n",
    "**For f-beta scores:** If you decide that you care more about precision, you should move beta closer to 0. If you decide you care more about recall, you should move beta towards infinity.\n",
    "\n",
    "> **Step 9:** Using fbeta_score works similarly to most of the other metrics in sklearn, but you also need to set beta as your weighting between precision and recall. Use the space below to show that you can use [fbeta in sklearn](http://scikit-learn.org/stable/modules/generated/sklearn.metrics.fbeta_score.html) to replicate your f1-score from above. If in the future you want to use a different weighting, [this article](http://mlwiki.org/index.php/Precision_and_Recall) does an amazing job of explaining how you might adjust beta for different situations."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# import fbeta_score\n",
    "\n",
    "\n",
    "# Show that you can produce the same f1_score results using fbeta_score\n",
    "\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "> **Step 10:** Building ROC curves in Python is a pretty involved process on your own. I wrote the function below to assist with the process and make it easier for you to do so in the future as well. Try it out using one of the other classifiers you created above to see how it compares to the random forest model below.\n",
    "\n",
    "Run the cell below to build a ROC curve, and retrieve the AUC for the random forest model."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Function for calculating auc and roc\n",
    "\n",
    "def build_roc_auc(model, X_train, X_test, y_train, y_test):\n",
    "    '''\n",
    "    INPUT:\n",
    "    model - an sklearn instantiated model\n",
    "    X_train - the training data\n",
    "    y_train - the training response values (must be categorical)\n",
    "    X_test - the test data\n",
    "    y_test - the test response values (must be categorical)\n",
    "    OUTPUT:\n",
    "    auc - returns auc as a float\n",
    "    prints the roc curve\n",
    "    '''\n",
    "    import numpy as np\n",
    "    import matplotlib.pyplot as plt\n",
    "    from itertools import cycle\n",
    "    from sklearn.metrics import roc_curve, auc, roc_auc_score\n",
    "    from scipy import interp\n",
    "\n",
    "    y_preds = model.fit(X_train, y_train).predict_proba(X_test)\n",
    "    # Compute ROC curve and ROC area for each class\n",
    "    fpr = dict()\n",
    "    tpr = dict()\n",
    "    roc_auc = dict()\n",
    "    for i in range(len(y_test)):\n",
    "        fpr[i], tpr[i], _ = roc_curve(y_test, y_preds[:, 1])\n",
    "        roc_auc[i] = auc(fpr[i], tpr[i])\n",
    "\n",
    "    # Compute micro-average ROC curve and ROC area\n",
    "    fpr[\"micro\"], tpr[\"micro\"], _ = roc_curve(y_test.ravel(), y_preds[:, 1].ravel())\n",
    "    roc_auc[\"micro\"] = auc(fpr[\"micro\"], tpr[\"micro\"])\n",
    "\n",
    "    plt.plot(fpr[2], tpr[2], color='darkorange',\n",
    "             lw=2, label='ROC curve (area = %0.2f)' % roc_auc[2])\n",
    "    plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')\n",
    "    plt.xlim([0.0, 1.0])\n",
    "    plt.ylim([0.0, 1.05])\n",
    "    plt.xlabel('False Positive Rate')\n",
    "    plt.ylabel('True Positive Rate')\n",
    "    plt.title('Receiver operating characteristic example')\n",
    "    plt.show()\n",
    "\n",
    "    return roc_auc_score(y_test, np.round(y_preds[:, 1]))\n",
    "\n",
    "\n",
    "# Finding roc and auc for the random forest model\n",
    "build_roc_auc(rf_mod, training_data, testing_data, y_train, y_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Your turn here - choose another classifier to see how it compares\n",
    "\n",
    "\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
@@ -0,0 +1,354 @@
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Boston Housing Data\n",
    "\n",
    "In order to gain a better understanding of the metrics used in regression settings, we will be looking at the Boston Housing dataset.\n",
    "\n",
    "First use the cell below to read in the dataset and set up the training and testing data that will be used for the rest of this problem."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.datasets import load_boston\n",
    "from sklearn.model_selection import train_test_split\n",
    "import numpy as np\n",
    "import tests2 as t\n",
    "\n",
    "boston = load_boston()\n",
    "y = boston.target\n",
    "X = boston.data\n",
    "\n",
    "X_train, X_test, y_train, y_test = train_test_split(\n",
    "    X, y, test_size=0.33, random_state=42)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "> **Step 1:** Before we get too far, let's do a quick check of the models that you can use in this situation given that you are working on a regression problem. Use the dictionary and corresponding letters below to provide all the possible models you might choose to use."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "That's right! All but logistic regression can be used for predicting numeric values. And linear regression is the only one of these that you should not use for predicting categories. Technically sklearn won't stop you from doing most of anything you want, but you probably want to treat cases in the way you found by answering this question!\n"
     ]
    }
   ],
   "source": [
    "# When can you use the model - use each option as many times as necessary\n",
    "a = 'regression'\n",
    "b = 'classification'\n",
    "c = 'both regression and classification'\n",
    "\n",
    "models = {\n",
    "    'decision trees': c,\n",
    "    'random forest': c,\n",
    "    'adaptive boosting': c,\n",
    "    'logistic regression': b,\n",
    "    'linear regression': a\n",
    "}\n",
    "\n",
    "# checks your answer, no need to change this code\n",
    "t.q1_check(models)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "> **Step 2:** Now for each of the models you found in the previous question that can be used for regression problems, import them using sklearn."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Import models from sklearn - notice you will want to use\n",
    "# the regressor version (not classifier) - googling to find\n",
    "# each of these is what we all do!\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "> **Step 3:** Now that you have imported the 4 models that can be used for regression problems, instantiate each below."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Instantiate each of the models you imported\n",
    "# For now use the defaults for all the hyperparameters\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "> **Step 4:** Fit each of your instantiated models on the training data."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Fit each of your models using the training data\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "> **Step 5:** Use each of your models to predict on the test data."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Predict on the test values for each model\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "> **Step 6:** Now for the information related to this lesson. Use the dictionary to match the metrics that are used for regression and those that are for classification."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# potential model options\n",
    "a = 'regression'\n",
    "b = 'classification'\n",
    "c = 'both regression and classification'\n",
    "\n",
    "metrics = {\n",
    "    'precision': None,  # Letter here\n",
    "    'recall': None,  # Letter here\n",
    "    'accuracy': None,  # Letter here\n",
    "    'r2_score': None,  # Letter here\n",
    "    'mean_squared_error': None,  # Letter here\n",
    "    'area_under_curve': None,  # Letter here\n",
    "    'mean_absolute_area': None  # Letter here\n",
    "}\n",
    "\n",
    "# checks your answer, no need to change this code\n",
    "t.q6_check(metrics)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "> **Step 6:** Now that you have identified the metrics that can be used for regression problems, use sklearn to import them."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Import the metrics from sklearn\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "> **Step 7:** Similar to what you did with classification models, let's make sure you are comfortable with how exactly each of these metrics is being calculated. We can then match the value to what sklearn provides."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def r2(actual, preds):\n",
    "    '''\n",
    "    INPUT:\n",
    "    actual - numpy array or pd series of actual y values\n",
    "    preds - numpy array or pd series of predicted y values\n",
    "    OUTPUT:\n",
    "    returns the r-squared score as a float\n",
    "    '''\n",
    "    sse = np.sum((actual-preds)**2)\n",
    "    sst = np.sum((actual-np.mean(actual))**2)\n",
    "    return 1 - sse/sst\n",
    "\n",
    "# Check solution matches sklearn\n",
    "print(r2(y_test, preds_tree))\n",
    "print(r2_score(y_test, preds_tree))\n",
    "print(\"Since the above match, we can see that we have correctly calculated the r2 value.\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "> **Step 8:** Your turn: fill in the function below and see if your result matches the built-in for mean_squared_error."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def mse(actual, preds):\n",
    "    '''\n",
    "    INPUT:\n",
    "    actual - numpy array or pd series of actual y values\n",
    "    preds - numpy array or pd series of predicted y values\n",
    "    OUTPUT:\n",
    "    returns the mean squared error as a float\n",
    "    '''\n",
    "\n",
    "    return None  # calculate mse here\n",
    "\n",
    "\n",
    "# Check your solution matches sklearn\n",
    "print(mse(y_test, preds_tree))\n",
    "print(mean_squared_error(y_test, preds_tree))\n",
    "print(\"If the above match, you are all set!\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "> **Step 9:** Now one last time - complete the function related to mean absolute error. Then check your function against the sklearn metric to assure they match."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def mae(actual, preds):\n",
    "    '''\n",
    "    INPUT:\n",
    "    actual - numpy array or pd series of actual y values\n",
    "    preds - numpy array or pd series of predicted y values\n",
    "    OUTPUT:\n",
    "    returns the mean absolute error as a float\n",
    "    '''\n",
    "\n",
    "    return None  # calculate the mae here\n",
    "\n",
    "\n",
    "# Check your solution matches sklearn\n",
    "print(mae(y_test, preds_tree))\n",
    "print(mean_absolute_error(y_test, preds_tree))\n",
    "print(\"If the above match, you are all set!\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "> **Step 10:** Which model performed the best in terms of each of the metrics? Note that r2 and mse will always match, but the mae may give a different best model. Use the dictionary and space below to match the best model via each metric."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# match each metric to the model that performed best on it\n",
    "a = 'decision tree'\n",
    "b = 'random forest'\n",
    "c = 'adaptive boosting'\n",
    "d = 'linear regression'\n",
    "\n",
    "\n",
    "best_fit = {\n",
    "    'mse': None,  # letter here\n",
    "    'r2': None,  # letter here\n",
    "    'mae': None  # letter here\n",
    "}\n",
    "\n",
    "# Tests your answer - don't change this code\n",
    "t.check_ten(best_fit)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# cells for work"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.6.3"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
@@ -0,0 +1,486 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Boston Housing Data\n",
|
||||
"\n",
|
||||
"In order to gain a better understanding of the metrics used in regression settings, we will be looking at the Boston Housing dataset. \n",
|
||||
"\n",
|
||||
"First use the cell below to read in the dataset and set up the training and testing data that will be used for the rest of this problem."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from sklearn.datasets import load_boston\n",
|
||||
"from sklearn.model_selection import train_test_split\n",
|
||||
"import numpy as np\n",
|
||||
"import tests2 as t\n",
|
||||
"\n",
|
||||
"boston = load_boston()\n",
|
||||
"y = boston.target\n",
|
||||
"X = boston.data\n",
|
||||
"\n",
|
||||
"X_train, X_test, y_train, y_test = train_test_split(\n",
|
||||
" X, y, test_size=0.33, random_state=42)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"> **Step 1:** Before we get too far, let's do a quick check of the models that you can use in this situation given that you are working on a regression problem. Use the dictionary and corresponding letters below to provide all the possible models you might choose to use."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"That's right! All but logistic regression can be used for predicting numeric values. And linear regression is the only one of these that you should not use for predicting categories. Technically sklearn won't stop you from doing most of anything you want, but you probably want to treat cases in the way you found by answering this question!\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# When can you use the model - use each option as many times as necessary\n",
|
||||
"a = 'regression'\n",
|
||||
"b = 'classification'\n",
|
||||
"c = 'both regression and classification'\n",
|
||||
"\n",
|
||||
"models = {\n",
|
||||
" 'decision trees': c,\n",
|
||||
" 'random forest': c,\n",
|
||||
" 'adaptive boosting': c,\n",
|
||||
" 'logistic regression': b,\n",
|
||||
" 'linear regression': a\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"#checks your answer, no need to change this code\n",
|
||||
"t.q1_check(models)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"> **Step 2:** Now for each of the models you found in the previous question that can be used for regression problems, import them using sklearn."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Import models from sklearn - notice you will want to use \n",
|
||||
"# the regressor version (not classifier) - googling to find \n",
|
||||
"# each of these is what we all do!\n",
|
||||
"from sklearn.tree import DecisionTreeRegressor\n",
|
||||
"from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor\n",
|
||||
"from sklearn.linear_model import LinearRegression"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"> **Step 3:** Now that you have imported the 4 models that can be used for regression problems, instantate each below."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Instantiate each of the models you imported\n",
|
||||
"# For now use the defaults for all the hyperparameters\n",
|
||||
"dec_tree = DecisionTreeRegressor()\n",
|
||||
"ran_for = RandomForestRegressor()\n",
|
||||
"ada = AdaBoostRegressor()\n",
|
||||
"lin_reg = LinearRegression()\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"> **Step 4:** Fit each of your instantiated models on the training data."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)"
|
||||
]
|
||||
},
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Fit each of your models using the training data\n",
|
||||
"dec_tree.fit(X_train, y_train)\n",
|
||||
"ran_for.fit(X_train, y_train)\n",
|
||||
"ada.fit(X_train, y_train)\n",
|
||||
"lin_reg.fit(X_train, y_train)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"> **Step 5:** Use each of your models to predict on the test data."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Predict on the test values for each model\n",
|
||||
"dec_pred = dec_tree.predict(X_test)\n",
|
||||
"ran_pred = ran_for.predict(X_test)\n",
|
||||
"ada_pred = ada.predict(X_test)\n",
|
||||
"lin_pred = lin_reg.predict(X_test)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"> **Step 6:** Now for the information related to this lesson. Use the dictionary to match the metrics that are used for regression and those that are for classification."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 22,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"That's right! Looks like you know your metrics!\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# potential model options\n",
|
||||
"a = 'regression'\n",
|
||||
"b = 'classification'\n",
|
||||
"c = 'both regression and classification'\n",
|
||||
"\n",
|
||||
"#\n",
|
||||
"metrics = {\n",
|
||||
" 'precision': b,\n",
|
||||
" 'recall': b,\n",
|
||||
" 'accuracy': b,\n",
|
||||
" 'r2_score': a,\n",
|
||||
" 'mean_squared_error': a,\n",
|
||||
" 'area_under_curve': b, \n",
|
||||
" 'mean_absolute_area': a \n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"#checks your answer, no need to change this code\n",
|
||||
"t.q6_check(metrics)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"> **Step 6:** Now that you have identified the metrics that can be used in for regression problems, use sklearn to import them."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 23,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Import the metrics from sklearn\n",
|
||||
"from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"> **Step 7:** Similar to what you did with classification models, let's make sure you are comfortable with how exactly each of these metrics is being calculated. We can then match the value to what sklearn provides."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 29,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"r2 manual for dec_pred is 0.7334\n",
|
||||
"r2 sklearn for dec_pred is 0.7334\n",
|
||||
"\n",
|
||||
"r2 manual for ran_pred is 0.8608\n",
|
||||
"r2 sklearn for ran_pred is 0.8608\n",
|
||||
"\n",
|
||||
"r2 manual for ada_pred is 0.7936\n",
|
||||
"r2 sklearn for ada_pred is 0.7936\n",
|
||||
"\n",
|
||||
"r2 manual for lin_pred is 0.7259\n",
|
||||
"r2 sklearn for lin_pred is 0.7259\n",
|
||||
"\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"def r2(actual, preds):\n",
|
||||
" '''\n",
|
||||
" INPUT:\n",
|
||||
" actual - numpy array or pd series of actual y values\n",
|
||||
" preds - numpy array or pd series of predicted y values\n",
|
||||
" OUTPUT:\n",
|
||||
" returns the r-squared score as a float\n",
|
||||
" '''\n",
|
||||
" sse = np.sum((actual-preds)**2)\n",
|
||||
" sst = np.sum((actual-np.mean(actual))**2)\n",
|
||||
" return 1 - sse/sst\n",
|
||||
"\n",
|
||||
"# Check solution matches sklearn\n",
|
||||
"models = {'dec_pred': dec_pred, 'ran_pred': ran_pred, 'ada_pred': ada_pred,\n",
|
||||
" 'lin_pred': lin_pred}\n",
|
||||
"metrics = [r2_score, mean_squared_error, mean_absolute_error]\n",
|
||||
"\n",
|
||||
"for i in models:\n",
|
||||
" print(f'r2 manual for {i} is {r2(y_test, models[i]):.4f}')\n",
|
||||
" print(f'r2 sklearn for {i} is {r2_score(y_test, models[i]):.4f}')\n",
|
||||
" print()\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"> **Step 8:** Your turn fill in the function below and see if your result matches the built in for mean_squared_error. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 33,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"r2 manual for dec_pred is 20.1762\n",
|
||||
"r2 sklearn for dec_pred is 20.1762\n",
|
||||
"\n",
|
||||
"r2 manual for ran_pred is 10.5380\n",
|
||||
"r2 sklearn for ran_pred is 10.5380\n",
|
||||
"\n",
|
||||
"r2 manual for ada_pred is 15.6183\n",
|
||||
"r2 sklearn for ada_pred is 15.6183\n",
|
||||
"\n",
|
||||
"r2 manual for lin_pred is 20.7471\n",
|
||||
"r2 sklearn for lin_pred is 20.7471\n",
|
||||
"\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"def mse(actual, preds):\n",
|
||||
" '''\n",
|
||||
" INPUT:\n",
|
||||
" actual - numpy array or pd series of actual y values\n",
|
||||
" preds - numpy array or pd series of predicted y values\n",
|
||||
" OUTPUT:\n",
|
||||
" returns the mean squared error as a float\n",
|
||||
" '''\n",
|
||||
" \n",
|
||||
" return np.sum((actual-preds)**2)/len(actual)\n",
|
||||
"\n",
|
||||
"# Check your solution matches sklearn\n",
|
||||
"for i in models:\n",
|
||||
" print(f'r2 manual for {i} is {mse(y_test, models[i]):.4f}')\n",
|
||||
" print(f'r2 sklearn for {i} is {mean_squared_error(y_test, models[i]):.4f}')\n",
|
||||
" print()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"> **Step 9:** Now one last time - complete the function related to mean absolute error. Then check your function against the sklearn metric to assure they match. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 34,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"mae manual for dec_pred is 3.1707\n",
|
||||
"mae sklearn for dec_pred is 3.1707\n",
|
||||
"\n",
|
||||
"mae manual for ran_pred is 2.2222\n",
|
||||
"mae sklearn for ran_pred is 2.2222\n",
|
||||
"\n",
|
||||
"mae manual for ada_pred is 2.7089\n",
|
||||
"mae sklearn for ada_pred is 2.7089\n",
|
||||
"\n",
|
||||
"mae manual for lin_pred is 3.1513\n",
|
||||
"mae sklearn for lin_pred is 3.1513\n",
|
||||
"\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"def mae(actual, preds):\n",
|
||||
" '''\n",
|
||||
" INPUT:\n",
|
||||
" actual - numpy array or pd series of actual y values\n",
|
||||
" preds - numpy array or pd series of predicted y values\n",
|
||||
" OUTPUT:\n",
|
||||
" returns the mean absolute error as a float\n",
|
||||
" '''\n",
|
||||
" \n",
|
||||
" return np.sum(np.abs(actual-preds))/len(actual)\n",
|
||||
"\n",
|
||||
"# Check your solution matches sklearn\n",
|
||||
"for i in models:\n",
|
||||
" print(f'mae manual for {i} is {mae(y_test, models[i]):.4f}')\n",
|
||||
" print(f'mae sklearn for {i} is'\n",
|
||||
" f' {mean_absolute_error(y_test, models[i]):.4f}')\n",
|
||||
" print()\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"> **Step 10:** Which model performed the best in terms of each of the metrics? Note that r2 and mse will always match, but the mae may give a different best model. Use the dictionary and space below to match the best model via each metric."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 35,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"That's right! The random forest was best in terms of all the metrics this time!\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"#match each metric to the model that performed best on it\n",
|
||||
"a = 'decision tree'\n",
|
||||
"b = 'random forest'\n",
|
||||
"c = 'adaptive boosting'\n",
|
||||
"d = 'linear regression'\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"best_fit = {\n",
|
||||
" 'mse': b,\n",
|
||||
" 'r2': b,\n",
|
||||
" 'mae': b\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"#Tests your answer - don't change this code\n",
|
||||
"t.check_ten(best_fit)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 37,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Comparison of all models:\n",
|
||||
"\n",
|
||||
"r2_score for dec_pred 0.7334\n",
|
||||
"mean_squared_error for dec_pred 20.1762\n",
|
||||
"mean_absolute_error for dec_pred 3.1707\n",
|
||||
"\n",
|
||||
"r2_score for ran_pred 0.8608\n",
|
||||
"mean_squared_error for ran_pred 10.5380\n",
|
||||
"mean_absolute_error for ran_pred 2.2222\n",
|
||||
"\n",
|
||||
"r2_score for ada_pred 0.7936\n",
|
||||
"mean_squared_error for ada_pred 15.6183\n",
|
||||
"mean_absolute_error for ada_pred 2.7089\n",
|
||||
"\n",
|
||||
"r2_score for lin_pred 0.7259\n",
|
||||
"mean_squared_error for lin_pred 20.7471\n",
|
||||
"mean_absolute_error for lin_pred 3.1513\n",
|
||||
"\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# cells for work\n",
|
||||
"\n",
|
||||
"models = {'dec_pred': dec_pred, 'ran_pred': ran_pred, 'ada_pred': ada_pred,\n",
|
||||
" 'lin_pred': lin_pred}\n",
|
||||
"metrics = [r2_score, mean_squared_error, mean_absolute_error]\n",
|
||||
"\n",
|
||||
"print('Comparison of all models:\\n')\n",
|
||||
"for i in models:\n",
|
||||
" for j in range(len(metrics)):\n",
|
||||
" print(f'{metrics[j].__name__} for '\n",
|
||||
" f'{i} {metrics[j](y_test, models[i]):.4f}')\n",
|
||||
" print()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
@@ -0,0 +1,143 @@
|
||||
from sklearn.datasets import load_boston
|
||||
from sklearn.model_selection import train_test_split
|
||||
import numpy as np
|
||||
import tests2 as t
|
||||
from sklearn.tree import DecisionTreeRegressor
|
||||
from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor
|
||||
from sklearn.linear_model import LinearRegression
|
||||
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
|
||||
|
||||
boston = load_boston()
|
||||
y = boston.target
|
||||
X = boston.data
|
||||
|
||||
X_train, X_test, y_train, y_test = train_test_split(
|
||||
X, y, test_size=0.33, random_state=42)
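
# (The split above holds out a third of the rows for evaluation, and
# random_state=42 keeps the split reproducible across runs.)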
|
||||
|
||||
|
||||
dec_tree = DecisionTreeRegressor()
|
||||
ran_for = RandomForestRegressor()
|
||||
ada = AdaBoostRegressor()
|
||||
lin_reg = LinearRegression()
|
||||
|
||||
|
||||
dec_tree.fit(X_train, y_train)
|
||||
ran_for.fit(X_train, y_train)
|
||||
ada.fit(X_train, y_train)
|
||||
lin_reg.fit(X_train, y_train)
|
||||
|
||||
|
||||
dec_pred = dec_tree.predict(X_test)
|
||||
ran_pred = ran_for.predict(X_test)
|
||||
ada_pred = ada.predict(X_test)
|
||||
lin_pred = lin_reg.predict(X_test)
|
||||
|
||||
|
||||
# potential model options
|
||||
a = 'regression'
|
||||
b = 'classification'
|
||||
c = 'both regression and classification'
|
||||
|
||||
metrics_dict = {
|
||||
'precision': b,
|
||||
'recall': b,
|
||||
'accuracy': b,
|
||||
'r2_score': a,
|
||||
'mean_squared_error': a,
|
||||
'area_under_curve': b,
|
||||
'mean_absolute_area': a
|
||||
}
|
||||
|
||||
# checks your answer, no need to change this code
|
||||
t.q6_check(metrics_dict)
|
||||
print()
|
||||
|
||||
models = {'dec_pred': dec_pred, 'ran_pred': ran_pred, 'ada_pred': ada_pred,
|
||||
'lin_pred': lin_pred}
|
||||
metrics = [r2_score, mean_squared_error, mean_absolute_error]
|
||||
|
||||
|
||||
# Check r2
|
||||
def r2(actual, preds):
|
||||
'''
|
||||
INPUT:
|
||||
actual - numpy array or pd series of actual y values
|
||||
preds - numpy array or pd series of predicted y values
|
||||
OUTPUT:
|
||||
returns the r-squared score as a float
|
||||
'''
|
||||
sse = np.sum((actual - preds)**2)
|
||||
sst = np.sum((actual - np.mean(actual))**2)
|
||||
return 1 - sse / sst
|
||||
|
||||
|
||||
# Check solution matches sklearn
for i in models:
|
||||
print(f'r2 manual for {i} is {r2(y_test, models[i]):.4f}')
|
||||
print(f'r2 sklearn for {i} is {r2_score(y_test, models[i]):.4f}')
|
||||
print()
|
||||
|
||||
|
||||
|
||||
def mse(actual, preds):
|
||||
'''
|
||||
INPUT:
|
||||
actual - numpy array or pd series of actual y values
|
||||
preds - numpy array or pd series of predicted y values
|
||||
OUTPUT:
|
||||
returns the mean squared error as a float
|
||||
'''
|
||||
|
||||
return np.sum((actual - preds)**2) / len(actual)
|
||||
|
||||
|
||||
# Check your solution matches sklearn
|
||||
for i in models:
|
||||
print(f'mse manual for {i} is {mse(y_test, models[i]):.4f}')
|
||||
print(f'mse sklearn for {i} is'
|
||||
f' {mean_squared_error(y_test, models[i]):.4f}')
|
||||
print()
|
||||
|
||||
|
||||
def mae(actual, preds):
|
||||
'''
|
||||
INPUT:
|
||||
actual - numpy array or pd series of actual y values
|
||||
preds - numpy array or pd series of predicted y values
|
||||
OUTPUT:
|
||||
returns the mean absolute error as a float
|
||||
'''
|
||||
|
||||
return np.sum(np.abs(actual - preds)) / len(actual)
|
||||
|
||||
|
||||
# Check your solution matches sklearn
|
||||
for i in models:
|
||||
print(f'mae manual for {i} is {mae(y_test, models[i]):.4f}')
|
||||
print(f'mae sklearn for {i} is'
|
||||
f' {mean_absolute_error(y_test, models[i]):.4f}')
|
||||
print()
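
# A compact cross-check (a sketch that only uses names already defined above):
# np.isclose guards against tiny floating point differences between the manual
# helpers and the sklearn implementations.
for name, preds in models.items():
    assert np.isclose(r2(y_test, preds), r2_score(y_test, preds))
    assert np.isclose(mse(y_test, preds), mean_squared_error(y_test, preds))
    assert np.isclose(mae(y_test, preds), mean_absolute_error(y_test, preds))
print('Manual metrics match sklearn for every model.')
print()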
|
||||
|
||||
print('=================')
|
||||
print('Comparison of all models:\n')
|
||||
for i in models:
|
||||
for j in range(len(metrics)):
|
||||
print(f'{metrics[j].__name__} for '
|
||||
f'{i} {metrics[j](y_test, models[i]):.4f}')
|
||||
print()
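
# A short sketch of picking the best model per metric programmatically:
# r2_score is better when larger, while the two error metrics are better when
# smaller, so the comparison direction flips accordingly.
for metric in metrics:
    pick = max if metric is r2_score else min
    best = pick(models, key=lambda name: metric(y_test, models[name]))
    print(f'best model by {metric.__name__}: {best}')
print()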
|
||||
|
||||
|
||||
# match each metric to the model that performed best on it
|
||||
a = 'decision tree'
|
||||
b = 'random forest'
|
||||
c = 'adaptive boosting'
|
||||
d = 'linear regression'
|
||||
|
||||
|
||||
best_fit = {
|
||||
'mse': b,
|
||||
'r2': b,
|
||||
'mae': b
|
||||
}
|
||||
|
||||
# Tests your answer - don't change this code
|
||||
t.check_ten(best_fit)
|
||||
@@ -0,0 +1,94 @@
|
||||
def q1_check(models_dict):
|
||||
'''
|
||||
INPUT:
|
||||
models_dict - a dictionary with models and what types of problems the models can be used for
|
||||
|
||||
OUTPUT:
|
||||
nothing returned
|
||||
prints statements related to the correctness of the dictionary
|
||||
'''
|
||||
a = 'regression'
|
||||
b = 'classification'
|
||||
c = 'both regression and classification'
|
||||
|
||||
models = {
|
||||
'decision trees': c,
|
||||
'random forest': c,
|
||||
'adaptive boosting': c,
|
||||
'logistic regression': b,
|
||||
'linear regression': a,
|
||||
}
|
||||
|
||||
if models == models_dict:
|
||||
print("That's right! All but logistic regression can be used for predicting numeric values. And linear regression is the only one of these that you should not use for predicting categories. Technically sklearn won't stop you from doing most of anything you want, but you probably want to treat cases in the way you found by answering this question!")
|
||||
|
||||
if models['logistic regression'] != models_dict['logistic regression']:
|
||||
print("Oops! In most cases, you will only want to use logistic regression for classification problems.")
|
||||
|
||||
if models['linear regression'] != models_dict['linear regression']:
|
||||
print("Oops! Linear regression should actually only be used in regression cases. Try again.")
|
||||
|
||||
if (models['decision trees'] != models_dict['decision trees']) or (models['random forest'] != models_dict['random forest']) or (models['adaptive boosting'] != models_dict['adaptive boosting']):
|
||||
print("Oops! Actually random forests, decision trees, and adaptive boosting are all techniques that can be used for both regression and classification. Try again!")
|
||||
|
||||
|
||||
|
||||
|
||||
def q6_check(metrics):
|
||||
'''
|
||||
INPUT:
|
||||
metrics - a dictionary with metrics and what types of problems the metrics can be used for
|
||||
|
||||
OUTPUT:
|
||||
nothing returned
|
||||
prints statements related to the correctness of the dictionary
|
||||
'''
|
||||
a = 'regression'
|
||||
b = 'classification'
|
||||
c = 'both regression and classification'
|
||||
|
||||
#
|
||||
metrics_ch = {
|
||||
'precision': b,
|
||||
'recall': b,
|
||||
'accuracy': b,
|
||||
'r2_score': a,
|
||||
'mean_squared_error': a,
|
||||
'area_under_curve': b,
|
||||
'mean_absolute_area': a
|
||||
}
|
||||
|
||||
if metrics_ch == metrics:
|
||||
print("That's right! Looks like you know your metrics!")
|
||||
|
||||
    if (metrics_ch['precision'] != metrics['precision']) or (metrics_ch['recall'] != metrics['recall']) or (metrics_ch['accuracy'] != metrics['accuracy']) or (metrics_ch['area_under_curve'] != metrics['area_under_curve']):
|
||||
print("Oops! Actually, there are four metrics that are used for classification. Looks like you missed at least one of them.")
|
||||
|
||||
if metrics != metrics_ch:
|
||||
print("Oops! Something doesn't look quite right. You should have three metrics for regression, and the others should be for classification. None of the metrics are used for both regression and classification.")
|
||||
|
||||
|
||||
def check_ten(best_fit):
|
||||
'''
|
||||
    INPUT:
    best_fit - a dictionary matching each regression metric ('mse', 'r2', 'mae') to the letter of the model that performed best on it
|
||||
|
||||
    OUTPUT:
    nothing returned
    prints statements related to the correctness of the dictionary
|
||||
|
||||
'''
|
||||
a = 'decision tree'
|
||||
b = 'random forest'
|
||||
c = 'adaptive boosting'
|
||||
d = 'linear regression'
|
||||
|
||||
|
||||
best_fitting = {
|
||||
'mse': b,
|
||||
'r2': b,
|
||||
'mae': b
|
||||
}
|
||||
|
||||
if best_fit == best_fitting:
|
||||
print("That's right! The random forest was best in terms of all the metrics this time!")
|
||||
|
||||
else:
|
||||
print("Oops! Actually the best model was the same for all the metrics. Try again - all of your answers should be the same!")
|
||||