Finished Model Evaluation Metrics

This commit is contained in:
2019-07-12 02:01:41 +01:00
parent b5dd5aa345
commit af3c2caa6a
14 changed files with 8668 additions and 0 deletions

View File

@@ -0,0 +1,239 @@
# Import our libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, precision_score, recall_score
from sklearn.metrics import f1_score, fbeta_score
from sklearn.ensemble import BaggingClassifier, RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.svm import SVC
import matplotlib.pyplot as plt
from itertools import cycle
from sklearn.metrics import roc_curve, auc, roc_auc_score
from scipy import interp
# import tests as t
# Read in our dataset: a headerless file whose two columns are named here as
# the class label and the raw message text.
# NOTE(review): no `sep` is passed, so read_csv splits on commas. The public
# SMS Spam Collection file is tab-separated -- confirm the delimiter of this
# copy before trusting the parsed columns.
df = pd.read_csv('smsspamcollection_SMSSpamCollection',
header=None,
names=['label', 'sms_message'])
# Fix our response value: encode 'ham' as 0 and 'spam' as 1 (any other label
# value is mapped to NaN)
df['label'] = df.label.map({'ham': 0, 'spam': 1})
# Split our dataset into training and testing data; random_state=1 keeps the
# split the same from run to run
X_train, X_test, y_train, y_test = train_test_split(df['sms_message'],
df['label'],
random_state=1)
# Instantiate the CountVectorizer method
count_vector = CountVectorizer()
# Fit the training data and then return the matrix
training_data = count_vector.fit_transform(X_train)
# Transform testing data and return the matrix. Note we are not fitting the
# testing data into the CountVectorizer(), so the vocabulary comes from the
# training messages only
testing_data = count_vector.transform(X_test)
# Instantiate a number of our models. None of the ensembles is given a
# random_state, so their results can vary between runs.
naive_bayes = MultinomialNB()
bag_mod = BaggingClassifier(n_estimators=200)
rf_mod = RandomForestClassifier(n_estimators=200)
ada_mod = AdaBoostClassifier(n_estimators=300, learning_rate=0.2)
svm_mod = SVC()
# Fit each of the 5 models on the vectorized training messages
# This might take some time to run
naive_bayes.fit(training_data, y_train)
bag_mod.fit(training_data, y_train)
rf_mod.fit(training_data, y_train)
ada_mod.fit(training_data, y_train)
svm_mod.fit(training_data, y_train)
# Make predictions using each of your models on the vectorized test messages;
# these prediction arrays are what the metric code further down scores
nb = naive_bayes.predict(testing_data)
bag_pred = bag_mod.predict(testing_data)
rf_pred = rf_mod.predict(testing_data)
ada_pred = ada_mod.predict(testing_data)
svm_pred = svm_mod.predict(testing_data)
# accuracy is the total correct divided by the total to predict
def accuracy(actual, preds):
    '''
    Compute the fraction of predictions that equal the actual values.

    INPUT
    preds - predictions as a numpy array, pandas series or plain sequence
    actual - actual values as a numpy array, pandas series or plain sequence
    OUTPUT:
    returns the accuracy as a float
    '''
    # Coerce both inputs to arrays so the comparison is always element-wise
    # and positional: two plain lists would otherwise compare to a single
    # bool, and two pandas Series with different indexes would raise.
    actual = np.asarray(actual)
    preds = np.asarray(preds)
    return np.sum(preds == actual) / len(actual)
# Print the hand-written accuracy followed by scikit-learn's value for the
# naive Bayes predictions so the two can be compared by eye.
for score_fn in (accuracy, accuracy_score):
    print(score_fn(y_test, nb))
print("Since these match, we correctly calculated our metric!")
# precision is the true positives over the predicted positive values
def precision(actual, preds):
    '''
    Compute the share of predicted positives that are truly positive.

    INPUT
    (assumes positive = 1 and negative = 0)
    preds - predictions as a numpy array, pandas series or plain sequence
    actual - actual values as a numpy array, pandas series or plain sequence
    OUTPUT:
    returns the precision as a float; 0.0 when nothing was predicted positive
    '''
    # Coerce to arrays so every comparison below is element-wise and
    # positional, whatever container (or pandas index) the caller passes.
    actual = np.asarray(actual)
    preds = np.asarray(preds)
    TP = np.sum((preds == actual) & (preds > 0))
    FP = np.sum((preds == 1) & (actual == 0))
    predicted_positive = TP + FP
    # With no predicted positives the ratio would be 0/0 (nan plus a runtime
    # warning); report 0.0 instead.
    if predicted_positive == 0:
        return 0.0
    return TP / predicted_positive
# Print the hand-written precision followed by scikit-learn's value for the
# naive Bayes predictions so the two can be compared by eye.
for score_fn in (precision, precision_score):
    print(score_fn(y_test, nb))
print("If the above match, you got it!")
# recall is true positives over all actual positive values
def recall(actual, preds):
    '''
    Compute the share of actual positives that were predicted positive.

    INPUT
    (assumes positive = 1 and negative = 0)
    preds - predictions as a numpy array, pandas series or plain sequence
    actual - actual values as a numpy array, pandas series or plain sequence
    OUTPUT:
    returns the recall as a float; 0.0 when there are no actual positives
    '''
    # Coerce to arrays so every comparison below is element-wise and
    # positional, whatever container (or pandas index) the caller passes.
    actual = np.asarray(actual)
    preds = np.asarray(preds)
    TP = np.sum((preds == actual) & (preds > 0))
    FN = np.sum((preds == 0) & (actual == 1))
    actual_positive = TP + FN
    # With no actual positives the ratio would be 0/0 (nan plus a runtime
    # warning); report 0.0 instead.
    if actual_positive == 0:
        return 0.0
    return TP / actual_positive
# Print the hand-written recall followed by scikit-learn's value for the
# naive Bayes predictions so the two can be compared by eye.
for score_fn in (recall, recall_score):
    print(score_fn(y_test, nb))
print("If the above match, you got it!")
# f1_score is 2*(precision*recall)/(precision+recall))
def f1(actual, preds):
    '''
    Compute the harmonic mean of precision and recall.

    INPUT
    preds - predictions as a numpy array or pandas series
    actual - actual values as a numpy array or pandas series
    OUTPUT:
    returns the f1score as a float; 0.0 when precision + recall is not
    positive
    '''
    prec = precision(actual, preds)
    rec = recall(actual, preds)
    denominator = prec + rec
    # Avoid dividing by zero when both scores are 0. Written as
    # `not (... > 0)` so a nan coming from an undefined precision or recall
    # is handled the same way.
    if not denominator > 0:
        return 0.0
    return 2 * ((prec * rec) / denominator)
# Print the hand-written F1 score followed by scikit-learn's value for the
# naive Bayes predictions so the two can be compared by eye.
for score_fn in (f1, f1_score):
    print(score_fn(y_test, nb))
print("If the above match, you got it!")
# Pair every scenario in the dictionary with the letter of the metric that
# suits it best.
a, b, c, d = "recall", "precision", "accuracy", 'f1-score'
seven_sol = {
    ('We have imbalanced classes, which metric do we definitely not want '
     'to use?'): c,
    ('We really want to make sure the positive cases are all caught even '
     'if that means we identify some negatives as positives'): a,
    ('When we identify something as positive, we want to be sure it is '
     'truly positive'): b,
    'We care equally about identifying positive and negative cases': d,
}
# Checking this dictionary with sol_seven prints: "That's right! It isn't
# really necessary to memorize these in practice, but it is important to know
# they exist and know why might use one metric over another for a particular
# situation."
# Test-set predictions of every fitted model, keyed by a display name.
models = {
    'nb': nb,
    'bag_pred': bag_pred,
    'rf_pred': rf_pred,
    'ada_pred': ada_pred,
    'svm_pred': svm_pred,
}
# scikit-learn scorers to report for each set of predictions.
metrics = [accuracy_score, precision_score, recall_score, f1_score]
# One line per (model, metric) pair, with a blank line after each model.
for model_name, model_preds in models.items():
    for metric in metrics:
        print(f'{metric.__name__} for '
              f'{model_name} {metric(y_test, model_preds):.4f}')
    print()
# With beta = 1 the F-beta score weighs precision and recall equally, so it
# should print the same value as f1_score for every model.
beta = 1
print(f1_score(y_test, nb))
# beta is passed by keyword: current scikit-learn releases do not accept it
# positionally.
print(fbeta_score(y_test, nb, beta=beta))
for i in models:
    print(f'fbeta_score for {i} '
          f'{fbeta_score(y_test, models[i], beta=beta)}')
    # f1_score has no beta parameter; a third positional argument would be
    # taken as `labels`, so only the targets and predictions are passed.
    print(f'f1_score for {i} {f1_score(y_test, models[i])}')
    print()
# Function for calculating auc and roc
def build_roc_auc(model, X_train, X_test, y_train, y_test):
    '''
    Fit the model, plot its ROC curve on the test data and return the AUC.

    INPUT:
    model - an sklearn instantiated model; it must provide predict_proba and
            is fitted on the training data by this call
    X_train - the training data
    y_train - the training response values (must be categorical)
    X_test - the test data
    y_test - the test response values (must be categorical)
    OUTPUT:
    auc - returns the area under the plotted ROC curve as a float
    shows the roc curve
    '''
    y_preds = model.fit(X_train, y_train).predict_proba(X_test)
    # Column 1 of predict_proba is the score of the model's second class,
    # which is label 1 for 0/1 targets.
    positive_scores = np.ravel(y_preds[:, 1])

    # The problem is binary, so a single ROC curve is computed once instead
    # of once per test row.
    fpr, tpr, _ = roc_curve(np.ravel(y_test), positive_scores)
    # The area is taken from the unrounded scores: rounding them to 0/1 first
    # would collapse the curve to one threshold and disagree with the plot.
    roc_auc = auc(fpr, tpr)

    plt.plot(fpr, tpr, color='darkorange',
             lw=2, label='ROC curve (area = %0.2f)' % roc_auc)
    # Diagonal reference line: the curve of a classifier that guesses.
    plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('Receiver operating characteristic example')
    # Without a legend the curve label (and its area) is never displayed.
    plt.legend(loc='lower right')
    plt.show()
    return float(roc_auc)
# Models plotted here are the ones build_roc_auc is run on; each call refits
# the model and shows its ROC curve.
instaniated_models = [naive_bayes, bag_mod, rf_mod]
for candidate in instaniated_models:
    build_roc_auc(candidate, training_data, testing_data, y_train, y_test)
# Run the first model (naive Bayes) once more, this time printing the value
# that build_roc_auc returns.
print(
    build_roc_auc(
        instaniated_models[0],
        training_data,
        testing_data,
        y_train,
        y_test,
    )
)

View File

@@ -0,0 +1,96 @@
0.24539,0.81725,0
0.21774,0.76462,0
0.20161,0.69737,0
0.20161,0.58041,0
0.2477,0.49561,0
0.32834,0.44883,0
0.39516,0.48099,0
0.39286,0.57164,0
0.33525,0.62135,0
0.33986,0.71199,0
0.34447,0.81433,0
0.28226,0.82602,0
0.26613,0.75,0
0.26613,0.63596,0
0.32604,0.54825,0
0.28917,0.65643,0
0.80069,0.71491,0
0.80069,0.64181,0
0.80069,0.50146,0
0.79839,0.36988,0
0.73157,0.25,0
0.63249,0.18275,0
0.60023,0.27047,0
0.66014,0.34649,0
0.70161,0.42251,0
0.70853,0.53947,0
0.71544,0.63304,0
0.74309,0.72076,0
0.75,0.63596,0
0.75,0.46345,0
0.72235,0.35526,0
0.66935,0.28509,0
0.20622,0.94298,1
0.26613,0.8962,1
0.38134,0.8962,1
0.42051,0.94591,1
0.49885,0.86404,1
0.31452,0.93421,1
0.53111,0.72076,1
0.45276,0.74415,1
0.53571,0.6038,1
0.60484,0.71491,1
0.60945,0.58333,1
0.51267,0.47807,1
0.50806,0.59211,1
0.46198,0.30556,1
0.5288,0.41082,1
0.38594,0.35819,1
0.31682,0.31433,1
0.29608,0.20906,1
0.36982,0.27632,1
0.42972,0.18275,1
0.51498,0.10965,1
0.53111,0.20906,1
0.59793,0.095029,1
0.73848,0.086257,1
0.83065,0.18275,1
0.8629,0.10965,1
0.88364,0.27924,1
0.93433,0.30848,1
0.93433,0.19444,1
0.92512,0.43421,1
0.87903,0.43421,1
0.87903,0.58626,1
0.9182,0.71491,1
0.85138,0.8348,1
0.85599,0.94006,1
0.70853,0.94298,1
0.70853,0.87281,1
0.59793,0.93129,1
0.61175,0.83187,1
0.78226,0.82895,1
0.78917,0.8962,1
0.90668,0.89912,1
0.14862,0.92251,1
0.15092,0.85819,1
0.097926,0.85819,1
0.079493,0.91374,1
0.079493,0.77632,1
0.10945,0.79678,1
0.12327,0.67982,1
0.077189,0.6886,1
0.081797,0.58626,1
0.14862,0.58041,1
0.14862,0.5307,1
0.14171,0.41959,1
0.08871,0.49269,1
0.095622,0.36696,1
0.24539,0.3962,1
0.1947,0.29678,1
0.16935,0.22368,1
0.15553,0.13596,1
0.23848,0.12427,1
0.33065,0.12427,1
0.095622,0.2617,1
0.091014,0.20322,1
1 0.24539 0.81725 0
2 0.21774 0.76462 0
3 0.20161 0.69737 0
4 0.20161 0.58041 0
5 0.2477 0.49561 0
6 0.32834 0.44883 0
7 0.39516 0.48099 0
8 0.39286 0.57164 0
9 0.33525 0.62135 0
10 0.33986 0.71199 0
11 0.34447 0.81433 0
12 0.28226 0.82602 0
13 0.26613 0.75 0
14 0.26613 0.63596 0
15 0.32604 0.54825 0
16 0.28917 0.65643 0
17 0.80069 0.71491 0
18 0.80069 0.64181 0
19 0.80069 0.50146 0
20 0.79839 0.36988 0
21 0.73157 0.25 0
22 0.63249 0.18275 0
23 0.60023 0.27047 0
24 0.66014 0.34649 0
25 0.70161 0.42251 0
26 0.70853 0.53947 0
27 0.71544 0.63304 0
28 0.74309 0.72076 0
29 0.75 0.63596 0
30 0.75 0.46345 0
31 0.72235 0.35526 0
32 0.66935 0.28509 0
33 0.20622 0.94298 1
34 0.26613 0.8962 1
35 0.38134 0.8962 1
36 0.42051 0.94591 1
37 0.49885 0.86404 1
38 0.31452 0.93421 1
39 0.53111 0.72076 1
40 0.45276 0.74415 1
41 0.53571 0.6038 1
42 0.60484 0.71491 1
43 0.60945 0.58333 1
44 0.51267 0.47807 1
45 0.50806 0.59211 1
46 0.46198 0.30556 1
47 0.5288 0.41082 1
48 0.38594 0.35819 1
49 0.31682 0.31433 1
50 0.29608 0.20906 1
51 0.36982 0.27632 1
52 0.42972 0.18275 1
53 0.51498 0.10965 1
54 0.53111 0.20906 1
55 0.59793 0.095029 1
56 0.73848 0.086257 1
57 0.83065 0.18275 1
58 0.8629 0.10965 1
59 0.88364 0.27924 1
60 0.93433 0.30848 1
61 0.93433 0.19444 1
62 0.92512 0.43421 1
63 0.87903 0.43421 1
64 0.87903 0.58626 1
65 0.9182 0.71491 1
66 0.85138 0.8348 1
67 0.85599 0.94006 1
68 0.70853 0.94298 1
69 0.70853 0.87281 1
70 0.59793 0.93129 1
71 0.61175 0.83187 1
72 0.78226 0.82895 1
73 0.78917 0.8962 1
74 0.90668 0.89912 1
75 0.14862 0.92251 1
76 0.15092 0.85819 1
77 0.097926 0.85819 1
78 0.079493 0.91374 1
79 0.079493 0.77632 1
80 0.10945 0.79678 1
81 0.12327 0.67982 1
82 0.077189 0.6886 1
83 0.081797 0.58626 1
84 0.14862 0.58041 1
85 0.14862 0.5307 1
86 0.14171 0.41959 1
87 0.08871 0.49269 1
88 0.095622 0.36696 1
89 0.24539 0.3962 1
90 0.1947 0.29678 1
91 0.16935 0.22368 1
92 0.15553 0.13596 1
93 0.23848 0.12427 1
94 0.33065 0.12427 1
95 0.095622 0.2617 1
96 0.091014 0.20322 1

View File

@@ -0,0 +1,35 @@
# Import statements
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
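# train_test_split is imported above from sklearn.model_selection. The link
# below documents its legacy sklearn.cross_validation location (release 0.16):
# http://scikit-learn.org/0.16/modules/generated/sklearn.cross_validation.train_test_split.html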
# Load the headerless data file and work with it as a plain numpy array.
data = np.asarray(pd.read_csv('data.csv', header=None))

# The first two columns are the features X; the third column is the label y.
X, y = data[:, :2], data[:, 2]

# Hold out 25% of the rows for testing, with a random state of 42.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42
)

# Instantiate the decision tree model and fit it to the training data.
model = DecisionTreeClassifier().fit(X_train, y_train)

# Predict on the test data.
y_pred = model.predict(X_test)

# Accuracy of those predictions on the test data, kept in the variable acc.
acc = accuracy_score(y_test, y_pred)
print(acc)

View File

@@ -0,0 +1,98 @@
def test_one(mod_arg):
    '''
    INPUT:
    mod_arg - a set of the strings pertaining to the objects that were passed in the fitting of our models
    OUTPUT:
    prints correctness of the set
    nothing returned
    '''
    # The only accepted answer: the names of the two objects passed to fit().
    expected = {'y_train', 'training_data'}
    if mod_arg == expected:
        print("That's right! You need to fit on both parts of the data "
              "pertaining to training data!")
    else:
        print("Oops! That doesn't look quite right! Remember you only want "
              "to fit your model to the training data! Notice that X_train "
              "hasn't had the data cleaned yet, so that won't work to pass "
              "to our fit method. Hint - there are two items you should be "
              "passing to your fit method.")
def test_two(mod_arg):
    '''
    INPUT:
    mod_arg - a set of the strings pertaining to the objects that were passed in the predicting step
    OUTPUT:
    prints correctness of the set
    nothing returned
    '''
    # The only accepted answer: the name of the single object passed to
    # predict().
    expected = {'testing_data'}
    if mod_arg == expected:
        print("That's right! To see how well our models perform in a new "
              "setting, you will want to predict on the test set of data.")
    else:
        print("Oops! That doesn't look quite right! Remember you will want "
              "to predict on test data to know how well your model will do "
              "in a new situation. Hint - there is only one item that should "
              "be passed to the predict method of your model. Also notice "
              "that X_test has not been cleaned yet, so this cannot be "
              "passed to the predict method!")
def sol_seven(seven_sol):
    '''
    INPUT: dictionary matching each scenario to a metric name
    OUTPUT: nothing returned - prints statement related to correctness of dictionary

    Prints a success message when the dictionary equals the expected answers;
    otherwise prints one hint for every scenario that is wrong or missing.
    '''
    a = "recall"
    b = "precision"
    c = "accuracy"
    d = 'f1-score'
    # One row per scenario: (question, expected metric, hint printed when the
    # submitted answer differs).
    checks = [
        ('We have imbalanced classes, which metric do we definitely not want '
         'to use?',
         c,
         "Oops! The first one isn't right. If we do not have balanced "
         "classes, we probably want to stay away from using accuracy."),
        ('We really want to make sure the positive cases are all caught even '
         'if that means we identify some negatives as positives',
         a,
         "Oops! The second one isn't right. If we really want to be sure "
         "about catching positive cases, we should be closely watching "
         "recall, which has all of the positive clases in the denominator - "
         "so we are monitoring how many of them we get right with recall."),
        ('When we identify something as positive, we want to be sure it is '
         'truly positive',
         b,
         "Oops! The third one isn't right. Using precision, we have the "
         "predicted positives in the denominator. Therefore, this will help "
         "us be sure the items we identify as positive are actually "
         "positive."),
        ('We care equally about identifying positive and negative cases',
         d,
         "Oops! The last one isn't right. If we care equally about precision "
         "and recall, we should use f1 score."),
    ]
    seven_sol_1 = {question: answer for question, answer, _ in checks}

    if seven_sol == seven_sol_1:
        print("That's right! It isn't really necessary to memorize these in "
              "practice, but it is important to know they exist and know why "
              "might use one metric over another for a particular situation.")

    for question, answer, hint in checks:
        # .get() so that a scenario missing from the submission is reported
        # as wrong instead of raising KeyError.
        if seven_sol.get(question) != answer:
            print(hint)
def sol_eight(eight_sol):
    '''
    INPUT: dictionary matching each scenario to the name of a model
    OUTPUT: nothing returned - prints statement related to correctness of dictionary
    '''
    a = "naive-bayes"
    c = "random-forest"
    # Expected model for each scenario. The keys are the same scenario
    # strings that sol_seven uses.
    eight_sol_1 = {
        'We have imbalanced classes, which metric do we definitely not want '
        'to use?': a,
        'We really want to make sure the positive cases are all caught even '
        'if that means we identify some negatives as positives': a,
        'When we identify something as positive, we want to be sure it is '
        'truly positive': c,
        'We care equally about identifying positive and negative cases': a,
    }
    if eight_sol_1 == eight_sol:
        print("That's right! Naive Bayes was the best model for all of our "
              "metrics except precision!")
    else:
        print("Oops! That doesn't look right. Make sure you are performing "
              "your predictions and matching on the test data. Hint: The "
              "naive bayes model actually performs best on all of the "
              "metrics except one. Try again!")