Finished Model Evaluation Metrics

2019-07-12 02:01:41 +01:00
parent b5dd5aa345
commit af3c2caa6a
14 changed files with 8668 additions and 0 deletions
--- a/models/classification_metrics.py
+++ b/models/classification_metrics.py
@@ -0,0 +1,239 @@
+# Import our libraries
+import pandas as pd
+import numpy as np
+from sklearn.model_selection import train_test_split
+from sklearn.feature_extraction.text import CountVectorizer
+from sklearn.naive_bayes import MultinomialNB
+from sklearn.metrics import accuracy_score, precision_score, recall_score
+from sklearn.metrics import f1_score, fbeta_score
+from sklearn.ensemble import BaggingClassifier, RandomForestClassifier
+from sklearn.ensemble import AdaBoostClassifier
+from sklearn.svm import SVC
+import matplotlib.pyplot as plt
+from itertools import cycle
+from sklearn.metrics import roc_curve, auc, roc_auc_score
+from scipy import interp
+# import tests as t
+
+# Read in our dataset
+# NOTE(review): no `sep` is passed, so pandas splits fields on commas. If this
+# file is tab-separated, `sep='\t'` would be needed -- confirm against the data.
+df = pd.read_csv('smsspamcollection_SMSSpamCollection',
+                 header=None,
+                 names=['label', 'sms_message'])
+
+# Encode the response as integers: ham -> 0, spam -> 1
+df['label'] = df.label.map({'ham': 0, 'spam': 1})
+
+# Split our dataset into training and testing data
+X_train, X_test, y_train, y_test = train_test_split(df['sms_message'],
+                                                    df['label'],
+                                                    random_state=1)
+
+# Instantiate the CountVectorizer method
+count_vector = CountVectorizer()
+
+# Fit the training data and then return the matrix
+training_data = count_vector.fit_transform(X_train)
+
+# Transform testing data and return the matrix. Note we are not fitting the
+# testing data into the CountVectorizer()
+testing_data = count_vector.transform(X_test)
+
+# Instantiate a number of our models
+naive_bayes = MultinomialNB()
+bag_mod = BaggingClassifier(n_estimators=200)
+rf_mod = RandomForestClassifier(n_estimators=200)
+ada_mod = AdaBoostClassifier(n_estimators=300, learning_rate=0.2)
+svm_mod = SVC()
+
+# Fit each of the 5 models on the vectorized training data
+# This might take some time to run
+naive_bayes.fit(training_data, y_train)
+bag_mod.fit(training_data, y_train)
+rf_mod.fit(training_data, y_train)
+ada_mod.fit(training_data, y_train)
+svm_mod.fit(training_data, y_train)
+
+
+# Make predictions on the vectorized test data using each of your models
+nb = naive_bayes.predict(testing_data)
+bag_pred = bag_mod.predict(testing_data)
+rf_pred = rf_mod.predict(testing_data)
+ada_pred = ada_mod.predict(testing_data)
+svm_pred = svm_mod.predict(testing_data)
+
+
+# accuracy is the total correct divided by the total to predict
+def accuracy(actual, preds):
+    '''
+    Compute the fraction of predictions that equal the actual values.
+
+    INPUT
+    actual - actual values as a numpy array or pandas series
+    preds - predictions as a numpy array or pandas series
+
+    OUTPUT:
+    returns the accuracy as a float
+    '''
+    n_correct = np.sum(preds == actual)
+    n_total = len(actual)
+    return n_correct / n_total
+
+
+# Print the hand-written accuracy next to sklearn's accuracy_score for the
+# naive Bayes test predictions. The closing message is printed unconditionally;
+# the two values are not compared in code.
+print(accuracy(y_test, nb))
+print(accuracy_score(y_test, nb))
+print("Since these match, we correctly calculated our metric!")
+
+
+# precision is the true positives over the predicted positive values
+def precision(actual, preds):
+    '''
+    Compute precision: true positives / (true positives + false positives).
+
+    INPUT
+    (assumes positive = 1 and negative = 0)
+    actual - actual values as a numpy array or pandas series
+    preds - predictions as a numpy array or pandas series
+
+    OUTPUT:
+    returns the precision as a float
+    (0.0 when no value was predicted positive)
+    '''
+    TP = np.sum((preds == actual) & (preds > 0))
+    FP = np.sum((preds == 1) & (actual == 0))
+    predicted_pos = TP + FP
+    if predicted_pos == 0:
+        # Nothing predicted positive: return 0.0 instead of the nan (and numpy
+        # warning) that 0 / 0 would give.
+        return 0.0
+    return TP / predicted_pos
+
+
+# Print the hand-written precision next to sklearn's precision_score for the
+# naive Bayes test predictions so the two values can be compared by eye.
+print(precision(y_test, nb))
+print(precision_score(y_test, nb))
+print("If the above match, you got it!")
+
+
+# recall is true positives over all actual positive values
+def recall(actual, preds):
+    '''
+    Compute recall: true positives / (true positives + false negatives).
+
+    INPUT
+    (assumes positive = 1 and negative = 0)
+    actual - actual values as a numpy array or pandas series
+    preds - predictions as a numpy array or pandas series
+
+    OUTPUT:
+    returns the recall as a float
+    (0.0 when there are no actual positive values)
+    '''
+    TP = np.sum((preds == actual) & (preds > 0))
+    FN = np.sum((preds == 0) & (actual == 1))
+    actual_pos = TP + FN
+    if actual_pos == 0:
+        # No actual positives: return 0.0 instead of the nan (and numpy
+        # warning) that 0 / 0 would give.
+        return 0.0
+    return TP / actual_pos
+
+
+# Print the hand-written recall next to sklearn's recall_score for the
+# naive Bayes test predictions so the two values can be compared by eye.
+print(recall(y_test, nb))
+print(recall_score(y_test, nb))
+print("If the above match, you got it!")
+
+
+# f1_score is 2*(precision*recall)/(precision+recall))
+def f1(actual, preds):
+    '''
+    Compute the F1 score, the harmonic mean of precision and recall.
+
+    INPUT
+    actual - actual values as a numpy array or pandas series
+    preds - predictions as a numpy array or pandas series
+
+    OUTPUT:
+    returns the f1score as a float
+    (0.0 when precision and recall are both 0)
+    '''
+    prec = precision(actual, preds)
+    rec = recall(actual, preds)
+    if prec + rec == 0:
+        # Both components are zero: return 0.0 rather than dividing 0 by 0.
+        return 0.0
+    return 2 * ((prec * rec) / (prec + rec))
+
+
+# Print the hand-written F1 score next to sklearn's f1_score for the
+# naive Bayes test predictions so the two values can be compared by eye.
+print(f1(y_test, nb))
+print(f1_score(y_test, nb))
+print("If the above match, you got it!")
+
+
+# add the letter of the most appropriate metric to each statement
+# in the dictionary
+a = "recall"
+b = "precision"
+c = "accuracy"
+d = 'f1-score'
+
+
+# Each statement is added in the order the quiz lists it
+seven_sol = {}
+seven_sol['We have imbalanced classes, which metric do we definitely not '
+          'want to use?'] = c
+seven_sol['We really want to make sure the positive cases are all caught '
+          'even if that means we identify some negatives as positives'] = a
+seven_sol['When we identify something as positive, we want to be sure it is '
+          'truly positive'] = b
+seven_sol['We care equally about identifying positive and negative '
+          'cases'] = d
+
+# This gives: That's right!  It isn't really necessary to memorize these in
+# practice, but it is important to know they exist and know why might use one
+# metric over another for a particular situation.
+
+
+# Test-set predictions of each fitted model, keyed by a short name
+models = {'nb': nb,
+          'bag_pred': bag_pred,
+          'rf_pred': rf_pred,
+          'ada_pred': ada_pred,
+          'svm_pred': svm_pred}
+# sklearn metric functions, each called as metric(y_test, predictions)
+metrics = [accuracy_score, precision_score, recall_score, f1_score]
+
+# Print every metric for every model, with a blank line after each model
+for model_name, model_preds in models.items():
+    for metric in metrics:
+        print(f'{metric.__name__} for '
+              f'{model_name} {metric(y_test, model_preds):.4f}')
+    print()
+
+
+beta = 1
+
+# With beta = 1 the F-beta score is the F1 score, so these two should match
+print(f1_score(y_test, nb))
+print(fbeta_score(y_test, nb, beta=beta))
+
+for model_name, model_preds in models.items():
+    print(f'fbeta_score for {model_name} '
+          f'{fbeta_score(y_test, model_preds, beta=beta)}')
+    # f1_score takes no beta: its third positional parameter is `labels`, so
+    # beta must not be passed here.
+    print(f'f1_score for {model_name} {f1_score(y_test, model_preds)}')
+    print()
+
+
+# Function for calculating auc and roc
+
+def build_roc_auc(model, X_train, X_test, y_train, y_test):
+    '''
+    Fit the model on the training data, plot its ROC curve on the test data
+    and return an AUC score.
+
+    INPUT:
+    model - an sklearn instantiated model (it is refit here and must
+            provide predict_proba)
+    X_train - the training data
+    y_train - the training response values (must be categorical)
+    X_test - the test data
+    y_test - the test response values (must be categorical)
+    OUTPUT:
+    auc - returns auc as a float
+    prints the roc curve
+    '''
+    # Column 1 of predict_proba is used as the score of the positive class
+    pos_scores = model.fit(X_train, y_train).predict_proba(X_test)[:, 1]
+
+    # There is a single response column, so one ROC curve is all that is
+    # needed; it does not depend on the number of test rows.
+    fpr, tpr, _ = roc_curve(y_test, pos_scores)
+    roc_area = auc(fpr, tpr)
+
+    plt.plot(fpr, tpr, color='darkorange',
+             lw=2, label='ROC curve (area = %0.2f)' % roc_area)
+    plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
+    plt.xlim([0.0, 1.0])
+    plt.ylim([0.0, 1.05])
+    plt.xlabel('False Positive Rate')
+    plt.ylabel('True Positive Rate')
+    plt.title('Receiver operating characteristic example')
+    # Without a legend the curve label (and its area) is never displayed
+    plt.legend(loc='lower right')
+    plt.show()
+
+    # NOTE(review): the returned score is computed from the rounded (hard 0/1)
+    # predictions, so it can differ from the area shown in the plot label.
+    # Kept unchanged for existing callers -- confirm which value is intended.
+    return roc_auc_score(y_test, np.round(pos_scores))
+
+
+# Models to draw ROC curves for; build_roc_auc refits each one it is given
+instaniated_models = [naive_bayes, bag_mod, rf_mod]
+
+for fitted_model in instaniated_models:
+    build_roc_auc(fitted_model, training_data, testing_data, y_train, y_test)
+
+# Run the first model (naive Bayes) once more and print the returned score
+nb_auc = build_roc_auc(instaniated_models[0], training_data, testing_data,
+                       y_train, y_test)
+print(nb_auc)
--- a/python/Supervised
+++ b/python/Supervised
@@ -0,0 +1,96 @@
+0.24539,0.81725,0
+0.21774,0.76462,0
+0.20161,0.69737,0
+0.20161,0.58041,0
+0.2477,0.49561,0
+0.32834,0.44883,0
+0.39516,0.48099,0
+0.39286,0.57164,0
+0.33525,0.62135,0
+0.33986,0.71199,0
+0.34447,0.81433,0
+0.28226,0.82602,0
+0.26613,0.75,0
+0.26613,0.63596,0
+0.32604,0.54825,0
+0.28917,0.65643,0
+0.80069,0.71491,0
+0.80069,0.64181,0
+0.80069,0.50146,0
+0.79839,0.36988,0
+0.73157,0.25,0
+0.63249,0.18275,0
+0.60023,0.27047,0
+0.66014,0.34649,0
+0.70161,0.42251,0
+0.70853,0.53947,0
+0.71544,0.63304,0
+0.74309,0.72076,0
+0.75,0.63596,0
+0.75,0.46345,0
+0.72235,0.35526,0
+0.66935,0.28509,0
+0.20622,0.94298,1
+0.26613,0.8962,1
+0.38134,0.8962,1
+0.42051,0.94591,1
+0.49885,0.86404,1
+0.31452,0.93421,1
+0.53111,0.72076,1
+0.45276,0.74415,1
+0.53571,0.6038,1
+0.60484,0.71491,1
+0.60945,0.58333,1
+0.51267,0.47807,1
+0.50806,0.59211,1
+0.46198,0.30556,1
+0.5288,0.41082,1
+0.38594,0.35819,1
+0.31682,0.31433,1
+0.29608,0.20906,1
+0.36982,0.27632,1
+0.42972,0.18275,1
+0.51498,0.10965,1
+0.53111,0.20906,1
+0.59793,0.095029,1
+0.73848,0.086257,1
+0.83065,0.18275,1
+0.8629,0.10965,1
+0.88364,0.27924,1
+0.93433,0.30848,1
+0.93433,0.19444,1
+0.92512,0.43421,1
+0.87903,0.43421,1
+0.87903,0.58626,1
+0.9182,0.71491,1
+0.85138,0.8348,1
+0.85599,0.94006,1
+0.70853,0.94298,1
+0.70853,0.87281,1
+0.59793,0.93129,1
+0.61175,0.83187,1
+0.78226,0.82895,1
+0.78917,0.8962,1
+0.90668,0.89912,1
+0.14862,0.92251,1
+0.15092,0.85819,1
+0.097926,0.85819,1
+0.079493,0.91374,1
+0.079493,0.77632,1
+0.10945,0.79678,1
+0.12327,0.67982,1
+0.077189,0.6886,1
+0.081797,0.58626,1
+0.14862,0.58041,1
+0.14862,0.5307,1
+0.14171,0.41959,1
+0.08871,0.49269,1
+0.095622,0.36696,1
+0.24539,0.3962,1
+0.1947,0.29678,1
+0.16935,0.22368,1
+0.15553,0.13596,1
+0.23848,0.12427,1
+0.33065,0.12427,1
+0.095622,0.2617,1
+0.091014,0.20322,1
--- a/python/Supervised
+++ b/python/Supervised
@@ -0,0 +1,35 @@
+# Import statements
+from sklearn.tree import DecisionTreeClassifier
+from sklearn.metrics import accuracy_score
+import pandas as pd
+import numpy as np
+from sklearn.model_selection import train_test_split
+
+# train_test_split is imported above from sklearn.model_selection:
+# https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.train_test_split.html
+
+
+# Read in the data (the file has no header row).
+data = np.asarray(pd.read_csv('data.csv', header=None))
+# Assign the features to the variable X, and the labels to the variable y.
+# The first two columns are the features; the third column is the label.
+X = data[:, 0:2]
+y = data[:, 2]
+
+# Use train test split to split your data
+# Use a test size of 25% and a random state of 42
+X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25,
+                                                    random_state=42)
+
+# Instantiate your decision tree model
+model = DecisionTreeClassifier()
+
+# Fit the model to the training data.
+model.fit(X_train, y_train)
+
+# Make predictions on the test data
+y_pred = model.predict(X_test)
+
+# Calculate the accuracy on the test data and assign it to the variable acc.
+acc = accuracy_score(y_test, y_pred)
+print(acc)
--- a/python/Supervised
+++ b/python/Supervised
@@ -0,0 +1,98 @@
+def test_one(mod_arg):
+    '''
+    Check which objects were named as the arguments of the fit step.
+
+    INPUT:
+    mod_arg - a set of the strings pertaining to the objects that were passed in the fitting of our models
+
+    OUTPUT:
+    prints correctness of the set
+    nothing returned
+    '''
+    # The expected answer is exactly these two names
+    if mod_arg == {'y_train', 'training_data'}:
+        print("That's right!  You need to fit on both parts of the data pertaining to training data!")
+    else:
+        print("Oops!  That doesn't look quite right!  Remember you only want to fit your model to the training data!  Notice that X_train hasn't had the data cleaned yet, so that won't work to pass to our fit method. Hint - there are two items you should be passing to your fit method.")
+
+
+def test_two(mod_arg):
+    '''
+    Check which object was named as the argument of the predict step.
+
+    INPUT:
+    mod_arg - a set of the strings pertaining to the objects that were passed in the predicting step
+
+    OUTPUT:
+    prints correctness of the set
+    nothing returned
+    '''
+    # The expected answer is exactly this one name
+    if mod_arg == {'testing_data'}:
+        print("That's right! To see how well our models perform in a new setting, you will want to predict on the test set of data.")
+    else:
+        print("Oops!  That doesn't look quite right!  Remember you will want to predict on test data to know how well your model will do in a new situation.  Hint - there is only one item that should be passed to the predict method of your model.  Also notice that X_test has not been cleaned yet, so this cannot be passed to the predict method!")
+
+
+def sol_seven(seven_sol):
+    '''
+    Check a statement-to-metric dictionary against the expected answers.
+
+    INPUT: dictionary with correct matching of metrics
+    OUTPUT: nothing returned - prints statement related to correctness of dictionary
+    '''
+    # One row per statement: (statement, expected metric, hint printed when
+    # the answer for that statement is wrong or missing)
+    checks = [
+        ('We have imbalanced classes, which metric do we definitely not want to use?',
+         "accuracy",
+         "Oops!  The first one isn't right.  If we do not have balanced classes, we probably want to stay away from using accuracy."),
+        ('We really want to make sure the positive cases are all caught even if that means we identify some negatives as positives',
+         "recall",
+         "Oops!  The second one isn't right.  If we really want to be sure about catching positive cases, we should be closely watching recall, which has all of the positive clases in the denominator - so we are monitoring how many of them we get right with recall."),
+        ('When we identify something as positive, we want to be sure it is truly positive',
+         "precision",
+         "Oops!  The third one isn't right.  Using precision, we have the predicted positives in the denominator.  Therefore, this will help us be sure the items we identify as positive are actually positive."),
+        ('We care equally about identifying positive and negative cases',
+         'f1-score',
+         "Oops!  The last one isn't right.  If we care equally about precision and recall, we should use f1 score."),
+    ]
+
+    seven_sol_1 = {statement: metric for statement, metric, _ in checks}
+
+    if seven_sol == seven_sol_1:
+        print("That's right!  It isn't really necessary to memorize these in practice, but it is important to know they exist and know why might use one metric over another for a particular situation.")
+
+    for statement, metric, hint in checks:
+        # A missing statement counts as a wrong answer instead of raising
+        # KeyError part-way through the feedback.
+        if statement not in seven_sol or seven_sol[statement] != metric:
+            print(hint)
+
+
+def sol_eight(eight_sol):
+    '''
+    Check a statement-to-model dictionary against the expected answers.
+
+    INPUT: dictionary matching each statement to a model name
+    OUTPUT: nothing returned - prints statement related to correctness of dictionary
+    '''
+    naive_bayes = "naive-bayes"
+    random_forest = "random-forest"
+
+    # NOTE(review): these keys are the same statements used by the metric quiz
+    # in sol_seven -- confirm they are the intended questions for this check.
+    eight_sol_1 = {
+        'We have imbalanced classes, which metric do we definitely not want to use?': naive_bayes,
+        'We really want to make sure the positive cases are all caught even if that means we identify some negatives as positives': naive_bayes,
+        'When we identify something as positive, we want to be sure it is truly positive': random_forest,
+        'We care equally about identifying positive and negative cases': naive_bayes
+    }
+
+    if eight_sol_1 == eight_sol:
+        print("That's right!  Naive Bayes was the best model for all of our metrics except precision!")
+
+    else:
+        print("Oops!  That doesn't look right.  Make sure you are performing your predictions and matching on the test data.  Hint: The naive bayes model actually performs best on all of the metrics except one.  Try again!")