diff --git a/python/Supervised Learning/Project/.ipynb_checkpoints/finding_donors-checkpoint.ipynb b/python/Supervised Learning/Project/.ipynb_checkpoints/finding_donors-checkpoint.ipynb index a340ac0..4909e69 100644 --- a/python/Supervised Learning/Project/.ipynb_checkpoints/finding_donors-checkpoint.ipynb +++ b/python/Supervised Learning/Project/.ipynb_checkpoints/finding_donors-checkpoint.ipynb @@ -42,7 +42,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 39, "metadata": {}, "outputs": [ { @@ -155,9 +155,17 @@ "** HINT: ** You may need to look at the table above to understand how the `'income'` entries are formatted. " ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Check for missing data\n", + "To check for missing data we need to see if any rows are missing:" + ] + }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 40, "metadata": {}, "outputs": [ { @@ -173,18 +181,22 @@ "50% 37.000000 10.000000 0.000000 0.000000 40.000000\n", "75% 47.000000 13.000000 0.000000 0.000000 45.000000\n", "max 90.000000 16.000000 99999.000000 4356.000000 99.000000\n", - "45222\n" + "Total number of rows for income = 45222\n", + "\n", + "No missing data\n" ] } ], "source": [ "print(data.describe())\n", - "print(data['income'].shape[0])" + "print('Total number of rows for income = {}\\n'.format(data['income'].shape[0]))\n", + "if (data.isnull().values.any()) == False:\n", + " print('No missing data')" ] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 41, "metadata": {}, "outputs": [ { @@ -205,7 +217,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 42, "metadata": {}, "outputs": [ { @@ -281,7 +293,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 43, "metadata": {}, "outputs": [], "source": [ @@ -291,16 +303,16 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 44, "metadata": {}, "outputs": [ { "data": { 
"text/plain": [ - "" + "" ] }, - "execution_count": 8, + "execution_count": 44, "metadata": {}, "output_type": "execute_result" }, @@ -330,7 +342,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 45, "metadata": {}, "outputs": [ { @@ -339,7 +351,7 @@ "(0, 1500)" ] }, - "execution_count": 9, + "execution_count": 45, "metadata": {}, "output_type": "execute_result" }, @@ -364,7 +376,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 46, "metadata": {}, "outputs": [ { @@ -373,7 +385,7 @@ "(0, 1000)" ] }, - "execution_count": 10, + "execution_count": 46, "metadata": {}, "output_type": "execute_result" }, @@ -398,7 +410,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 47, "metadata": {}, "outputs": [ { @@ -434,7 +446,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 48, "metadata": {}, "outputs": [ { @@ -462,7 +474,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 49, "metadata": {}, "outputs": [ { @@ -471,7 +483,7 @@ "(0, 1500)" ] }, - "execution_count": 13, + "execution_count": 49, "metadata": {}, "output_type": "execute_result" }, @@ -496,7 +508,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 50, "metadata": {}, "outputs": [ { @@ -505,7 +517,7 @@ "(0, 1500)" ] }, - "execution_count": 14, + "execution_count": 50, "metadata": {}, "output_type": "execute_result" }, @@ -540,7 +552,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 59, "metadata": {}, "outputs": [ { @@ -735,7 +747,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 60, "metadata": {}, "outputs": [ { @@ -771,55 +783,55 @@ " \n", " \n", " \n", - " 28686\n", + " 14475\n", " Private\n", " Bachelors\n", " Married-civ-spouse\n", - " Craft-repair\n", + " Sales\n", " Husband\n", " White\n", " Male\n", " United-States\n", " \n", " \n", - " 12512\n", - " Private\n", - " HS-grad\n", - " Divorced\n", - " 
Machine-op-inspct\n", - " Other-relative\n", - " White\n", - " Male\n", - " United-States\n", - " \n", - " \n", - " 16816\n", - " Private\n", - " Assoc-acdm\n", - " Separated\n", - " Craft-repair\n", + " 14051\n", + " State-gov\n", + " Bachelors\n", + " Never-married\n", + " Prof-specialty\n", " Not-in-family\n", " White\n", " Male\n", " United-States\n", " \n", " \n", - " 5473\n", + " 40954\n", " Private\n", - " HS-grad\n", - " Married-civ-spouse\n", - " Craft-repair\n", - " Wife\n", + " Some-college\n", + " Never-married\n", + " Adm-clerical\n", + " Own-child\n", + " Black\n", + " Male\n", + " United-States\n", + " \n", + " \n", + " 29769\n", + " State-gov\n", + " Masters\n", + " Never-married\n", + " Exec-managerial\n", + " Not-in-family\n", " White\n", " Female\n", " United-States\n", " \n", " \n", - " 41388\n", - " Local-gov\n", - " Some-college\n", + " 22474\n", + " Private\n", + " Bachelors\n", " Married-civ-spouse\n", - " Protective-serv\n", + " Exec-managerial\n", " Husband\n", " White\n", " Male\n", @@ -830,22 +842,22 @@ "" ], "text/plain": [ - " workclass education_level marital-status occupation \\\n", - "28686 Private Bachelors Married-civ-spouse Craft-repair \n", - "12512 Private HS-grad Divorced Machine-op-inspct \n", - "16816 Private Assoc-acdm Separated Craft-repair \n", - "5473 Private HS-grad Married-civ-spouse Craft-repair \n", - "41388 Local-gov Some-college Married-civ-spouse Protective-serv \n", + " workclass education_level marital-status occupation \\\n", + "14475 Private Bachelors Married-civ-spouse Sales \n", + "14051 State-gov Bachelors Never-married Prof-specialty \n", + "40954 Private Some-college Never-married Adm-clerical \n", + "29769 State-gov Masters Never-married Exec-managerial \n", + "22474 Private Bachelors Married-civ-spouse Exec-managerial \n", "\n", - " relationship race sex native-country \n", - "28686 Husband White Male United-States \n", - "12512 Other-relative White Male United-States \n", - "16816 Not-in-family White 
Male United-States \n", - "5473 Wife White Female United-States \n", - "41388 Husband White Male United-States " + " relationship race sex native-country \n", + "14475 Husband White Male United-States \n", + "14051 Not-in-family White Male United-States \n", + "40954 Own-child Black Male United-States \n", + "29769 Not-in-family White Female United-States \n", + "22474 Husband White Male United-States " ] }, - "execution_count": 16, + "execution_count": 60, "metadata": {}, "output_type": "execute_result" } @@ -857,7 +869,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 65, "metadata": { "scrolled": true }, @@ -867,12 +879,16 @@ "output_type": "stream", "text": [ "98 total features after one-hot encoding.\n", + "\n", + "Encoded feature names are:\n", "['workclass_ Federal-gov', 'workclass_ Local-gov', 'workclass_ Private', 'workclass_ Self-emp-inc', 'workclass_ Self-emp-not-inc', 'workclass_ State-gov', 'workclass_ Without-pay', 'education_level_ 10th', 'education_level_ 11th', 'education_level_ 12th', 'education_level_ 1st-4th', 'education_level_ 5th-6th', 'education_level_ 7th-8th', 'education_level_ 9th', 'education_level_ Assoc-acdm', 'education_level_ Assoc-voc', 'education_level_ Bachelors', 'education_level_ Doctorate', 'education_level_ HS-grad', 'education_level_ Masters', 'education_level_ Preschool', 'education_level_ Prof-school', 'education_level_ Some-college', 'marital-status_ Divorced', 'marital-status_ Married-AF-spouse', 'marital-status_ Married-civ-spouse', 'marital-status_ Married-spouse-absent', 'marital-status_ Never-married', 'marital-status_ Separated', 'marital-status_ Widowed', 'occupation_ Adm-clerical', 'occupation_ Armed-Forces', 'occupation_ Craft-repair', 'occupation_ Exec-managerial', 'occupation_ Farming-fishing', 'occupation_ Handlers-cleaners', 'occupation_ Machine-op-inspct', 'occupation_ Other-service', 'occupation_ Priv-house-serv', 'occupation_ Prof-specialty', 'occupation_ Protective-serv', 
'occupation_ Sales', 'occupation_ Tech-support', 'occupation_ Transport-moving', 'relationship_ Husband', 'relationship_ Not-in-family', 'relationship_ Other-relative', 'relationship_ Own-child', 'relationship_ Unmarried', 'relationship_ Wife', 'race_ Amer-Indian-Eskimo', 'race_ Asian-Pac-Islander', 'race_ Black', 'race_ Other', 'race_ White', 'sex_ Female', 'sex_ Male', 'native-country_ Cambodia', 'native-country_ Canada', 'native-country_ China', 'native-country_ Columbia', 'native-country_ Cuba', 'native-country_ Dominican-Republic', 'native-country_ Ecuador', 'native-country_ El-Salvador', 'native-country_ England', 'native-country_ France', 'native-country_ Germany', 'native-country_ Greece', 'native-country_ Guatemala', 'native-country_ Haiti', 'native-country_ Holand-Netherlands', 'native-country_ Honduras', 'native-country_ Hong', 'native-country_ Hungary', 'native-country_ India', 'native-country_ Iran', 'native-country_ Ireland', 'native-country_ Italy', 'native-country_ Jamaica', 'native-country_ Japan', 'native-country_ Laos', 'native-country_ Mexico', 'native-country_ Nicaragua', 'native-country_ Outlying-US(Guam-USVI-etc)', 'native-country_ Peru', 'native-country_ Philippines', 'native-country_ Poland', 'native-country_ Portugal', 'native-country_ Puerto-Rico', 'native-country_ Scotland', 'native-country_ South', 'native-country_ Taiwan', 'native-country_ Thailand', 'native-country_ Trinadad&Tobago', 'native-country_ United-States', 'native-country_ Vietnam', 'native-country_ Yugoslavia']\n", - "42657 0\n", - "31802 0\n", - "7760 0\n", - "21086 0\n", - "19464 0\n", + "\n", + "The income col now looks like:\n", + "40240 1\n", + "19808 1\n", + "26118 0\n", + "590 0\n", + "42229 1\n", "Name: income, dtype: object\n" ] } @@ -887,11 +903,11 @@ "\n", "# Print the number of features after one-hot encoding\n", "encoded = list(features_final.columns)\n", - "print(\"{} total features after one-hot encoding.\".format(len(encoded)))\n", + "print(\"{} total 
features after one-hot encoding.\\n\".format(len(encoded)))\n", "\n", "# Uncomment the following line to see the encoded feature names\n", - "print(encoded)\n", - "print(income_raw.sample(frac=1).head(5))" + "print(f'Encoded feature names are:\\n{encoded}\\n')\n", + "print(f'The income col now looks like:\\n{income_raw.sample(frac=1).head(5)}')" ] }, { @@ -916,7 +932,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 66, "metadata": {}, "outputs": [ { @@ -1000,7 +1016,7 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 67, "metadata": {}, "outputs": [], "source": [ @@ -1012,7 +1028,7 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 68, "metadata": {}, "outputs": [ { @@ -1076,6 +1092,162 @@ "Structure your answer in the same format as above^, with 4 parts for each of the three models you pick. Please include references with your answer." ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Naive Bayes (Gaussian)\n", + "\n", + "## Applications\n", + "Naive Bayes has many applications in the real world. It is very popular in the medical industry - which itself is a huge area and has many applications where it can be applied.\n", + "\n", + "From my own research in my MSc, I used the results of [1]. Here Naive Bayes was used to categorically identify phage virion proteins from features such as amino acid composition and nucleo-capsid thickness. My own research was motivated by trying to understand the numerical mathematical dynamics of these features. The classifier had excellent results in this case - and this raised further questions about the dynamics of the features of viral proteins over time which were (and still are) largely unknown. **This was an excellent use case of machine learning** to identify viral proteins from seemingly healthy ones - but mathematically was interesting as current numerical methods were slow, and the dynamics of the equations were unknown. 
The classifier can be used to save a lot of time practically but mathematically raised more questions about what was happening to the current numerical models used. \n", + "The model also has extensive usage in text classification (such as the Spam example used in this nano-degree), and can also be used for predicting, and making recommendations on, whether someone or something will do something (assuming the features are largely independent).\n", + "\n", + "## Advantages\n", + "The model performs very well in problems where the features are largely conditionally independent from each other. This means that we are assuming that features do not depend on each other (e.g. if we considered height and weight we would expect them to correlate, taller people would weigh more than shorter people, but weight could still be conditionally independent as there are other factors to consider in how heavy someone is).\n", + "Because of this, the model will converge much quicker than other methods (such as linear regression) - in practice this means we can use less training data. We can use the model for both discrete and continuous data. [2]\n", + "\n", + "## Disadvantages\n", + "The model also has disadvantages. When the features are dependent the conditional independence does not hold: in such a case computing\n", + "\n", + "$P(X|C_i)$\n", + "\n", + "can be computationally expensive if we cannot leverage the conditional independence condition\n", + "\n", + "$P(X|C_i) = P(x_1 | C_i) * P(x_2 | C_i) * ... * P(x_N | C_i)$\n", + "\n", + "Interestingly, it has been shown that Naive Bayes (Gaussian) can be used, and will perform well, even when this independence assumption does not hold. [3] attributes this to the underlying zero-one loss function used in the algorithm - which calculates the error as the number of incorrect predictions. The zero-one loss function does not penalise inaccurate probability estimates as long as the highest probability is assigned to the correct class [4]. 
In addition - special care must be taken if using this model **for non-independent features** in the continuous case - as the model minimises the expected loss by maximising the posterior probability $P(C_i|X)$. Care must be taken when considering the zero-one loss function, as integrating a discrete indicator over a probability density function (for a continuous case) would always be zero. Functions such as the Dirac delta function can be applied in this case [5]. There is much more research into why this model performs well when this condition does not hold, which can be seen in [6].\n", + "Although the model is a very good classifier - it does fall short on probability estimation. This means we can use the model to evaluate future people given their features - but the probability that someone belongs to either class is not a good indicator. This means that future modelling will require all our data to classify someone; using the probabilities that someone belongs in either class is not a practical use for this model.\n", + "\n", + "\n", + "\n", + "## Reasoning\n", + "Based on the above I believe this model is a good candidate.\n", + "* We have features that appear at face value to not closely depend on each other - by this I mean no features stand out as largely depending on another. Age is largely independent of Ethnicity. Although there could be some dependence on the Capital Gains or Loss with respect to the other features - further investigation would only be warranted in my opinion if the output of the model warrants it.\n", + "* We want to know categorically if someone earns above or below \\$50k" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Decision Trees\n", + "\n", + "## Applications\n", + "\n", + "Decision trees have many applications in industry:\n", + "* One example is in customer relations and recommendations. 
Decision trees can be used to analyse how customers interact online and then analyse their usage to provide recommendations based on this information. [10] applied decision trees to investigate customers' shopping habits online - by classing people into two categories, 1) those who rarely shopped online and 2) those who shopped online frequently. The result of the decision tree showed that the time it takes customers to complete a transaction and how urgently the item was needed were the most important factors in whether a customer shopped online or not. \n", + "\n", + "## Advantages\n", + "\n", + "Decision trees have many advantages when being considered as a classifier:\n", + "\n", + "* They perform well with missing data points. Other methods require the removal of data if any features are missing - which is inefficient and if not careful can affect the validity of your data. Decision trees work around this by classifying missing values as a separate category - where other categories can be used to analyse the missing categories. Or you can build a decision tree to categorically predict the missing values based on the data as a preliminary step before using the model to obtain your results. [7]\n", + "* They have excellent use in determining whether or not features are relatively important to one another. By using a decision tree you can find how important a feature is by removing it from the tree and validating the result against the feature included. This is a popular method employed to find the importance of features - I myself have used such methods when considering the importance of states in Markov Chains. By removing states one by one - you can find the importance of each state relative to the other states. This can tell you lots of information about how important each state is. [8]\n", + "* Unlike a Naive Bayes classifier, Decision Trees can be used to predict values. 
The resulting probabilities from the model can be used to predict whether or not someone will belong to a class without the need to run the model with the new data included each time. [7]\n", + "* They are excellent when using categorical data. If you have a category that has many values under it a decision tree is a very good model for deciding how these categories can be split or grouped together. They can break the category down into a more manageable group. We can see in the cell below that the Occupation column has 14 values present. A decision tree can handle this (and much higher counts) with ease. [7]\n", + "\n", + "## Disadvantages\n", + "* Although decision trees are excellent predictors - if the data changes or evolves over time (say the number of people belonging to a specific job goes up) then the model needs to be redrawn to account for this. This can be accounted for by using ensemble methods in tandem with a decision tree. [9]\n", + "* The hyperparameters for the model are very important - popular hyperparameters to consider are max_depth which can be used to control over-fitting, min_samples_split and min_samples_leaf which consider how many samples are needed to split or be defined as a leaf and min_weight_fraction_leaf when considering weighted samples. The overabundance and consideration of the hyperparameters means there is a need to understand your data before considering values for these. Methods we have seen such as grid search can aid in the process when we have many to consider.\n", + "* Decision trees are sensitive if a category is dominated by a particular value. Careful consideration should be taken if this is the case and balancing the data can help. Methods used to balance could include oversampling (adding copies of under-represented values) or undersampling (removal of some of the dominant class values). We should also pay close attention to the Precision, Recall and F1 Score when evaluating the model - as this can aid in selection. 
\n", + "\n", + "## Reasoning\n", + "Based on the above I believe Decision Trees are a good candidate for our problem.\n", + "* We have a single classification problem which lends itself well to decision trees. \n", + "* We have categories which contain many classes (such as occupation) - we know decision trees can be used to great effect with this kind of data.\n", + "* Easy to visualise and explain - justifying the model can be easily done with scikit-learn. We can plot the tree which will show us the categories and their values used to split at each node.\n", + "* Categorical data - although we have accounted for categories with pd.get_dummies(), the model lends itself well to categorical data." + ] + }, + { + "cell_type": "code", + "execution_count": 106, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Number of unique values in occupation column is 14\n" + ] + } + ], + "source": [ + "import collections\n", + "occupation_cat_count = collections.Counter(data['occupation'].unique())\n", + "occupation_cat_count = sum(occupation_cat_count.values())\n", + "print(f'Number of unique values in occupation column is {occupation_cat_count}')\n", + "# sum(collections.Counter(encoded).values())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# SVMs\n", + "\n", + "## Applications\n", + "\n", + "An application of a support vector machine in industry is:\n", + "* SVMs have a wide application in image recognition and classification problems. When applied to image recognition each feature of the data set corresponds to a single image pixel. An SVM can be used to classify these pixels into which category they belong to based on their features (such as edge, colour or shape). In [11] SVMs were used on microscopic images of cells to detect the location of the cell nucleus. 
Manual location of the cell nuclei is a long and arduous process especially when considering there may be many thousands of images to consider. An SVM can be used to learn what part of an image corresponds to a nucleus, and what part does not and was used to great effect in this case - managing to locate the nuclei across multiple scales and stains successfully. Their usage is not limited to the medical sector; they have many practical uses such as in geography in identifying areas of land suitable for cultivation and farming. [12]\n", + "\n", + "## Advantages\n", + "\n", + "SVMs have many advantages and are a powerful classification method\n", + "* They integrate well with kernel methods - this means that SVMs are very versatile. You can employ different Kernels when considering how you will map your points to a higher dimensional space and even write your own kernels if your data requires it.\n", + "* When compared to linear regression methods SVMs are more robust due to the maximising of the margin. With the hyperparameter C you can control how much an incorrectly classified point is penalised.\n", + "* They are excellent at non-linear boundaries due to the kernels they employ. Although it is possible to employ kernels in other methods such as regression, SVMs in scikit-learn have the kernels already implemented and it is much easier to use the \"kernel trick\" (the kernel trick uses the fact that dot products in the higher dimensional space can be computed directly from the points in the original space using the corresponding Kernel, without computing the mapping explicitly. [14] has an excellent write up on how this is used for the Linear kernel). \n", + "\n", + "## Disadvantages\n", + "Although SVMs are powerful in classification problems there are disadvantages we must consider when using them\n", + "* Choosing a kernel function is not an easy task and can often be the main barrier to the success of the model. 
scikit-learn offers several kernels, such as the linear kernel and the popular Radial Basis Function (RBF) kernel. The RBF is often used and uses the squared Euclidean distance between two points. The feature space of the kernel has an infinite number of dimensions [13] and this means the kernel can be used to project points into any higher dimension - although this comes at a huge computational cost. \n", + "* The model has a long training time on large data sets and this is due to several reasons\n", + " * The C parameter is a hyperparameter used to penalise misclassified points - the higher this is, the more accurate the results but the slower the training process is.\n", + " * The general method is of $O(n^3)$ (where $O$ is big-O notation) - meaning it has to run a number of operations proportional to $n^3$. With 10,000 data points this means the number of operations is proportional to $10^{12}$ - a huge number which can affect our computational time greatly.\n", + " \n", + "## Reasoning\n", + "Based on the above I believe SVMs can be used for our model\n", + "* We have a classification problem which is well suited to SVMs.\n", + "* They are very good when we have a large number of features and after transforming our category values into feature values we know we have 98 features for our category data.\n", + "* With careful selection of our C hyperparameter and a suitable kernel we can obtain good boundaries for our dataset." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# References\n", + "[1] https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3671239/\n", + "\n", + "[2] https://www.slideshare.net/ashrafmath/naive-bayes-15644818\n", + "\n", + "[3] https://www.cs.waikato.ac.nz/~eibe/pubs/nbr.pdf\n", + "\n", + "[4] https://link.springer.com/article/10.1023/A:1009778005914\n", + "\n", + "[5] https://en.wikipedia.org/wiki/Dirac_delta_function\n", + "\n", + "[6] https://www.cs.unb.ca/~hzhang/publications/FLAIRS04ZhangH.pdf\n", + "\n", + "[7] https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4466856/\n", + "\n", + "[8] https://www.analyticsvidhya.com/blog/2018/01/channel-attribution-modeling-using-markov-chains-in-r/\n", + "\n", + "[9] https://scikit-learn.org/stable/modules/tree.html\n", + "\n", + "[10] https://www.sciencedirect.com/science/article/pii/S0957417406001825\n", + "\n", + "[11] https://link.springer.com/article/10.1007/s00138-010-0275-y\n", + "\n", + "[12] https://www.ncbi.nlm.nih.gov/pubmed/20052093\n", + "\n", + "[13] https://en.wikipedia.org/wiki/Radial_basis_function_kernel\n", + "\n", + "[14] https://www.quora.com/What-is-the-kernel-trick" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -1101,10 +1273,8 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, + "execution_count": 57, + "metadata": {}, "outputs": [], "source": [ "# TODO: Import two metrics from sklearn - fbeta_score and accuracy_score\n", @@ -1124,11 +1294,11 @@ " \n", " # TODO: Fit the learner to the training data using slicing with 'sample_size' using .fit(training_features[:], training_labels[:])\n", " start = time() # Get start time\n", - " learner = None\n", + " learner = learner.fit(X_train, y_train)\n", " end = time() # Get end time\n", " \n", " # TODO: Calculate the training time\n", - " results['train_time'] = None\n", + " results['train_time'] = end - start\n", " \n", " # TODO: Get the predictions on the test set(X_test),\n", " # 
then get predictions on the first 300 training samples(X_train) using .predict()\n", @@ -1177,11 +1347,52 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], + "execution_count": 58, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "NoneType trained on None samples.\n", + "NoneType trained on None samples.\n", + "NoneType trained on None samples.\n", + "NoneType trained on None samples.\n", + "NoneType trained on None samples.\n", + "NoneType trained on None samples.\n", + "NoneType trained on None samples.\n", + "NoneType trained on None samples.\n", + "NoneType trained on None samples.\n" + ] + }, + { + "ename": "TypeError", + "evalue": "unsupported operand type(s) for +: 'int' and 'NoneType'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 24\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 25\u001b[0m \u001b[0;31m# Run metrics visualization for the three supervised learning models chosen\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 26\u001b[0;31m \u001b[0mvs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mevaluate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mresults\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maccuracy\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfscore\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;32m~/courses/Udacity Machine Learning - Introduction Nanodegree Program/python/Supervised Learning/Project/visuals.py\u001b[0m in \u001b[0;36mevaluate\u001b[0;34m(results, accuracy, f1)\u001b[0m\n\u001b[1;32m 73\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 74\u001b[0m \u001b[0;31m# Creative plot 
code\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 75\u001b[0;31m \u001b[0max\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mj\u001b[0m\u001b[0;34m//\u001b[0m\u001b[0;36m3\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mj\u001b[0m\u001b[0;34m%\u001b[0m\u001b[0;36m3\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbar\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m+\u001b[0m\u001b[0mk\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0mbar_width\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mresults\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mlearner\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mmetric\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mwidth\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mbar_width\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcolor\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcolors\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mk\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 76\u001b[0m \u001b[0max\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mj\u001b[0m\u001b[0;34m//\u001b[0m\u001b[0;36m3\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mj\u001b[0m\u001b[0;34m%\u001b[0m\u001b[0;36m3\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mset_xticks\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0.45\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m1.45\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m2.45\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 77\u001b[0m \u001b[0max\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mj\u001b[0m\u001b[0;34m//\u001b[0m\u001b[0;36m3\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0mj\u001b[0m\u001b[0;34m%\u001b[0m\u001b[0;36m3\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mset_xticklabels\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"1%\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"10%\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"100%\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/.virtualenvs/udacity-ML-3.7.3/lib/python3.7/site-packages/matplotlib/__init__.py\u001b[0m in \u001b[0;36minner\u001b[0;34m(ax, data, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1599\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0minner\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0max\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdata\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1600\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mdata\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1601\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0max\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0mmap\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msanitize_sequence\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0margs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1602\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1603\u001b[0m \u001b[0mbound\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnew_sig\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbind\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0max\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/.virtualenvs/udacity-ML-3.7.3/lib/python3.7/site-packages/matplotlib/axes/_axes.py\u001b[0m in \u001b[0;36mbar\u001b[0;34m(self, x, height, width, bottom, align, **kwargs)\u001b[0m\n\u001b[1;32m 2428\u001b[0m \u001b[0medgecolor\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0me\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2429\u001b[0m \u001b[0mlinewidth\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mlw\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2430\u001b[0;31m \u001b[0mlabel\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'_nolegend_'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2431\u001b[0m )\n\u001b[1;32m 2432\u001b[0m \u001b[0mr\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mupdate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/.virtualenvs/udacity-ML-3.7.3/lib/python3.7/site-packages/matplotlib/patches.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, xy, width, height, angle, **kwargs)\u001b[0m\n\u001b[1;32m 714\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 715\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_x1\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_x0\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_width\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 716\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_y1\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_y0\u001b[0m \u001b[0;34m+\u001b[0m 
\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_height\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 717\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 718\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mangle\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mfloat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mangle\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mTypeError\u001b[0m: unsupported operand type(s) for +: 'int' and 'NoneType'" + ] + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAApMAAAGfCAYAAAADPFkbAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAAfDElEQVR4nO3dYYhld3k/8O9jtqnURi1mBclGE/lvqltbMB1Si1BTtGWTQvLCIgmE1hJctEYKSiHFYiW+slILQlq7pRIVNEZflAUjgdpIQFzNhGg0CZE12majNKumvhGNoc//xdy047gzc/c39+xM7n4+MHDPub+d83Bnv9zv3HvunOruAADAiOfs9gAAADx7KZMAAAxTJgEAGKZMAgAwTJkEAGCYMgkAwLBty2RVfaSqnqiqb2xyf1XVh6rqRFU9UFWXL35MWF4yBtOSMZjWPK9M3pbk8Bb3X5Xk4OzrSJJ/3PlYcE65LTIGU7otMgaT2bZMdvc9SX64xZJrk3ys1xxP8sKqesmiBoRlJ2MwLRmDae1bwPe4KMlj67ZPzvZ9b+PCqjqStd/68rznPe+3X/GKVyzg8LA33Hfffd/v7v0TfGsZ45w3Yb6SOTMmXyyznWRsEWVybt19NMnRJFlZWenV1dWzeXiYVFX9x27PIGMsK/mCae0kY4v4NPfjSS5et31gtg9YDBmDackY7MAiyuSxJH8y+zTca5L8qLt/4e03YJiMwbRkDHZg27e5q+qTSa5McmFVnUzyN0l+KUm6+8NJ7kxydZITSX6c5M+mGhaWkYzBtGQMprVtmezu67e5v5O8fWETwTlGxmBaMgbTcgUcAACGKZMAAAxTJgEAGKZMAgAwTJkEAGCYMgkAwDBlEgCAYcokAADDlEkAAIYpkwAADFMmAQAYpkwCADBMmQQAYJgyCQDAMGUSAIBhyiQAAMOUSQAAhimTAAAMUyYBABimTAIAMEyZBABgmDIJAMAwZRIAgGHKJAAAw5RJAACGKZMAAAybq0xW1eGqeqSqTlTVzae5/6VVdXdV3V9VD1TV1YsfFZaXjMF05AumtW2ZrKrzktya5Kokh5JcX1WHNiz76yR3dPerk1yX5B8WPSgsKxmD6cgXTG+eVyavSHKiux/t7qeS3J7k2g1rOsnzZ7dfkOS7ixsRlp6MwXTkCyY2T5m8KMlj67ZPzvat994kN1TVySR3JnnH6b5RVR2pqtWqWj116tTAuLCUZAymI18wsUV9AOf6JLd194EkVyf5eFX9wvfu7qPdvdLdK/v371/QoeGcIGMwHfmCHZinTD6e5OJ12wdm+9a7MckdSdLdX0ry3CQXLmJAOAfIGExHvmBi85TJe5McrKpLq+r8rJ2cfGzDmv9M8vokqapXZi2I3gOA+cgYTEe
+YGLblsnufjrJTUnuSvJw1j7x9mBV3VJV18yWvSvJW6rqa0k+meTN3d1TDQ3LRMZgOvIF09s3z6LuvjNrJyWv3/eedbcfSvLaxY4G5w4Zg+nIF0zLFXAAABimTAIAMEyZBABgmDIJAMAwZRIAgGHKJAAAw5RJAACGKZMAAAxTJgEAGKZMAgAwTJkEAGCYMgkAwDBlEgCAYcokAADDlEkAAIYpkwAADFMmAQAYpkwCADBMmQQAYJgyCQDAMGUSAIBhyiQAAMOUSQAAhimTAAAMUyYBABg2V5msqsNV9UhVnaiqmzdZ86aqeqiqHqyqTyx2TFhe8gXTkjGY1r7tFlTVeUluTfIHSU4mubeqjnX3Q+vWHEzyV0le291PVtWLpxoYlol8wbRkDKY3zyuTVyQ50d2PdvdTSW5Pcu2GNW9Jcmt3P5kk3f3EYseEpSVfMC0Zg4nNUyYvSvLYuu2Ts33rXZbksqr6YlUdr6rDp/tGVXWkqlaravXUqVNjE8NyWVi+EhmD0/AcBhNb1Adw9iU5mOTKJNcn+eeqeuHGRd19tLtXuntl//79Czo0LL258pXIGAzyHAY7ME+ZfDzJxeu2D8z2rXcyybHu/ll3fzvJN7MWTGBr8gXTkjGY2Dxl8t4kB6vq0qo6P8l1SY5tWPOvWfuNLlV1YdbeMnh0gXPCspIvmJaMwcS2LZPd/XSSm5LcleThJHd094NVdUtVXTNbdleSH1TVQ0nuTvKX3f2DqYaGZSFfMC0Zg+lVd+/KgVdWVnp1dXVXjg1TqKr7untlt+d4hoyxTOQLprWTjLkCDgAAw5RJAACGKZMAAAxTJgEAGKZMAgAwTJkEAGCYMgkAwDBlEgCAYcokAADDlEkAAIYpkwAADFMmAQAYpkwCADBMmQQAYJgyCQDAMGUSAIBhyiQAAMOUSQAAhimTAAAMUyYBABimTAIAMEyZBABgmDIJAMAwZRIAgGHKJAAAw5RJAACGzVUmq+pwVT1SVSeq6uYt1r2xqrqqVhY3Iiw/GYPpyBdMa9syWVXnJbk1yVVJDiW5vqoOnWbdBUn+IsmXFz0kLDMZg+nIF0xvnlcmr0hyorsf7e6nktye5NrTrHtfkvcn+ckC54NzgYzBdOQLJjZPmbwoyWPrtk/O9v2vqro8ycXd/dmtvlFVHamq1apaPXXq1BkPC0tKxmA68gUT2/EHcKrqOUk+mORd263t7qPdvdLdK/v379/poeGcIGMwHfmCnZunTD6e5OJ12wdm+55xQZJXJflCVX0nyWuSHHMCM8xNxmA68gUTm6dM3pvkYFVdWlXnJ7kuybFn7uzuH3X3hd19SXdfkuR4kmu6e3WSiWH5yBhMR75gYtuWye5+OslNSe5K8nCSO7r7waq6paqumXpAWHYyBtORL5jevnkWdfedSe7csO89m6y9cudjwblFxmA68gXTcgUcAACGKZMAAAxTJgEAGKZMAgAwTJkEAGCYMgkAwDBlEgCAYcokAADDlEkAAIYpkwAADFMmAQAYpkwCADBMmQQAYJgyCQDAMGUSAIBhyiQAAMOUSQAAhimTAAAMUyYBABimTAIAMEyZBABgmDIJAMAwZRIAgGHKJAAAw5RJAACGKZMAAAybq0xW1eGqeqSqTlTVzae5/51V9VBVPVBVn6+qly1+VFhO8gXTkjGY1rZlsqrOS3JrkquSHEpyfVUd2rDs/iQr3f1bST6T5G8XPSgsI/mCackYTG+eVyavSHKiux/t7qeS3J7k2vULuvvu7v7xbPN4kgOLHROWlnzBtGQMJjZPmbwoyWPrtk/O9m3mxiSfO90dVXWkqlaravXUqVPzTwnLa2H5SmQMTsNzGExsoR/Aqaobkqwk+cDp7u/uo9290t0r+/fvX+ShYeltl69ExmAnPIfBmH1zrHk8ycXrtg/M9v2cqnpDkncneV13/3Qx48HSky+YlozBxOZ5ZfLeJAer6tKqOj/JdUmOrV9QVa9O8k9JrunuJxY/Jiwt+YJ
pyRhMbNsy2d1PJ7kpyV1JHk5yR3c/WFW3VNU1s2UfSPKrST5dVV+tqmObfDtgHfmCackYTG+et7nT3XcmuXPDvvesu/2GBc8F5wz5gmnJGEzLFXAAABimTAIAMEyZBABgmDIJAMAwZRIAgGHKJAAAw5RJAACGKZMAAAxTJgEAGKZMAgAwTJkEAGCYMgkAwDBlEgCAYcokAADDlEkAAIYpkwAADFMmAQAYpkwCADBMmQQAYJgyCQDAMGUSAIBhyiQAAMOUSQAAhimTAAAMUyYBABg2V5msqsNV9UhVnaiqm09z/y9X1adm93+5qi5Z9KCwzGQMpiNfMK1ty2RVnZfk1iRXJTmU5PqqOrRh2Y1Jnuzu/5fk75O8f9GDwrKSMZiOfMH05nll8ookJ7r70e5+KsntSa7dsObaJB+d3f5MktdXVS1uTFhqMgbTkS+Y2L451lyU5LF12yeT/M5ma7r76ar6UZIXJfn++kVVdSTJkdnmT6vqGyNDT+TCbJh3l5lna3ttniT59cF/J2O7wzxb22vzyNfW9trPK9l7M5lna6MZm6tMLkx3H01yNEmqarW7V87m8bdinq2ZZ3tVtbrbM8jY/Myztb04z27PIF9nZq/NZJ6t7SRj87zN/XiSi9dtH5jtO+2aqtqX5AVJfjA6FJxjZAymI18wsXnK5L1JDlbVpVV1fpLrkhzbsOZYkj+d3f7jJP/e3b24MWGpyRhMR75gYtu+zT07f+SmJHclOS/JR7r7waq6Jclqdx9L8i9JPl5VJ5L8MGth3c7RHcw9BfNszTzbG5pJxnaNeba2FPPI167aazOZZ2vD85RfvgAAGOUKOAAADFMmAQAYNnmZ3GuXsZpjnndW1UNV9UBVfb6qXrab86xb98aq6qqa9M8IzDNPVb1p9hg9WFWf2M15quqlVXV3Vd0/+5ldPfE8H6mqJzb7+3K15kOzeR+oqssnnmdP5WvOmWRMxjabZU/la3bMPZUx+dr5PGczX/PMtBQZ6+7JvrJ2svO3krw8yflJvpbk0IY1f57kw7Pb1yX51C7P8/tJfmV2+227Pc9s3QVJ7klyPMnKLj8+B5Pcn+TXZtsv3uV5jiZ52+z2oSTfmWqe2TF+L8nlSb6xyf1XJ/lckkrymiRf3uXH56zl6wxmkjEZ22yePZOvM3h8PIfJ105netZnbOpXJvfaZay2nae77+7uH882j2ftb5JNZZ7HJ0nel7Vrxf5kwlnmnectSW7t7ieTpLuf2OV5OsnzZ7dfkOS7E86T7r4na5/23My1ST7Wa44neWFVvWSicfZavuaaScZkbDN7LF/J3suYfO18nrOZr3lnetZnbOoyebrLWF202ZrufjrJM5ex2q151rsxaw19KtvOM3uJ+eLu/uyEc8w9T5LLklxWVV+squNVdXiX53lvkhuq6mSSO5O8Y8J55nGm/8emPtbZzNe8M60nYzJ2Js5mvuY9nuewLeY5x/M170zvzbM8Y2f1corPJlV1Q5KVJK/bxRmek+SDSd68WzOcxr6svU1wZdZ+472nqn6zu/97l+a5Pslt3f13VfW7Wftbca/q7v/ZpXmYk4xtSsbYMfna1F7LV7IEGZv6lcm9dhmreeZJVb0hybuTXNPdP51olnnmuSDJq5J8oaq+k7XzF45NeALzPI/PySTHuvtn3f3tJN/MWjB3a54bk9yRJN39pSTPTXLhRPPMY67/Y2fxWGf7MnEytrN5EhnbytnM17zH8xy2+Tzner7mnenZn7GpTvKcnci5L8mjSS7N/514+hsb1rw9P3/y8h27PM+rs3ay7MEpH5t559mw/guZ9uTleR6fw0k+Ort9YdZeDn/RLs7zuSRvnt1+ZdbONamJf26XZPOTl/8oP3/y8ld2+ed11vJ1BjPJmIxtNdOeyNcZPD6ew+RrpzM96zM26X+02WBXZ635fyvJu2f7bsnab0zJWgP/dJITSb6S5OW7PM+/JfmvJF+dfR3bzXk2rJ00iHM+PpW1ty0
eSvL1JNft8jyHknxxFtCvJvnDief5ZJLvJflZ1n7DvTHJW5O8dd3jc+ts3q/vgZ/XWc3XnDPJmIxtNsueytecj4/nMPna6UzP+oy5nCIAAMNcAQcAgGHKJAAAw5RJAACGKZMAAAxTJgEAGKZMAgAwTJkEAGCYMgkAwDBlEgCAYcokAADDlEkAAIYpkwAADFMmAQAYpkwCADBMmQQAYNi2ZbKqPlJVT1TVNza5v6rqQ1V1oqoeqKrLFz8mLC8Zg2nJGExrnlcmb0tyeIv7r0pycPZ1JMk/7nwsOKfcFhmDKd0WGYPJbFsmu/ueJD/cYsm1ST7Wa44neWFVvWRRA8KykzGYlozBtPYt4HtclOSxddsnZ/u+t3FhVR3J2m99ed7znvfbr3jFKxZweNgb7rvvvu939/4JvrWMcc6bMF/JnBmTL5bZTjK2iDI5t+4+muRokqysrPTq6urZPDxMqqr+Y7dnkDGWlXzBtHaSsUV8mvvxJBev2z4w2wcshozBtGQMdmARZfJYkj+ZfRruNUl+1N2/8PYbMEzGYFoyBjuw7dvcVfXJJFcmubCqTib5myS/lCTd/eEkdya5OsmJJD9O8mdTDQvLSMZgWjIG09q2THb39dvc30nevrCJ4BwjYzAtGYNpuQIOAADDlEkAAIYpkwAADFMmAQAYpkwCADBMmQQAYJgyCQDAMGUSAIBhyiQAAMOUSQAAhimTAAAMUyYBABimTAIAMEyZBABgmDIJAMAwZRIAgGHKJAAAw5RJAACGKZMAAAxTJgEAGKZMAgAwTJkEAGCYMgkAwDBlEgCAYcokAADD5iqTVXW4qh6pqhNVdfNp7n9pVd1dVfdX1QNVdfXiR4XlJWMwHfmCaW1bJqvqvCS3JrkqyaEk11fVoQ3L/jrJHd396iTXJfmHRQ8Ky0rGYDryBdOb55XJK5Kc6O5Hu/upJLcnuXbDmk7y/NntFyT57uJGhKUnYzAd+YKJzVMmL0ry2Lrtk7N96703yQ1VdTLJnUnecbpvVFVHqmq1qlZPnTo1MC4sJRmD6cgXTGxRH8C5Pslt3X0gydVJPl5Vv/C9u/tod69098r+/fsXdGg4J8gYTEe+YAfmKZOPJ7l43faB2b71bkxyR5J095eSPDfJhYsYEM4BMgbTkS+Y2Dxl8t4kB6vq0qo6P2snJx/bsOY/k7w+SarqlVkLovcAYD4yBtORL5jYtmWyu59OclOSu5I8nLVPvD1YVbdU1TWzZe9K8paq+lqSTyZ5c3f3VEPDMpExmI58wfT2zbOou+/M2knJ6/e9Z93th5K8drGjwblDxmA68gXTcgUcAACGKZMAAAxTJgEAGKZMAgAwTJkEAGCYMgkAwDBlEgCAYcokAADDlEkAAIYpkwAADFMmAQAYpkwCADBMmQQAYJgyCQDAMGUSAIBhyiQAAMOUSQAAhimTAAAMUyYBABimTAIAMEyZBABgmDIJAMAwZRIAgGHKJAAAw5RJAACGKZMAAAybq0xW1eGqeqSqTlTVzZuseVNVPVRVD1bVJxY7Jiwv+YJpyRhMa992C6rqvCS3JvmDJCeT3FtVx7r7oXVrDib5qySv7e4nq+rFUw0My0S+YFoyBtOb55XJK5Kc6O5Hu/upJLcnuXbDmrckubW7n0yS7n5isWPC0pIvmJaMwcTmKZMXJXls3fbJ2b71LktyWVV9saqOV9Xh032jqjpSVatVtXrq1KmxiWG5LCxfiYzBaXgOg4kt6gM4+5IcTHJlkuuT/HNVvXDjou4+2t0r3b2yf//+BR0alt5c+UpkDAZ5DoMdmKdMPp7k4nXbB2b71juZ5Fh3/6y7v53km1kLJrA1+YJpyRhMbJ4yeW+Sg1V1aVWdn+S6JMc2rPnXrP1Gl6q6MGtvGTy6wDlhWckXTEvGYGLblsnufjrJTUnuSvJwkju6+8GquqWqrpktuyvJD6rqoSR3J/nL7v7BVEPDspAvmJaMwfSqu3flwCsrK726urorx4YpVNV93b2
y23M8Q8ZYJvIF09pJxlwBBwCAYcokAADDlEkAAIYpkwAADFMmAQAYpkwCADBMmQQAYJgyCQDAMGUSAIBhyiQAAMOUSQAAhimTAAAMUyYBABimTAIAMEyZBABgmDIJAMAwZRIAgGHKJAAAw5RJAACGKZMAAAxTJgEAGKZMAgAwTJkEAGCYMgkAwDBlEgCAYcokAADD5iqTVXW4qh6pqhNVdfMW695YVV1VK4sbEZafjMF05AumtW2ZrKrzktya5Kokh5JcX1WHTrPugiR/keTLix4SlpmMwXTkC6Y3zyuTVyQ50d2PdvdTSW5Pcu1p1r0vyfuT/GSB88G5QMZgOvIFE5unTF6U5LF12ydn+/5XVV2e5OLu/uxW36iqjlTValWtnjp16oyHhSUlYzAd+YKJ7fgDOFX1nCQfTPKu7dZ299HuXunulf379+/00HBOkDGYjnzBzs1TJh9PcvG67QOzfc+4IMmrknyhqr6T5DVJjjmBGeYmYzAd+YKJzVMm701ysKourarzk1yX5Ngzd3b3j7r7wu6+pLsvSXI8yTXdvTrJxLB8ZAymI18wsW3LZHc/neSmJHcleTjJHd39YFXdUlXXTD0gLDsZg+nIF0xv3zyLuvvOJHdu2PeeTdZeufOx4NwiYzAd+YJpuQIOAADDlEkAAIYpkwAADFMmAQAYpkwCADBMmQQAYJgyCQDAMGUSAIBhyiQAAMOUSQAAhimTAAAMUyYBABimTAIAMEyZBABgmDIJAMAwZRIAgGHKJAAAw5RJAACGKZMAAAxTJgEAGKZMAgAwTJkEAGCYMgkAwDBlEgCAYcokAADD5iqTVXW4qh6pqhNVdfNp7n9nVT1UVQ9U1eer6mWLHxWWk3zBtGQMprVtmayq85LcmuSqJIeSXF9VhzYsuz/JSnf/VpLPJPnbRQ8Ky0i+YFoyBtOb55XJK5Kc6O5Hu/upJLcnuXb9gu6+u7t/PNs8nuTAYseEpSVfMC0Zg4nNUyYvSvLYuu2Ts32buTHJ5053R1UdqarVqlo9derU/FPC8lpYvhIZg9PwHAYTW+gHcKrqhiQrST5wuvu7+2h3r3T3yv79+xd5aFh62+UrkTHYCc9hMGbfHGseT3Lxuu0Ds30/p6rekOTdSV7X3T9dzHiw9OQLpiVjMLF5Xpm8N8nBqrq0qs5Pcl2SY+sXVNWrk/xTkmu6+4nFjwlLS75gWjIGE9u2THb300luSnJXkoeT3NHdD1bVLVV1zWzZB5L8apJPV9VXq+rYJt8OWEe+YFoyBtOb523udPedSe7csO89626/YcFzwTlDvmBaMgbTcgUcAACGKZMAAAxTJgEAGKZMAgAwTJkEAGCYMgkAwDBlEgCAYcokAADDlEkAAIYpkwAADFMmAQAYpkwCADBMmQQAYJgyCQDAMGUSAIBhyiQAAMOUSQAAhimTAAAMUyYBABimTAIAMEyZBABgmDIJAMAwZRIAgGHKJAAAw5RJAACGKZMAAAybq0xW1eGqeqSqTlTVzae5/5er6lOz+79cVZcselBYZjIG05EvmNa2ZbKqzktya5KrkhxKcn1VHdqw7MYkT3b3/0vy90nev+hBYVnJGExHvmB687wyeUWSE939aHc/leT2JNduWHNtko/Obn8myeurqhY3Jiw1GYPpyBdMbN8cay5K8ti67ZNJfmezNd39dFX9KMmLknx//aKqOpLkyGzzp1X1jZGhJ3JhNsy7y8yztb02T5L8+uC/k7HdYZ6t7bV55Gtre+3nley9mcyztdGMzVUmF6a7jyY5miRVtdrdK2fz+Fsxz9bMs72qWt3tGWRsfubZ2l6cZ7dnkK8zs9dmMs/WdpKxed7mfjzJxeu2D8z2nXZNVe1L8oIkPxgdCs4xMgbTkS+Y2Dxl8t4kB6vq0qo6P8l1SY5tWHMsyZ/Obv9xkn/v7l7cmLDUZAymI18wsW3f5p6dP3JTkruSnJfkI939YFXdkmS1u48l+ZckH6+qE0l+mLWwbufoDua
egnm2Zp7tDc0kY7vGPFtbinnka1fttZnMs7XhecovXwAAjHIFHAAAhimTAAAMm7xM7rXLWM0xzzur6qGqeqCqPl9VL9vNedate2NVdVVN+mcE5pmnqt40e4werKpP7OY8VfXSqrq7qu6f/cyunniej1TVE5v9fbla86HZvA9U1eUTz7On8jXnTDImY5vNsqfyNTvmnsqYfO18nrOZr3lmWoqMdfdkX1k72flbSV6e5PwkX0tyaMOaP0/y4dnt65J8apfn+f0kvzK7/bbdnme27oIk9yQ5nmRllx+fg0nuT/Jrs+0X7/I8R5O8bXb7UJLvTDXP7Bi/l+TyJN/Y5P6rk3wuSSV5TZIv7/Ljc9bydQYzyZiMbTbPnsnXGTw+nsPka6czPeszNvUrk3vtMlbbztPdd3f3j2ebx7P2N8mmMs/jkyTvy9q1Yn8y4SzzzvOWJLd295NJ0t1P7PI8neT5s9svSPLdCedJd9+TtU97bubaJB/rNceTvLCqXjLROHstX3PNJGMytpk9lq9k72VMvnY+z9nM17wzPeszNnWZPN1lrC7abE13P53kmctY7dY8692YtYY+lW3nmb3EfHF3f3bCOeaeJ8llSS6rqi9W1fGqOrzL87w3yQ1VdTLJnUneMeE88zjT/2NTH+ts5mvemdaTMRk7E2czX/Mez3PYFvOc4/mad6b35lmesbN6OcVnk6q6IclKktft4gzPSfLBJG/erRlOY1/W3ia4Mmu/8d5TVb/Z3f+9S/Ncn+S27v67qvrdrP2tuFd19//s0jzMScY2JWPsmHxtaq/lK1mCjE39yuReu4zVPPOkqt6Q5N1Jrunun040yzzzXJDkVUm+UFXfydr5C8cmPIF5nsfnZJJj3f2z7v52km9mLZi7Nc+NSe5Iku7+UpLnJrlwonnmMdf/sbN4rLN9mTgZ29k8iYxt5Wzma97jeQ7bfJ5zPV/zzvTsz9hUJ3nOTuTcl+TRJJfm/048/Y0Na96enz95+Y5dnufVWTtZ9uCUj82882xY/4VMe/LyPI/P4SQfnd2+MGsvh79oF+f5XJI3z26/MmvnmtTEP7dLsvnJy3+Unz95+Su7/PM6a/k6g5lkTMa2mmlP5OsMHh/PYfK105me9Rmb9D/abLCrs9b8v5Xk3bN9t2TtN6ZkrYF/OsmJJF9J8vJdnuffkvxXkq/Ovo7t5jwb1k4axDkfn8ra2xYPJfl6kut2eZ5DSb44C+hXk/zhxPN8Msn3kvwsa7/h3pjkrUneuu7xuXU279f3wM/rrOZrzplkTMY2m2VP5WvOx8dzmHztdKZnfcZcThEAgGGugAMAwDBlEgCAYcokAADDlEkAAIYpkwAADFMmAQAYpkwCADDs/wMuoMnRa2ETDAAAAABJRU5ErkJggg==\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], "source": [ "# TODO: Import the three supervised learning models from sklearn\n", "\n", @@ -1284,9 +1495,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "# TODO: Import 'GridSearchCV', 'make_scorer', and any other necessary libraries\n", @@ -1398,9 +1607,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "# TODO: Import a supervised learning model that has 'feature_importances_'\n", @@ -1446,9 +1653,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "# Import functionality for cloning a model\n", diff --git a/python/Supervised Learning/Project/__pycache__/visuals.cpython-37.pyc b/python/Supervised Learning/Project/__pycache__/visuals.cpython-37.pyc index 9720f3b..402914c 100644 Binary files a/python/Supervised Learning/Project/__pycache__/visuals.cpython-37.pyc and b/python/Supervised Learning/Project/__pycache__/visuals.cpython-37.pyc differ diff --git a/python/Supervised Learning/Project/finding_donors.ipynb b/python/Supervised Learning/Project/finding_donors.ipynb index a340ac0..4909e69 100644 --- a/python/Supervised Learning/Project/finding_donors.ipynb +++ b/python/Supervised Learning/Project/finding_donors.ipynb @@ -42,7 +42,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 39, "metadata": {}, "outputs": [ { @@ -155,9 +155,17 @@ "** HINT: ** You may need to look at the table above to understand how the `'income'` entries are formatted. 
" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Check for missing data\n", + "To check for missing data we need to see if any rows are missing:" + ] + }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 40, "metadata": {}, "outputs": [ { @@ -173,18 +181,22 @@ "50% 37.000000 10.000000 0.000000 0.000000 40.000000\n", "75% 47.000000 13.000000 0.000000 0.000000 45.000000\n", "max 90.000000 16.000000 99999.000000 4356.000000 99.000000\n", - "45222\n" + "Total number of rows for income = 45222\n", + "\n", + "No missing data\n" ] } ], "source": [ "print(data.describe())\n", - "print(data['income'].shape[0])" + "print('Total number of rows for income = {}\\n'.format(data['income'].shape[0]))\n", + "if (data.isnull().values.any()) == False:\n", + " print('No missing data')" ] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 41, "metadata": {}, "outputs": [ { @@ -205,7 +217,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 42, "metadata": {}, "outputs": [ { @@ -281,7 +293,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 43, "metadata": {}, "outputs": [], "source": [ @@ -291,16 +303,16 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 44, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "" + "" ] }, - "execution_count": 8, + "execution_count": 44, "metadata": {}, "output_type": "execute_result" }, @@ -330,7 +342,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 45, "metadata": {}, "outputs": [ { @@ -339,7 +351,7 @@ "(0, 1500)" ] }, - "execution_count": 9, + "execution_count": 45, "metadata": {}, "output_type": "execute_result" }, @@ -364,7 +376,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 46, "metadata": {}, "outputs": [ { @@ -373,7 +385,7 @@ "(0, 1000)" ] }, - "execution_count": 10, + "execution_count": 46, "metadata": {}, "output_type": "execute_result" }, @@ 
-398,7 +410,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 47, "metadata": {}, "outputs": [ { @@ -434,7 +446,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 48, "metadata": {}, "outputs": [ { @@ -462,7 +474,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 49, "metadata": {}, "outputs": [ { @@ -471,7 +483,7 @@ "(0, 1500)" ] }, - "execution_count": 13, + "execution_count": 49, "metadata": {}, "output_type": "execute_result" }, @@ -496,7 +508,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 50, "metadata": {}, "outputs": [ { @@ -505,7 +517,7 @@ "(0, 1500)" ] }, - "execution_count": 14, + "execution_count": 50, "metadata": {}, "output_type": "execute_result" }, @@ -540,7 +552,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 59, "metadata": {}, "outputs": [ { @@ -735,7 +747,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 60, "metadata": {}, "outputs": [ { @@ -771,55 +783,55 @@ " \n", " \n", " \n", - " 28686\n", + " 14475\n", " Private\n", " Bachelors\n", " Married-civ-spouse\n", - " Craft-repair\n", + " Sales\n", " Husband\n", " White\n", " Male\n", " United-States\n", " \n", " \n", - " 12512\n", - " Private\n", - " HS-grad\n", - " Divorced\n", - " Machine-op-inspct\n", - " Other-relative\n", - " White\n", - " Male\n", - " United-States\n", - " \n", - " \n", - " 16816\n", - " Private\n", - " Assoc-acdm\n", - " Separated\n", - " Craft-repair\n", + " 14051\n", + " State-gov\n", + " Bachelors\n", + " Never-married\n", + " Prof-specialty\n", " Not-in-family\n", " White\n", " Male\n", " United-States\n", " \n", " \n", - " 5473\n", + " 40954\n", " Private\n", - " HS-grad\n", - " Married-civ-spouse\n", - " Craft-repair\n", - " Wife\n", + " Some-college\n", + " Never-married\n", + " Adm-clerical\n", + " Own-child\n", + " Black\n", + " Male\n", + " United-States\n", + " \n", + " \n", + " 29769\n", + " 
State-gov\n", + " Masters\n", + " Never-married\n", + " Exec-managerial\n", + " Not-in-family\n", " White\n", " Female\n", " United-States\n", " \n", " \n", - " 41388\n", - " Local-gov\n", - " Some-college\n", + " 22474\n", + " Private\n", + " Bachelors\n", " Married-civ-spouse\n", - " Protective-serv\n", + " Exec-managerial\n", " Husband\n", " White\n", " Male\n", @@ -830,22 +842,22 @@ "" ], "text/plain": [ - " workclass education_level marital-status occupation \\\n", - "28686 Private Bachelors Married-civ-spouse Craft-repair \n", - "12512 Private HS-grad Divorced Machine-op-inspct \n", - "16816 Private Assoc-acdm Separated Craft-repair \n", - "5473 Private HS-grad Married-civ-spouse Craft-repair \n", - "41388 Local-gov Some-college Married-civ-spouse Protective-serv \n", + " workclass education_level marital-status occupation \\\n", + "14475 Private Bachelors Married-civ-spouse Sales \n", + "14051 State-gov Bachelors Never-married Prof-specialty \n", + "40954 Private Some-college Never-married Adm-clerical \n", + "29769 State-gov Masters Never-married Exec-managerial \n", + "22474 Private Bachelors Married-civ-spouse Exec-managerial \n", "\n", - " relationship race sex native-country \n", - "28686 Husband White Male United-States \n", - "12512 Other-relative White Male United-States \n", - "16816 Not-in-family White Male United-States \n", - "5473 Wife White Female United-States \n", - "41388 Husband White Male United-States " + " relationship race sex native-country \n", + "14475 Husband White Male United-States \n", + "14051 Not-in-family White Male United-States \n", + "40954 Own-child Black Male United-States \n", + "29769 Not-in-family White Female United-States \n", + "22474 Husband White Male United-States " ] }, - "execution_count": 16, + "execution_count": 60, "metadata": {}, "output_type": "execute_result" } @@ -857,7 +869,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 65, "metadata": { "scrolled": true }, @@ -867,12 
+879,16 @@ "output_type": "stream", "text": [ "98 total features after one-hot encoding.\n", + "\n", + "Encoded feature names are:\n", "['workclass_ Federal-gov', 'workclass_ Local-gov', 'workclass_ Private', 'workclass_ Self-emp-inc', 'workclass_ Self-emp-not-inc', 'workclass_ State-gov', 'workclass_ Without-pay', 'education_level_ 10th', 'education_level_ 11th', 'education_level_ 12th', 'education_level_ 1st-4th', 'education_level_ 5th-6th', 'education_level_ 7th-8th', 'education_level_ 9th', 'education_level_ Assoc-acdm', 'education_level_ Assoc-voc', 'education_level_ Bachelors', 'education_level_ Doctorate', 'education_level_ HS-grad', 'education_level_ Masters', 'education_level_ Preschool', 'education_level_ Prof-school', 'education_level_ Some-college', 'marital-status_ Divorced', 'marital-status_ Married-AF-spouse', 'marital-status_ Married-civ-spouse', 'marital-status_ Married-spouse-absent', 'marital-status_ Never-married', 'marital-status_ Separated', 'marital-status_ Widowed', 'occupation_ Adm-clerical', 'occupation_ Armed-Forces', 'occupation_ Craft-repair', 'occupation_ Exec-managerial', 'occupation_ Farming-fishing', 'occupation_ Handlers-cleaners', 'occupation_ Machine-op-inspct', 'occupation_ Other-service', 'occupation_ Priv-house-serv', 'occupation_ Prof-specialty', 'occupation_ Protective-serv', 'occupation_ Sales', 'occupation_ Tech-support', 'occupation_ Transport-moving', 'relationship_ Husband', 'relationship_ Not-in-family', 'relationship_ Other-relative', 'relationship_ Own-child', 'relationship_ Unmarried', 'relationship_ Wife', 'race_ Amer-Indian-Eskimo', 'race_ Asian-Pac-Islander', 'race_ Black', 'race_ Other', 'race_ White', 'sex_ Female', 'sex_ Male', 'native-country_ Cambodia', 'native-country_ Canada', 'native-country_ China', 'native-country_ Columbia', 'native-country_ Cuba', 'native-country_ Dominican-Republic', 'native-country_ Ecuador', 'native-country_ El-Salvador', 'native-country_ England', 'native-country_ France', 
'native-country_ Germany', 'native-country_ Greece', 'native-country_ Guatemala', 'native-country_ Haiti', 'native-country_ Holand-Netherlands', 'native-country_ Honduras', 'native-country_ Hong', 'native-country_ Hungary', 'native-country_ India', 'native-country_ Iran', 'native-country_ Ireland', 'native-country_ Italy', 'native-country_ Jamaica', 'native-country_ Japan', 'native-country_ Laos', 'native-country_ Mexico', 'native-country_ Nicaragua', 'native-country_ Outlying-US(Guam-USVI-etc)', 'native-country_ Peru', 'native-country_ Philippines', 'native-country_ Poland', 'native-country_ Portugal', 'native-country_ Puerto-Rico', 'native-country_ Scotland', 'native-country_ South', 'native-country_ Taiwan', 'native-country_ Thailand', 'native-country_ Trinadad&Tobago', 'native-country_ United-States', 'native-country_ Vietnam', 'native-country_ Yugoslavia']\n", - "42657 0\n", - "31802 0\n", - "7760 0\n", - "21086 0\n", - "19464 0\n", + "\n", + "The income col now looks like:\n", + "40240 1\n", + "19808 1\n", + "26118 0\n", + "590 0\n", + "42229 1\n", "Name: income, dtype: object\n" ] } @@ -887,11 +903,11 @@ "\n", "# Print the number of features after one-hot encoding\n", "encoded = list(features_final.columns)\n", - "print(\"{} total features after one-hot encoding.\".format(len(encoded)))\n", + "print(\"{} total features after one-hot encoding.\\n\".format(len(encoded)))\n", "\n", "# Uncomment the following line to see the encoded feature names\n", - "print(encoded)\n", - "print(income_raw.sample(frac=1).head(5))" + "print(f'Encoded feature names are:\\n{encoded}\\n')\n", + "print(f'The income col now looks like:\\n{income_raw.sample(frac=1).head(5)}')" ] }, { @@ -916,7 +932,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 66, "metadata": {}, "outputs": [ { @@ -1000,7 +1016,7 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 67, "metadata": {}, "outputs": [], "source": [ @@ -1012,7 +1028,7 @@ }, { 
"cell_type": "code", - "execution_count": 28, + "execution_count": 68, "metadata": {}, "outputs": [ { @@ -1076,6 +1092,162 @@ "Structure your answer in the same format as above^, with 4 parts for each of the three models you pick. Please include references with your answer." ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Naive Bayes (Gaussian)\n", + "\n", + "## Applications\n", + "Naive Bayes has many applications in the real-world. It is very popular in the Medical industry - which itself is a huge area and has many applications where it can be applied.\n", + "\n", + "From my own research in my MSc, I used the results of [1]. Here Naive Bayes was used to categorically identify phage virion proteins from features such as amino acid composition and nucleo-capsid thickness. My own research was motivated in trying to understand the numerical mathematical dynamics of these features. The classifier had excellent results in this case - and this raised further questions about the dynamics of the features of viral proteins over time which were (and still) largely unknown. **This was an excellent use case of machine learning** to identify viral proteins from seemingly healthy ones - but mathematically was interesting as current numerical methods were slow, and the dyanmics of the equations were unknown. The classifier can be used to save a lot of time practically but mathematically raised more questions about what was happening to the current numerical models used. \n", + "The model also has extensive usage in text classification (such as the Spam example used in this nano-degree), and can also be used for predicting and recommendations if someone or something will do something (assuming the features are largely independent).\n", + "\n", + "## Advantages\n", + "The model performs very well in problems where the features are largely conditionally independent from each other. 
This means that we are assuming that features do not depend on each other (e.g. if we considered height and weight we would expect them to correlate, taller people would weigh more than shorter people, but weight could still be conditionally independent as there are other factors to consider in how heavy someone is).\n", + "Because of this, the model will converge much quicker than other methods (such as linear regression) - in practice this means we can use less training data. We can use the model for both discrete and continuous data. [2]\n", + "\n", + "## Disadvantages\n", + "The model also has disadvantages. When the features are dependent the conditional independence does not hold: in such a case computing\n", + "\n", + "$P(X|C_i)$\n", + "\n", + "can be computationally expensive if we cannot leverage the conditional independence condition\n", + "\n", + "$P(X|C_i) = P(x_1 | C_i) * P(x_2 | C_i) * ... * P(x_N | C_i)$\n", + "\n", + "Interestingly, it has been shown that Naive Bayes (Gaussian) can be used and will perform well even when this independence assumption does not hold. [3] attributes this to the underlying zero-one loss function used in the algorithm - which calculates the error as the number of incorrect predictions. The zero-one loss function does not penalise inaccurate probability estimates as long as the highest probability is assigned to the correct class [4]. In addition - special care must be taken if using this model **for non-independent features** in the continuous case - as the model minimises the expected loss by maximising the posterior probability $P(C_i|X)$. Care must be taken when considering the zero-one loss function, as integrating a discrete indicator over a probability density function (for a continuous case) would always be zero. Functions such as the Dirac delta function can be applied in this case [5].
There is much more research into why this model performs well when this condition does not hold; see [6].\n", + "Although the model is a very good classifier - it does fall short on estimating probabilities. This means we can use the model to evaluate future people given their features - but the probability that someone belongs to either class is not a good indicator. This means that future modelling will require all our data to classify someone; using the probabilities that someone belongs in either class is not a practical use for this model.\n", + "\n", + "\n", + "\n", + "## Reasoning\n", + "Based on the above I believe this model is a good candidate.\n", + "* We have features that appear at face value to not closely depend on each other - by this I mean no features stand out as largely depending on another. Age is largely independent of Ethnicity. Although there could be some dependence on the Capital Gains or Loss with respect to the other features - further investigation would only be warranted in my opinion if the output of the model warrants it.\n", + "* We want to know categorically if someone earns above or below \\$50k" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Decision Trees\n", + "\n", + "## Applications\n", + "\n", + "Decision trees have many applications in industry:\n", + "* One example is in customer relations and recommendations. Decision trees can be used to analyse how customers interact online and then analyse their usage to provide recommendations based on this information. [10] applied decision trees to investigate customers' shopping habits online - by classing people into two categories, 1) Those who rarely used it and 2) Those who shopped online frequently. The result of the decision tree showed that the time it takes customers to complete a transaction and how urgently the item was needed were the most important factors in whether a customer shopped online or not.
\n", + "\n", + "## Advantages\n", + "\n", + "Decision trees have many advantages when being considered as a classifier:\n", + "\n", + "* They perform well with missing data points. Other methods require the removal of data if any features are missing - which is inefficient and if you are not careful can affect the validity of your data. Decision trees work around this by classifying missing values as a separate category - where other categories can be used to analyse the missing categories. Or you can build a decision tree to categorically predict the missing values based on the data as a preliminary step before using the model to obtain your results. [7]\n", + "* They have excellent use in determining whether or not features are relatively important to one another. By using a decision tree you can find how important a feature is by removing it from the tree and validating the result against the feature included. This is a popular method employed for finding the importance of features - I myself have used such methods when considering the importance of states in Markov Chains. By removing states one by one - you can find the relative importance of a state relative to the other states. This can tell you lots of information about how important each state is. [8]\n", + "* Unlike a Naive Bayes classifier, Decision Trees can be used to predict values. The resulting probabilities from the model can be used to predict whether or not someone will belong to a class without the need to run the model with the new data included each time. [7]\n", + "* They are excellent when using categorical data. If you have a category that has many values under it a decision tree is a very good model in deciding how these categories can be split or grouped together. They can break the category down into a more manageable group. We can see in the cell below that the Occupation column has 14 values present. A decision tree can handle this (and much higher counts) with ease.
[7]\n", + "\n", + "## Disadvantages\n", + "* Although decision trees are excellent predictors - if the data changes or evolves over time (say the number of people belonging to a specific job goes up) then the model needs to be redrawn to account for this. This can be accounted for by using ensemble methods in tandem with a decision tree [9]\n", + "* The hyperparameters for the model are very important - popular hyperparameters to consider are max_depth which can be used to control over-fitting, min_samples_split and min_samples_leaf which consider how many samples are needed to split or be defined as a leaf and min_weight_fraction_leaf when considering weighted samples. The overabundance and consideration of the hyperparameters means there is a need to understand your data before considering values for these. Methods we have seen such as grid search can aid in the process when we have many to consider.\n", + "* Decision trees are sensitive if a category is dominated by a particular value. Careful consideration should be taken if this is the case and balancing the data can help. Methods used to balance could include oversampling (adding copies of under-represented values) or undersampling (removal of some of the dominant class values). We should also pay close attention to the Precision, Recall and F1 Score when evaluating the model - as this can aid in selection. \n", + "\n", + "## Reasoning\n", + "Based on the above I believe Decision Trees are a good candidate for our problem.\n", + "* We have a single classification problem which lends itself well to decision trees. \n", + "* We have categories which contain many classes (such as occupation) - we know decision trees can be used to great effect with this kind of data.\n", + "* Easy to visualise and explain - justifying the model can be easily done with scikit-learn. 
We can plot the tree which will show us the categories and their values used to split at each node.\n", + "* Categorical data - although we have accounted for categories with pd.get_dummies(), the model lends itself well to categorical data." + ] + }, + { + "cell_type": "code", + "execution_count": 106, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Number of unique values in occupation column is 14\n" + ] + } + ], + "source": [ + "import collections\n", + "occupation_cat_count = collections.Counter(data['occupation'].unique())\n", + "occupation_cat_count = sum(occupation_cat_count.values())\n", + "print(f'Number of unique values in occupation column is {occupation_cat_count}')\n", + "# sum(collections.Counter(encoded).values())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# SVMs\n", + "\n", + "## Applications\n", + "\n", + "An application of a support vector machine in industry is:\n", + "* SVMs have a wide application in image recognition and classification problems. When applied to image recognition each feature of the data set corresponds to a single image pixel. An SVM can be used to classify these pixels into which category they belong to based on their features (such as edge, colour or shape). In [11] SVMs were used on microscopic images of cells to detect the location of the cell nucleus. Manual location of the cell nuclei is a long and arduous process especially when considering there may be many thousands of images to consider. An SVM can be used to learn what part of an image corresponds to a nucleus, and what part does not and was used to great effect in this case - managing to locate the nuclei across multiple scales and stains successfully. 
Their usage is not limited to the medical sector and they have many practical uses such as in geography in identifying areas of land suitable for cultivation and farming [12]\n", + "\n", + "## Advantages\n", + "\n", + "SVMs have many advantages and are a powerful classification method\n", + "* They integrate well with kernel methods - this means that SVMs are very versatile. You can employ different Kernels when considering how you will map your points to a higher dimensional space and even write your own kernels if your data requires it.\n", + "* When compared to linear regression methods SVMs are more robust due to the maximising of the margin. With the hyperparameter C you can control how much an incorrectly classified point is penalised.\n", + "* They are excellent at non linear boundaries due to the kernels they employ. Although it is possible to employ kernels in other methods such as regression, SVMs in scikit-learn have the kernels already implemented and it is much easier to use the \"kernel trick\" (the kernel trick uses the fact that you can generalise kernels in higher dimensions by using the dot product in the original space and using a generalisation of the corresponding Kernel. [14] has an excellent write up on how this is used for the Linear kernel) \n", + "\n", + "## Disadvantages\n", + "Although SVMs are powerful in classification problems there are disadvantages we must consider when using them\n", + "* Choosing a kernel function is not an easy task and can often be the main barrier to the success of the model. scikit-learn does offer several kernels, such as the linear kernel and the popular Radial Basis Function (RBF) kernel. The RBF is often used and uses the squared Euclidean distance between two points. The feature space of the kernel has an infinite number of dimensions [13] and this means the kernel can be used to project points into any higher dimension - although this comes at a huge computational cost. 
\n", + "* The model has a long training time on large data sets and this is due to several reasons\n", + " * The C parameter is a hyperparameter used to penalise misclassified points - the higher this is, the more accurate the results but the slower the training process is.\n", + " * The general method is of $O(n^3)$ (where $O$ is big-O notation) - meaning it has to run a number of operations proportional to $n^3$. With 10,000 data points this means the number of operations is proportional to $10^{12}$ - a huge number which can affect our computational time greatly.\n", + " \n", + "## Reasoning\n", + "Based on the above I believe SVMs can be used for our model\n", + "* We have a classification problem which is well suited to SVMs.\n", + "* They are very good when we have a large number of features and after transforming our category values into feature values we know we have 98 features for our category data.\n", + "* With careful selection of our C hyperparameter and a suitable kernel we can obtain good boundaries for our dataset." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# References\n", + "[1] https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3671239/\n", + "\n", + "[2] https://www.slideshare.net/ashrafmath/naive-bayes-15644818\n", + "\n", + "[3] https://www.cs.waikato.ac.nz/~eibe/pubs/nbr.pdf\n", + "\n", + "[4] https://link.springer.com/article/10.1023/A:1009778005914\n", + "\n", + "[5] https://en.wikipedia.org/wiki/Dirac_delta_function\n", + "\n", + "[6] https://www.cs.unb.ca/~hzhang/publications/FLAIRS04ZhangH.pdf\n", + "\n", + "[7] https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4466856/\n", + "\n", + "[8] https://www.analyticsvidhya.com/blog/2018/01/channel-attribution-modeling-using-markov-chains-in-r/\n", + "\n", + "[9] https://scikit-learn.org/stable/modules/tree.html\n", + "\n", + "[10] https://www.sciencedirect.com/science/article/pii/S0957417406001825\n", + "\n", + "[11] https://link.springer.com/article/10.1007/s00138-010-0275-y\n", + "\n", + "[12] https://www.ncbi.nlm.nih.gov/pubmed/20052093\n", + "\n", + "[13] https://en.wikipedia.org/wiki/Radial_basis_function_kernel\n", + "\n", + "[14] https://www.quora.com/What-is-the-kernel-trick" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -1101,10 +1273,8 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, + "execution_count": 57, + "metadata": {}, "outputs": [], "source": [ "# TODO: Import two metrics from sklearn - fbeta_score and accuracy_score\n", @@ -1124,11 +1294,11 @@ " \n", " # TODO: Fit the learner to the training data using slicing with 'sample_size' using .fit(training_features[:], training_labels[:])\n", " start = time() # Get start time\n", - " learner = None\n", + " learner = learner.fit(X_train, y_train)\n", " end = time() # Get end time\n", " \n", " # TODO: Calculate the training time\n", - " results['train_time'] = None\n", + " results['train_time'] = end - start\n", " \n", " # TODO: Get the predictions on the test set(X_test),\n", " # 
then get predictions on the first 300 training samples(X_train) using .predict()\n", @@ -1177,11 +1347,52 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], + "execution_count": 58, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "NoneType trained on None samples.\n", + "NoneType trained on None samples.\n", + "NoneType trained on None samples.\n", + "NoneType trained on None samples.\n", + "NoneType trained on None samples.\n", + "NoneType trained on None samples.\n", + "NoneType trained on None samples.\n", + "NoneType trained on None samples.\n", + "NoneType trained on None samples.\n" + ] + }, + { + "ename": "TypeError", + "evalue": "unsupported operand type(s) for +: 'int' and 'NoneType'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 24\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 25\u001b[0m \u001b[0;31m# Run metrics visualization for the three supervised learning models chosen\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 26\u001b[0;31m \u001b[0mvs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mevaluate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mresults\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maccuracy\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfscore\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;32m~/courses/Udacity Machine Learning - Introduction Nanodegree Program/python/Supervised Learning/Project/visuals.py\u001b[0m in \u001b[0;36mevaluate\u001b[0;34m(results, accuracy, f1)\u001b[0m\n\u001b[1;32m 73\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 74\u001b[0m \u001b[0;31m# Creative plot 
code\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 75\u001b[0;31m \u001b[0max\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mj\u001b[0m\u001b[0;34m//\u001b[0m\u001b[0;36m3\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mj\u001b[0m\u001b[0;34m%\u001b[0m\u001b[0;36m3\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbar\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m+\u001b[0m\u001b[0mk\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0mbar_width\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mresults\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mlearner\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mmetric\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mwidth\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mbar_width\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcolor\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcolors\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mk\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 76\u001b[0m \u001b[0max\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mj\u001b[0m\u001b[0;34m//\u001b[0m\u001b[0;36m3\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mj\u001b[0m\u001b[0;34m%\u001b[0m\u001b[0;36m3\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mset_xticks\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0.45\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m1.45\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m2.45\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 77\u001b[0m \u001b[0max\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mj\u001b[0m\u001b[0;34m//\u001b[0m\u001b[0;36m3\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0mj\u001b[0m\u001b[0;34m%\u001b[0m\u001b[0;36m3\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mset_xticklabels\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"1%\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"10%\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"100%\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/.virtualenvs/udacity-ML-3.7.3/lib/python3.7/site-packages/matplotlib/__init__.py\u001b[0m in \u001b[0;36minner\u001b[0;34m(ax, data, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1599\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0minner\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0max\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdata\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1600\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mdata\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1601\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0max\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0mmap\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msanitize_sequence\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0margs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1602\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1603\u001b[0m \u001b[0mbound\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnew_sig\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbind\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0max\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/.virtualenvs/udacity-ML-3.7.3/lib/python3.7/site-packages/matplotlib/axes/_axes.py\u001b[0m in \u001b[0;36mbar\u001b[0;34m(self, x, height, width, bottom, align, **kwargs)\u001b[0m\n\u001b[1;32m 2428\u001b[0m \u001b[0medgecolor\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0me\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2429\u001b[0m \u001b[0mlinewidth\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mlw\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2430\u001b[0;31m \u001b[0mlabel\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'_nolegend_'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2431\u001b[0m )\n\u001b[1;32m 2432\u001b[0m \u001b[0mr\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mupdate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/.virtualenvs/udacity-ML-3.7.3/lib/python3.7/site-packages/matplotlib/patches.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, xy, width, height, angle, **kwargs)\u001b[0m\n\u001b[1;32m 714\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 715\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_x1\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_x0\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_width\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 716\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_y1\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_y0\u001b[0m \u001b[0;34m+\u001b[0m 
\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_height\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 717\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 718\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mangle\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mfloat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mangle\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mTypeError\u001b[0m: unsupported operand type(s) for +: 'int' and 'NoneType'" + ] + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAApMAAAGfCAYAAAADPFkbAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAAfDElEQVR4nO3dYYhld3k/8O9jtqnURi1mBclGE/lvqltbMB1Si1BTtGWTQvLCIgmE1hJctEYKSiHFYiW+slILQlq7pRIVNEZflAUjgdpIQFzNhGg0CZE12majNKumvhGNoc//xdy047gzc/c39+xM7n4+MHDPub+d83Bnv9zv3HvunOruAADAiOfs9gAAADx7KZMAAAxTJgEAGKZMAgAwTJkEAGCYMgkAwLBty2RVfaSqnqiqb2xyf1XVh6rqRFU9UFWXL35MWF4yBtOSMZjWPK9M3pbk8Bb3X5Xk4OzrSJJ/3PlYcE65LTIGU7otMgaT2bZMdvc9SX64xZJrk3ys1xxP8sKqesmiBoRlJ2MwLRmDae1bwPe4KMlj67ZPzvZ9b+PCqjqStd/68rznPe+3X/GKVyzg8LA33Hfffd/v7v0TfGsZ45w3Yb6SOTMmXyyznWRsEWVybt19NMnRJFlZWenV1dWzeXiYVFX9x27PIGMsK/mCae0kY4v4NPfjSS5et31gtg9YDBmDackY7MAiyuSxJH8y+zTca5L8qLt/4e03YJiMwbRkDHZg27e5q+qTSa5McmFVnUzyN0l+KUm6+8NJ7kxydZITSX6c5M+mGhaWkYzBtGQMprVtmezu67e5v5O8fWETwTlGxmBaMgbTcgUcAACGKZMAAAxTJgEAGKZMAgAwTJkEAGCYMgkAwDBlEgCAYcokAADDlEkAAIYpkwAADFMmAQAYpkwCADBMmQQAYJgyCQDAMGUSAIBhyiQAAMOUSQAAhimTAAAMUyYBABimTAIAMEyZBABgmDIJAMAwZRIAgGHKJAAAw5RJAACGKZMAAAybq0xW1eGqeqSqTlTVzae5/6VVdXdV3V9VD1TV1YsfFZaXjMF05AumtW2ZrKrzktya5Kokh5JcX1WHNiz76yR3dPerk1yX5B8WPSgsKxmD6cgXTG+eVyavSHKiux/t7qeS3J7k2g1rOsnzZ7dfkOS7ixsRlp6MwXTkCyY2T5m8KMlj67ZPzvat994kN1TVySR3JnnH6b5RVR2pqtWqWj116tTAuLCUZAymI18wsUV9AOf6JLd194EkVyf5eFX9wvfu7qPdvdLdK/v371/QoeGcIGMwHfmCHZinTD6e5OJ12wdm+9a7MckdSdLdX0ry3CQXLmJAOAfIGExHvmBi85TJe5McrKpLq+r8rJ2cfGzDmv9M8vokqapXZi2I3gOA+cgYTEe
+YGLblsnufjrJTUnuSvJw1j7x9mBV3VJV18yWvSvJW6rqa0k+meTN3d1TDQ3LRMZgOvIF09s3z6LuvjNrJyWv3/eedbcfSvLaxY4G5w4Zg+nIF0zLFXAAABimTAIAMEyZBABgmDIJAMAwZRIAgGHKJAAAw5RJAACGKZMAAAxTJgEAGKZMAgAwTJkEAGCYMgkAwDBlEgCAYcokAADDlEkAAIYpkwAADFMmAQAYpkwCADBMmQQAYJgyCQDAMGUSAIBhyiQAAMOUSQAAhimTAAAMUyYBABg2V5msqsNV9UhVnaiqmzdZ86aqeqiqHqyqTyx2TFhe8gXTkjGY1r7tFlTVeUluTfIHSU4mubeqjnX3Q+vWHEzyV0le291PVtWLpxoYlol8wbRkDKY3zyuTVyQ50d2PdvdTSW5Pcu2GNW9Jcmt3P5kk3f3EYseEpSVfMC0Zg4nNUyYvSvLYuu2Ts33rXZbksqr6YlUdr6rDp/tGVXWkqlaravXUqVNjE8NyWVi+EhmD0/AcBhNb1Adw9iU5mOTKJNcn+eeqeuHGRd19tLtXuntl//79Czo0LL258pXIGAzyHAY7ME+ZfDzJxeu2D8z2rXcyybHu/ll3fzvJN7MWTGBr8gXTkjGY2Dxl8t4kB6vq0qo6P8l1SY5tWPOvWfuNLlV1YdbeMnh0gXPCspIvmJaMwcS2LZPd/XSSm5LcleThJHd094NVdUtVXTNbdleSH1TVQ0nuTvKX3f2DqYaGZSFfMC0Zg+lVd+/KgVdWVnp1dXVXjg1TqKr7untlt+d4hoyxTOQLprWTjLkCDgAAw5RJAACGKZMAAAxTJgEAGKZMAgAwTJkEAGCYMgkAwDBlEgCAYcokAADDlEkAAIYpkwAADFMmAQAYpkwCADBMmQQAYJgyCQDAMGUSAIBhyiQAAMOUSQAAhimTAAAMUyYBABimTAIAMEyZBABgmDIJAMAwZRIAgGHKJAAAw5RJAACGzVUmq+pwVT1SVSeq6uYt1r2xqrqqVhY3Iiw/GYPpyBdMa9syWVXnJbk1yVVJDiW5vqoOnWbdBUn+IsmXFz0kLDMZg+nIF0xvnlcmr0hyorsf7e6nktye5NrTrHtfkvcn+ckC54NzgYzBdOQLJjZPmbwoyWPrtk/O9v2vqro8ycXd/dmtvlFVHamq1apaPXXq1BkPC0tKxmA68gUT2/EHcKrqOUk+mORd263t7qPdvdLdK/v379/poeGcIGMwHfmCnZunTD6e5OJ12wdm+55xQZJXJflCVX0nyWuSHHMCM8xNxmA68gUTm6dM3pvkYFVdWlXnJ7kuybFn7uzuH3X3hd19SXdfkuR4kmu6e3WSiWH5yBhMR75gYtuWye5+OslNSe5K8nCSO7r7waq6paqumXpAWHYyBtORL5jevnkWdfedSe7csO89m6y9cudjwblFxmA68gXTcgUcAACGKZMAAAxTJgEAGKZMAgAwTJkEAGCYMgkAwDBlEgCAYcokAADDlEkAAIYpkwAADFMmAQAYpkwCADBMmQQAYJgyCQDAMGUSAIBhyiQAAMOUSQAAhimTAAAMUyYBABimTAIAMEyZBABgmDIJAMAwZRIAgGHKJAAAw5RJAACGKZMAAAybq0xW1eGqeqSqTlTVzae5/51V9VBVPVBVn6+qly1+VFhO8gXTkjGY1rZlsqrOS3JrkquSHEpyfVUd2rDs/iQr3f1bST6T5G8XPSgsI/mCackYTG+eVyavSHKiux/t7qeS3J7k2vULuvvu7v7xbPN4kgOLHROWlnzBtGQMJjZPmbwoyWPrtk/O9m3mxiSfO90dVXWkqlaravXUqVPzTwnLa2H5SmQMTsNzGExsoR/Aqaobkqwk+cDp7u/uo9290t0r+/fvX+ShYeltl69ExmAnPIfBmH1zrHk8ycXrtg/M9v2cqnpDkncneV13/3Qx48HSky+YlozBxOZ5ZfLeJAer6tKqOj/JdUmOrV9QVa9O8k9JrunuJxY/Jiwt+YJ
pyRhMbNsy2d1PJ7kpyV1JHk5yR3c/WFW3VNU1s2UfSPKrST5dVV+tqmObfDtgHfmCackYTG+et7nT3XcmuXPDvvesu/2GBc8F5wz5gmnJGEzLFXAAABimTAIAMEyZBABgmDIJAMAwZRIAgGHKJAAAw5RJAACGKZMAAAxTJgEAGKZMAgAwTJkEAGCYMgkAwDBlEgCAYcokAADDlEkAAIYpkwAADFMmAQAYpkwCADBMmQQAYJgyCQDAMGUSAIBhyiQAAMOUSQAAhimTAAAMUyYBABg2V5msqsNV9UhVnaiqm09z/y9X1adm93+5qi5Z9KCwzGQMpiNfMK1ty2RVnZfk1iRXJTmU5PqqOrRh2Y1Jnuzu/5fk75O8f9GDwrKSMZiOfMH05nll8ookJ7r70e5+KsntSa7dsObaJB+d3f5MktdXVS1uTFhqMgbTkS+Y2L451lyU5LF12yeT/M5ma7r76ar6UZIXJfn++kVVdSTJkdnmT6vqGyNDT+TCbJh3l5lna3ttniT59cF/J2O7wzxb22vzyNfW9trPK9l7M5lna6MZm6tMLkx3H01yNEmqarW7V87m8bdinq2ZZ3tVtbrbM8jY/Myztb04z27PIF9nZq/NZJ6t7SRj87zN/XiSi9dtH5jtO+2aqtqX5AVJfjA6FJxjZAymI18wsXnK5L1JDlbVpVV1fpLrkhzbsOZYkj+d3f7jJP/e3b24MWGpyRhMR75gYtu+zT07f+SmJHclOS/JR7r7waq6Jclqdx9L8i9JPl5VJ5L8MGth3c7RHcw9BfNszTzbG5pJxnaNeba2FPPI167aazOZZ2vD85RfvgAAGOUKOAAADFMmAQAYNnmZ3GuXsZpjnndW1UNV9UBVfb6qXrab86xb98aq6qqa9M8IzDNPVb1p9hg9WFWf2M15quqlVXV3Vd0/+5ldPfE8H6mqJzb7+3K15kOzeR+oqssnnmdP5WvOmWRMxjabZU/la3bMPZUx+dr5PGczX/PMtBQZ6+7JvrJ2svO3krw8yflJvpbk0IY1f57kw7Pb1yX51C7P8/tJfmV2+227Pc9s3QVJ7klyPMnKLj8+B5Pcn+TXZtsv3uV5jiZ52+z2oSTfmWqe2TF+L8nlSb6xyf1XJ/lckkrymiRf3uXH56zl6wxmkjEZ22yePZOvM3h8PIfJ105netZnbOpXJvfaZay2nae77+7uH882j2ftb5JNZZ7HJ0nel7Vrxf5kwlnmnectSW7t7ieTpLuf2OV5OsnzZ7dfkOS7E86T7r4na5/23My1ST7Wa44neWFVvWSicfZavuaaScZkbDN7LF/J3suYfO18nrOZr3lnetZnbOoyebrLWF202ZrufjrJM5ex2q151rsxaw19KtvOM3uJ+eLu/uyEc8w9T5LLklxWVV+squNVdXiX53lvkhuq6mSSO5O8Y8J55nGm/8emPtbZzNe8M60nYzJ2Js5mvuY9nuewLeY5x/M170zvzbM8Y2f1corPJlV1Q5KVJK/bxRmek+SDSd68WzOcxr6svU1wZdZ+472nqn6zu/97l+a5Pslt3f13VfW7Wftbca/q7v/ZpXmYk4xtSsbYMfna1F7LV7IEGZv6lcm9dhmreeZJVb0hybuTXNPdP51olnnmuSDJq5J8oaq+k7XzF45NeALzPI/PySTHuvtn3f3tJN/MWjB3a54bk9yRJN39pSTPTXLhRPPMY67/Y2fxWGf7MnEytrN5EhnbytnM17zH8xy2+Tzner7mnenZn7GpTvKcnci5L8mjSS7N/514+hsb1rw9P3/y8h27PM+rs3ay7MEpH5t559mw/guZ9uTleR6fw0k+Ort9YdZeDn/RLs7zuSRvnt1+ZdbONamJf26XZPOTl/8oP3/y8ld2+ed11vJ1BjPJmIxtNdOeyNcZPD6ew+RrpzM96zM26X+02WBXZ635fyvJu2f7bsnab0zJWgP/dJITSb6S5OW7PM+/JfmvJF+dfR3bzXk2rJ00iHM+PpW1ty0
eSvL1JNft8jyHknxxFtCvJvnDief5ZJLvJflZ1n7DvTHJW5O8dd3jc+ts3q/vgZ/XWc3XnDPJmIxtNsueytecj4/nMPna6UzP+oy5nCIAAMNcAQcAgGHKJAAAw5RJAACGKZMAAAxTJgEAGKZMAgAwTJkEAGCYMgkAwDBlEgCAYcokAADDlEkAAIYpkwAADFMmAQAYpkwCADBMmQQAYNi2ZbKqPlJVT1TVNza5v6rqQ1V1oqoeqKrLFz8mLC8Zg2nJGExrnlcmb0tyeIv7r0pycPZ1JMk/7nwsOKfcFhmDKd0WGYPJbFsmu/ueJD/cYsm1ST7Wa44neWFVvWRRA8KykzGYlozBtPYt4HtclOSxddsnZ/u+t3FhVR3J2m99ed7znvfbr3jFKxZweNgb7rvvvu939/4JvrWMcc6bMF/JnBmTL5bZTjK2iDI5t+4+muRokqysrPTq6urZPDxMqqr+Y7dnkDGWlXzBtHaSsUV8mvvxJBev2z4w2wcshozBtGQMdmARZfJYkj+ZfRruNUl+1N2/8PYbMEzGYFoyBjuw7dvcVfXJJFcmubCqTib5myS/lCTd/eEkdya5OsmJJD9O8mdTDQvLSMZgWjIG09q2THb39dvc30nevrCJ4BwjYzAtGYNpuQIOAADDlEkAAIYpkwAADFMmAQAYpkwCADBMmQQAYJgyCQDAMGUSAIBhyiQAAMOUSQAAhimTAAAMUyYBABimTAIAMEyZBABgmDIJAMAwZRIAgGHKJAAAw5RJAACGKZMAAAxTJgEAGKZMAgAwTJkEAGCYMgkAwDBlEgCAYcokAADD5iqTVXW4qh6pqhNVdfNp7n9pVd1dVfdX1QNVdfXiR4XlJWMwHfmCaW1bJqvqvCS3JrkqyaEk11fVoQ3L/jrJHd396iTXJfmHRQ8Ky0rGYDryBdOb55XJK5Kc6O5Hu/upJLcnuXbDmk7y/NntFyT57uJGhKUnYzAd+YKJzVMmL0ry2Lrtk7N96703yQ1VdTLJnUnecbpvVFVHqmq1qlZPnTo1MC4sJRmD6cgXTGxRH8C5Pslt3X0gydVJPl5Vv/C9u/tod69098r+/fsXdGg4J8gYTEe+YAfmKZOPJ7l43faB2b71bkxyR5J095eSPDfJhYsYEM4BMgbTkS+Y2Dxl8t4kB6vq0qo6P2snJx/bsOY/k7w+SarqlVkLovcAYD4yBtORL5jYtmWyu59OclOSu5I8nLVPvD1YVbdU1TWzZe9K8paq+lqSTyZ5c3f3VEPDMpExmI58wfT2zbOou+/M2knJ6/e9Z93th5K8drGjwblDxmA68gXTcgUcAACGKZMAAAxTJgEAGKZMAgAwTJkEAGCYMgkAwDBlEgCAYcokAADDlEkAAIYpkwAADFMmAQAYpkwCADBMmQQAYJgyCQDAMGUSAIBhyiQAAMOUSQAAhimTAAAMUyYBABimTAIAMEyZBABgmDIJAMAwZRIAgGHKJAAAw5RJAACGKZMAAAybq0xW1eGqeqSqTlTVzZuseVNVPVRVD1bVJxY7Jiwv+YJpyRhMa992C6rqvCS3JvmDJCeT3FtVx7r7oXVrDib5qySv7e4nq+rFUw0My0S+YFoyBtOb55XJK5Kc6O5Hu/upJLcnuXbDmrckubW7n0yS7n5isWPC0pIvmJaMwcTmKZMXJXls3fbJ2b71LktyWVV9saqOV9Xh032jqjpSVatVtXrq1KmxiWG5LCxfiYzBaXgOg4kt6gM4+5IcTHJlkuuT/HNVvXDjou4+2t0r3b2yf//+BR0alt5c+UpkDAZ5DoMdmKdMPp7k4nXbB2b71juZ5Fh3/6y7v53km1kLJrA1+YJpyRhMbJ4yeW+Sg1V1aVWdn+S6JMc2rPnXrP1Gl6q6MGtvGTy6wDlhWckXTEvGYGLblsnufjrJTUnuSvJwkju6+8GquqWqrpktuyvJD6rqoSR3J/nL7v7BVEPDspAvmJaMwfSqu3flwCsrK726urorx4YpVNV93b2
y23M8Q8ZYJvIF09pJxlwBBwCAYcokAADDlEkAAIYpkwAADFMmAQAYpkwCADBMmQQAYJgyCQDAMGUSAIBhyiQAAMOUSQAAhimTAAAMUyYBABimTAIAMEyZBABgmDIJAMAwZRIAgGHKJAAAw5RJAACGKZMAAAxTJgEAGKZMAgAwTJkEAGCYMgkAwDBlEgCAYcokAADD5iqTVXW4qh6pqhNVdfMW695YVV1VK4sbEZafjMF05AumtW2ZrKrzktya5Kokh5JcX1WHTrPugiR/keTLix4SlpmMwXTkC6Y3zyuTVyQ50d2PdvdTSW5Pcu1p1r0vyfuT/GSB88G5QMZgOvIFE5unTF6U5LF12ydn+/5XVV2e5OLu/uxW36iqjlTValWtnjp16oyHhSUlYzAd+YKJ7fgDOFX1nCQfTPKu7dZ299HuXunulf379+/00HBOkDGYjnzBzs1TJh9PcvG67QOzfc+4IMmrknyhqr6T5DVJjjmBGeYmYzAd+YKJzVMm701ysKourarzk1yX5Ngzd3b3j7r7wu6+pLsvSXI8yTXdvTrJxLB8ZAymI18wsW3LZHc/neSmJHcleTjJHd39YFXdUlXXTD0gLDsZg+nIF0xv3zyLuvvOJHdu2PeeTdZeufOx4NwiYzAd+YJpuQIOAADDlEkAAIYpkwAADFMmAQAYpkwCADBMmQQAYJgyCQDAMGUSAIBhyiQAAMOUSQAAhimTAAAMUyYBABimTAIAMEyZBABgmDIJAMAwZRIAgGHKJAAAw5RJAACGKZMAAAxTJgEAGKZMAgAwTJkEAGCYMgkAwDBlEgCAYcokAADD5iqTVXW4qh6pqhNVdfNp7n9nVT1UVQ9U1eer6mWLHxWWk3zBtGQMprVtmayq85LcmuSqJIeSXF9VhzYsuz/JSnf/VpLPJPnbRQ8Ky0i+YFoyBtOb55XJK5Kc6O5Hu/upJLcnuXb9gu6+u7t/PNs8nuTAYseEpSVfMC0Zg4nNUyYvSvLYuu2Ts32buTHJ5053R1UdqarVqlo9derU/FPC8lpYvhIZg9PwHAYTW+gHcKrqhiQrST5wuvu7+2h3r3T3yv79+xd5aFh62+UrkTHYCc9hMGbfHGseT3Lxuu0Ds30/p6rekOTdSV7X3T9dzHiw9OQLpiVjMLF5Xpm8N8nBqrq0qs5Pcl2SY+sXVNWrk/xTkmu6+4nFjwlLS75gWjIGE9u2THb300luSnJXkoeT3NHdD1bVLVV1zWzZB5L8apJPV9VXq+rYJt8OWEe+YFoyBtOb523udPedSe7csO89626/YcFzwTlDvmBaMgbTcgUcAACGKZMAAAxTJgEAGKZMAgAwTJkEAGCYMgkAwDBlEgCAYcokAADDlEkAAIYpkwAADFMmAQAYpkwCADBMmQQAYJgyCQDAMGUSAIBhyiQAAMOUSQAAhimTAAAMUyYBABimTAIAMEyZBABgmDIJAMAwZRIAgGHKJAAAw5RJAACGKZMAAAybq0xW1eGqeqSqTlTVzae5/5er6lOz+79cVZcselBYZjIG05EvmNa2ZbKqzktya5KrkhxKcn1VHdqw7MYkT3b3/0vy90nev+hBYVnJGExHvmB687wyeUWSE939aHc/leT2JNduWHNtko/Obn8myeurqhY3Jiw1GYPpyBdMbN8cay5K8ti67ZNJfmezNd39dFX9KMmLknx//aKqOpLkyGzzp1X1jZGhJ3JhNsy7y8yztb02T5L8+uC/k7HdYZ6t7bV55Gtre+3nley9mcyztdGMzVUmF6a7jyY5miRVtdrdK2fz+Fsxz9bMs72qWt3tGWRsfubZ2l6cZ7dnkK8zs9dmMs/WdpKxed7mfjzJxeu2D8z2nXZNVe1L8oIkPxgdCs4xMgbTkS+Y2Dxl8t4kB6vq0qo6P8l1SY5tWHMsyZ/Obv9xkn/v7l7cmLDUZAymI18wsW3f5p6dP3JTkruSnJfkI939YFXdkmS1u48l+ZckH6+qE0l+mLWwbufoDua
egnm2Zp7tDc0kY7vGPFtbinnka1fttZnMs7XhecovXwAAjHIFHAAAhimTAAAMm7xM7rXLWM0xzzur6qGqeqCqPl9VL9vNedate2NVdVVN+mcE5pmnqt40e4werKpP7OY8VfXSqrq7qu6f/cyunniej1TVE5v9fbla86HZvA9U1eUTz7On8jXnTDImY5vNsqfyNTvmnsqYfO18nrOZr3lmWoqMdfdkX1k72flbSV6e5PwkX0tyaMOaP0/y4dnt65J8apfn+f0kvzK7/bbdnme27oIk9yQ5nmRllx+fg0nuT/Jrs+0X7/I8R5O8bXb7UJLvTDXP7Bi/l+TyJN/Y5P6rk3wuSSV5TZIv7/Ljc9bydQYzyZiMbTbPnsnXGTw+nsPka6czPeszNvUrk3vtMlbbztPdd3f3j2ebx7P2N8mmMs/jkyTvy9q1Yn8y4SzzzvOWJLd295NJ0t1P7PI8neT5s9svSPLdCedJd9+TtU97bubaJB/rNceTvLCqXjLROHstX3PNJGMytpk9lq9k72VMvnY+z9nM17wzPeszNnWZPN1lrC7abE13P53kmctY7dY8692YtYY+lW3nmb3EfHF3f3bCOeaeJ8llSS6rqi9W1fGqOrzL87w3yQ1VdTLJnUneMeE88zjT/2NTH+ts5mvemdaTMRk7E2czX/Mez3PYFvOc4/mad6b35lmesbN6OcVnk6q6IclKktft4gzPSfLBJG/erRlOY1/W3ia4Mmu/8d5TVb/Z3f+9S/Ncn+S27v67qvrdrP2tuFd19//s0jzMScY2JWPsmHxtaq/lK1mCjE39yuReu4zVPPOkqt6Q5N1Jrunun040yzzzXJDkVUm+UFXfydr5C8cmPIF5nsfnZJJj3f2z7v52km9mLZi7Nc+NSe5Iku7+UpLnJrlwonnmMdf/sbN4rLN9mTgZ29k8iYxt5Wzma97jeQ7bfJ5zPV/zzvTsz9hUJ3nOTuTcl+TRJJfm/048/Y0Na96enz95+Y5dnufVWTtZ9uCUj82882xY/4VMe/LyPI/P4SQfnd2+MGsvh79oF+f5XJI3z26/MmvnmtTEP7dLsvnJy3+Unz95+Su7/PM6a/k6g5lkTMa2mmlP5OsMHh/PYfK105me9Rmb9D/abLCrs9b8v5Xk3bN9t2TtN6ZkrYF/OsmJJF9J8vJdnuffkvxXkq/Ovo7t5jwb1k4axDkfn8ra2xYPJfl6kut2eZ5DSb44C+hXk/zhxPN8Msn3kvwsa7/h3pjkrUneuu7xuXU279f3wM/rrOZrzplkTMY2m2VP5WvOx8dzmHztdKZnfcZcThEAgGGugAMAwDBlEgCAYcokAADDlEkAAIYpkwAADFMmAQAYpkwCADDs/wMuoMnRa2ETDAAAAABJRU5ErkJggg==\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], "source": [ "# TODO: Import the three supervised learning models from sklearn\n", "\n", @@ -1284,9 +1495,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "# TODO: Import 'GridSearchCV', 'make_scorer', and any other necessary libraries\n", @@ -1398,9 +1607,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "# TODO: Import a supervised learning model that has 'feature_importances_'\n", @@ -1446,9 +1653,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "# Import functionality for cloning a model\n", diff --git a/python/Supervised Learning/Training and Tuning/__pycache__/check_file.cpython-37.pyc b/python/Supervised Learning/Training and Tuning/__pycache__/check_file.cpython-37.pyc index fa3e4b5..e723db3 100644 Binary files a/python/Supervised Learning/Training and Tuning/__pycache__/check_file.cpython-37.pyc and b/python/Supervised Learning/Training and Tuning/__pycache__/check_file.cpython-37.pyc differ