# python-VM/bayes-learning/seaborn-graphing.py

import pandas as pd
import numpy as np
import os
import matplotlib
import matplotlib.pyplot as plt
from scipy.stats import trim_mean
from scipy.stats.mstats import mode, gmean, hmean
from sklearn.model_selection import train_test_split
import seaborn as sns


def linebreak():
    """Print a horizontal rule, padded by blank lines, between output sections.

    Takes no arguments and returns ``None``; the only effect is the text
    written to standard output.
    """
    separator = '\n ============================================== \n'
    print(separator)


# Select the TkAgg backend for matplotlib.
matplotlib.rcParams['backend'] = 'TkAgg'
# Optional styling, currently disabled:
# plt.style.use('seaborn-dark-palette')

# The CSV location is the current working directory followed by data_file,
# so the script has to be launched from the directory that contains "data/".
data_file = '/data/Social_Network_Ads.csv'
path = os.getcwd()
df = pd.read_csv(''.join((path, data_file)))

# Shuffle every row (frac=1 keeps them all) and renumber the index from 0.
shuffled = df.sample(frac=1)
df = shuffled.reset_index(drop=True)

# Quick visual check of the first five shuffled rows.
print(df.head(5))

# Feature columns and target column used for the split below.
feature_columns = ['Age', 'EstimatedSalary']
X = df[feature_columns]
y = df['Purchased']

# Hold out 33% of the rows as the test partition.
splits = train_test_split(X, y, test_size=0.33)
X_train, X_test, y_train, y_test = splits


# ax1 = df.plot.scatter(x='Age', y='EstimatedSalary', c='DarkBlue')
# ax2 = df.query('Age < 30').plot.scatter(x='Age', y='EstimatedSalary',
# c='DarkBlue')


# figure_1 = df.query('Age < 35').plot(kind='scatter', x='Age',
# y='EstimatedSalary')

# Number of rows for each distinct value of the 'Purchased' column.
purchased_column = df['Purchased']
df_purchased_sum = purchased_column.value_counts()

# Earlier experiments, kept for reference:
# figure_2 = plt.plot(df_purchased_sum)
# cp = sns.countplot(data=df, y='Purchased')
# pal = {1: "seagreen", 0: "gray"}

# One figure with two side-by-side axes.
fig, axs = plt.subplots(ncols=2)
age_axis, purchased_axis = axs

# Left panel: row count per 'Age' value, split by 'Purchased'.
sns.countplot(x='Age', hue='Purchased', data=df, ax=age_axis)
# Right panel: row count per 'Purchased' value.
cp = sns.countplot(x='Purchased', data=df, ax=purchased_axis)

plt.show()

# print(df_purchased_sum)