# Files
# python-VM/bayes-learning/seaborn-graphing.py
#
# 59 lines
# 1.4 KiB
# Python

import pandas as pd
import numpy as np
import os
import matplotlib
import matplotlib.pyplot as plt
from scipy.stats import trim_mean
from scipy.stats.mstats import mode, gmean, hmean
from sklearn.model_selection import train_test_split
import seaborn as sns
def linebreak():
    """Print a visual separator to split up console output.

    Writes a row of ``=`` characters, preceded and followed by a blank
    line, to standard output. Takes no arguments and returns ``None``.
    """
    print('\n ============================================== \n')
# Select the Tk-based matplotlib backend for the plot window.
matplotlib.rcParams['backend'] = 'TkAgg'

# The CSV is located relative to the current working directory, so the
# script must be launched from the directory that contains ``data/``.
path = os.getcwd()
data_file = '/data/Social_Network_Ads.csv'

# Load the dataset, shuffle every row (frac=1 samples the whole frame) and
# renumber the index from zero, discarding the pre-shuffle index.
df = pd.read_csv(f'{path}{data_file}')
df = df.sample(frac=1)
df = df.reset_index(drop=True)
print(df.head(5))

# Feature columns and target column, with 33% of the rows held out.
X = df[['Age', 'EstimatedSalary']]
y = df['Purchased']
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
)

# Number of rows for each distinct value of the 'Purchased' column.
df_purchased_sum = y.value_counts()

# Two count plots side by side: rows per 'Age' split by 'Purchased' on the
# left axis, and rows per 'Purchased' value on the right axis.
fig, axs = plt.subplots(nrows=1, ncols=2)
sns.countplot(x='Age', hue='Purchased', data=df, ax=axs[0])
cp = sns.countplot(x='Purchased', data=df, ax=axs[1])
plt.show()