# Listing metadata: 59 lines, 1.4 KiB, Python.
import os

import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from scipy.stats import trim_mean
from scipy.stats.mstats import mode, gmean, hmean
from sklearn.model_selection import train_test_split


def linebreak() -> None:
    """Print a horizontal rule to visually separate sections of output.

    The rule is a row of ``=`` characters with a blank line before and
    after it. Nothing is returned.
    """
    print('\n ============================================== \n')
# Select the Tk-based interactive backend for the figures shown by
# plt.show() further down the script.
matplotlib.rcParams['backend'] = 'TkAgg'
# Optional plot style, currently disabled:
# plt.style.use('seaborn-dark-palette')
# The dataset is located relative to the current working directory, so the
# script has to be launched from the directory that contains `data/`.
path = os.getcwd()
data_file = '/data/Social_Network_Ads.csv'

df = pd.read_csv(path + data_file)

# Shuffle every row (frac=1 samples the whole frame) and renumber the index.
# No random_state is given, so the order differs from run to run.
df = df.sample(frac=1).reset_index(drop=True)

# Quick look at the first five shuffled rows.
print(df.head())

# Feature matrix (two columns) and the target column.
X = df[['Age', 'EstimatedSalary']]
y = df['Purchased']

# Hold out 33% of the rows for testing; the split is unseeded as well.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)
# Disabled exploratory scatter plots, kept for reference:
# ax1 = df.plot.scatter(x='Age', y='EstimatedSalary', c='DarkBlue')
# ax2 = df.query('Age < 30').plot.scatter(x='Age', y='EstimatedSalary',
#                                         c='DarkBlue')
# figure_1 = df.query('Age < 35').plot(kind='scatter', x='Age',
#                                      y='EstimatedSalary')

# Number of rows per distinct value of 'Purchased'.
df_purchased_sum = df['Purchased'].value_counts()

# Disabled alternatives for visualising the counts:
# figure_2 = plt.plot(df_purchased_sum)
# cp = sns.countplot(data=df, y='Purchased')
# NOTE: the next line is invalid as written (dict() keywords cannot be
# integers); a literal such as {1: "seagreen", 0: "gray"} would be needed.
# pal = dict(1="seagreen", 0="gray")

# One figure with two side-by-side axes:
#   left  - row count per age, split by the 'Purchased' value
#   right - overall row count per 'Purchased' value
fig, axs = plt.subplots(ncols=2)
sns.countplot(data=df, x='Age', hue='Purchased', ax=axs[0])
cp = sns.countplot(data=df, x='Purchased', ax=axs[1])

plt.show()

# print(df_purchased_sum)