"""Exploratory look at the Social Network Ads data set.

Reads ``data/Social_Network_Ads.csv`` from beneath the current working
directory, shuffles the rows, splits the ``Age`` / ``EstimatedSalary``
columns against the ``Purchased`` column into train and test parts, and
draws two count plots side by side.
"""
import os

import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from scipy.stats import trim_mean
from scipy.stats.mstats import mode, gmean, hmean
from sklearn.model_selection import train_test_split


def linebreak():
    """Print a ruled separator line, padded by blank lines, between outputs."""
    print('\n ============================================== \n')


# Select the Tk-based matplotlib backend for the plot window.
matplotlib.rcParams['backend'] = 'TkAgg'
# plt.style.use('seaborn-dark-palette')

# The CSV is looked up relative to the directory the script is run from.
path = os.getcwd()
data_file = '/data/Social_Network_Ads.csv'
df = pd.read_csv(path + data_file)

# Shuffle every row (frac=1 samples the whole frame) and renumber the index.
df = (
    df.sample(frac=1)
    .reset_index(drop=True)
)
print(df.head(5))

# Feature columns and the target column used for the split.
X = df[['Age', 'EstimatedSalary']]
y = df['Purchased']

# Hold out 33% of the rows as the test set.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
)

# Disabled scatter-plot experiments, kept for reference:
# ax1 = df.plot.scatter(x='Age', y='EstimatedSalary', c='DarkBlue')
# ax2 = df.query('Age < 30').plot.scatter(x='Age', y='EstimatedSalary',
#                                         c='DarkBlue')
# figure_1 = df.query('Age < 35').plot(kind='scatter', x='Age',
#                                      y='EstimatedSalary')

# Number of rows per distinct value of the 'Purchased' column.
df_purchased_sum = df['Purchased'].value_counts()

# Disabled plotting experiments, kept for reference:
# figure_2 = plt.plot(df_purchased_sum)
# cp = sns.countplot(data=df, y='Purchased')
# pal = dict(1="seagreen", 0="gray")

# Two panels: counts per age coloured by 'Purchased' on the left,
# counts per 'Purchased' value on the right.
fig, axs = plt.subplots(ncols=2)
sns.countplot(data=df, x='Age', hue='Purchased', ax=axs[0])
cp = sns.countplot(data=df, x='Purchased', ax=axs[1])
plt.show()

# print(df_purchased_sum)