adding all files done so far
This commit is contained in:
84
bayes-learning/seaborn-descr.py
Normal file
84
bayes-learning/seaborn-descr.py
Normal file
@@ -0,0 +1,84 @@
|
||||
import pandas as pd
|
||||
import os
|
||||
import matplotlib
|
||||
import matplotlib.pyplot as plt
|
||||
# from sklearn.preprocessing import StandardScaler
|
||||
from sklearn.model_selection import train_test_split
|
||||
import numpy as np
|
||||
from scipy.stats import trim_mean, kurtosis
|
||||
from scipy.stats.mstats import mode, gmean, hmean
|
||||
|
||||
|
||||
def linebreak():
    """Print a separator rule to visually split the script's output sections.

    Writes a blank line, a row of '=' characters padded by one space on each
    side, and another blank line to standard output. Returns None.
    """
    print('\n ============================================== \n')
|
||||
|
||||
|
||||
# Select the Tk-based backend for any figures this script opens.
matplotlib.rcParams['backend'] = 'TkAgg'

# Matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8-*' and
# later releases removed the old names, so fall back to the renamed style
# whenever the original name is not listed as available.
if 'seaborn-dark-palette' in plt.style.available:
    plt.style.use('seaborn-dark-palette')
else:
    plt.style.use('seaborn-v0_8-dark-palette')
|
||||
|
||||
# The CSV is looked up relative to the directory the script is started from.
path = os.getcwd()
data_file = '/data/Social_Network_Ads.csv'

# data_file already begins with a separator, so the two parts are simply
# glued together (os.path.join would discard `path` for such a component).
df = pd.read_csv('{}{}'.format(path, data_file))
|
||||
# df = pd.DataFrame(df)
|
||||
|
||||
# Shuffle the rows (frac=1 samples the whole frame) and rebuild a clean
# 0..n-1 index, discarding the old one.
df = (
    df.sample(frac=1)
    .reset_index(drop=True)
)

# df.shape is (rows, columns), which matches the two placeholders in order.
print('{} rows. {} cols.'.format(*df.shape))

linebreak()
# Preview the first ten rows with all columns.
print(df.head(10))

linebreak()
|
||||
# X holds the two predictor columns; y holds the 'Purchased' column as a
# one-column DataFrame rather than a Series.
X = df.loc[:, ['Age', 'EstimatedSalary']]
y = df[['Purchased']]

# Show the first five rows of each so the selection can be checked by eye.
print('X equals:')
print(X.head(5))
linebreak()
print('y equals:')
print(y.head(5))
linebreak()

# test_size=0.33 asks for roughly a third of the rows in the test split; no
# random_state is given, so the partition differs from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
)
|
||||
|
||||
# Overall summary table from DataFrame.describe(), without the 'User ID'
# column.
description = df.describe().drop(columns=['User ID'])

# Every statistic below is computed per value of the 'Purchased' column.
description_grouped = df.groupby(['Purchased'])

# NOTE: several columns are selected from the groupby with a *list*
# ([['Age', 'EstimatedSalary']]). The former tuple form
# (description_grouped['Age', 'EstimatedSalary']) was deprecated and raises
# in pandas >= 2.0; the list form gives the same selection on older versions.
description_grouped_split = (
    description_grouped[['Age', 'EstimatedSalary']]
    .describe()
    .unstack()
)

# Mode of 'Age' within each group, via scipy.stats.mstats.mode.
description_grouped_mode = description_grouped['Age'].apply(mode, axis=None)

# 10th, 50th and 90th percentiles per group (the names say "quartile", but
# the requested quantiles are .1, .5 and .9).
df_quartile_slary = (
    df.groupby('Purchased')['EstimatedSalary']
    .quantile([.1, .5, .9])
)
df_quartile_age = df.groupby('Purchased')['Age'].quantile([.1, .5, .9])

# Trimmed mean per group; .1 is forwarded to scipy's trim_mean as the
# proportion to cut from each end.
df_trimmed_mean = (
    description_grouped[['Age', 'EstimatedSalary']]
    .aggregate(trim_mean, .1)
)

# Median, standard deviation, arithmetic, geometric and harmonic means.
df_summary = (
    description_grouped[['Age', 'EstimatedSalary']]
    .aggregate([np.median, np.std, np.mean, gmean, hmean])
)

# Variance per group.
df_var = description_grouped[['Age', 'EstimatedSalary']].var()

# Number of missing values in each column of the full frame.
df_null = df.isna().sum()
|
||||
|
||||
# Each inner tuple is one report section; tables in the same section are
# printed back to back, and a separator rule is emitted between sections
# (not before the first one and not after the last one).
report_sections = (
    (description,),
    (description_grouped_split,),
    (description_grouped_mode,),
    (df_quartile_slary, df_quartile_age),
    (df_trimmed_mean,),
    (df_summary,),
    (df_var,),
    (df_null,),
)

for section_number, section_tables in enumerate(report_sections):
    if section_number > 0:
        linebreak()
    for table in section_tables:
        print(table)
|
||||
Reference in New Issue
Block a user