completed 2 clustering parts of unsupervised learning section

This commit is contained in:
2019-07-25 00:12:04 +01:00
parent 9648dfe7db
commit 15dfbd5d91
23 changed files with 5877 additions and 0 deletions

View File

@@ -0,0 +1,58 @@
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from sklearn.cluster import KMeans
from sklearn.datasets import make_blobs
def simulate_data(n=500, features=10, centroids=3, random_state=42):
    '''
    Simulate a clustered dataset with sklearn's make_blobs.

    INPUT (defaults)
        n = number of rows (500)
        features = number of columns (10)
        centroids = number of centers (3)
        random_state = seed forwarded to make_blobs (42); pass another
            value, or None, to draw a different sample
    OUTPUT
        dataset = a dataset with the specified characteristics; the labels
            that make_blobs also returns are discarded
    '''
    # The default seed of 42 keeps the data identical for existing callers
    # that do not pass random_state.
    dataset, _ = make_blobs(
        n_samples=n,
        n_features=features,
        centers=centroids,
        random_state=random_state,
    )
    return dataset
def plot_data(data, labels):
    '''
    Draw a 3D scatter plot of the first three columns of data, colored by labels.

    INPUT:
        data - 2D array indexed as data[:, i]; only columns 0, 1 and 2 are
            plotted, so it needs at least three columns
        labels - values passed to scatter as the color argument and mapped
            through the 'tab10' colormap
    OUTPUT:
        None - the scatter is drawn on a newly created figure
    '''
    fig = plt.figure()
    # Create the axes through the figure so they are attached to it. On
    # recent Matplotlib releases a bare Axes3D(fig) is no longer added to the
    # figure automatically, which leaves the plot empty. On older releases
    # the '3d' projection is registered by the file's mplot3d import.
    ax = fig.add_subplot(111, projection='3d')
    ax.scatter(data[:, 0], data[:, 1], data[:, 2], c=labels, cmap='tab10')
# Module-level sample dataset (200 rows, 5 features, 4 centers); fit_mods
# reads this global when scoring the KMeans fits.
data = simulate_data(n=200, features=5, centroids=4)
def get_kmeans_score(data, center):
    '''
    Fit KMeans with the requested number of clusters and return its score
    on the same data as a non-negative number.

    INPUT:
        data - the dataset to fit kmeans to
        center - the number of clusters to use (the k value)
    OUTPUT:
        score - absolute value of the fitted model's score on data,
            used as the SSE of points to their centers
    '''
    # Construct and fit in one expression; fit() hands back the estimator.
    fitted = KMeans(n_clusters=center).fit(data)
    # Take the magnitude so the reported score is never negative.
    return np.abs(fitted.score(data))
def fit_mods():
    '''
    Score KMeans fits of the module-level data for k = 1 through 10.

    OUTPUT:
        centers - list of the k values tried, [1, 2, ..., 10]
        scores - list of get_kmeans_score results, one per k, in the same order
    '''
    ks = list(range(1, 11))
    sse_values = [get_kmeans_score(data, k) for k in ks]
    return ks, sse_values