completed 2 clustering parts of unsupervised learning section
This commit is contained in:
58
python/Unsupervised Learning/Clustering/helpers2.py
Normal file
58
python/Unsupervised Learning/Clustering/helpers2.py
Normal file
@@ -0,0 +1,58 @@
|
||||
import numpy as np
|
||||
import matplotlib.pyplot as plt
|
||||
from mpl_toolkits.mplot3d import Axes3D
|
||||
from sklearn.cluster import KMeans
|
||||
from sklearn.datasets import make_blobs
|
||||
|
||||
def simulate_data(n = 500, features = 10, centroids = 3):
    '''
    Generate a blob dataset with make_blobs and return only the feature matrix.

    INPUT (defaults)
        n = number of rows / samples (500)
        features = number of columns per sample (10)
        centroids = number of blob centers (3)

    OUTPUT
        dataset = the generated data with the specified characteristics;
                  the labels produced alongside it are discarded
    '''
    # random_state is pinned to 42, so identical arguments give identical data
    dataset, _ = make_blobs(
        n_samples=n,
        n_features=features,
        centers=centroids,
        random_state=42,
    )
    return dataset
|
||||
|
||||
def plot_data(data, labels):
    '''
    Draw a 3D scatter plot of the first three columns of data, colored by labels.

    INPUT:
        data - 2D array indexed as data[:, i]; columns 0, 1 and 2 are plotted
               as the x, y and z coordinates
        labels - values passed to scatter's c argument and mapped to colors
                 through the 'tab10' colormap
    OUTPUT:
        None - the scatter is drawn on a newly created figure
    '''
    fig = plt.figure()

    # Create the 3D axes through the figure so they are attached to it.
    # Constructing Axes3D(fig) directly no longer adds the axes to the figure
    # in current Matplotlib releases, which leaves the plot empty.
    # (On old Matplotlib the '3d' projection is registered by the file-level
    # mpl_toolkits.mplot3d import.)
    ax = fig.add_subplot(111, projection='3d')

    ax.scatter(data[:, 0], data[:, 1], data[:, 2], c=labels, cmap='tab10')
|
||||
|
||||
# Module-level sample dataset, built at import time with n=200 rows,
# features=5 columns and centroids=4 centers. fit_mods() reads this global.
data = simulate_data(200, 5, 4)
|
||||
|
||||
def get_kmeans_score(data, center):
    '''
    Fit a KMeans model with the requested number of clusters and return its
    score on the same data (the SSE of points to their centers).

    INPUT:
        data - the dataset the KMeans model is fit to and scored on
        center - the number of centers to use (the k value, passed as n_clusters)
    OUTPUT:
        score - the absolute value of the fitted model's score on data
    '''
    # Build the estimator and fit it; the object returned by fit() is what gets scored
    fitted = KMeans(n_clusters=center).fit(data)

    # np.abs makes the reported score non-negative
    return np.abs(fitted.score(data))
|
||||
|
||||
def fit_mods():
    '''
    Score KMeans fits on the module-level data for k = 1 through 10.

    OUTPUT:
        centers - the list of k values tried, [1, 2, ..., 10]
        scores - the get_kmeans_score result for each k, in the same order
    '''
    centers = list(range(1, 11))

    # One score per candidate k, computed against the module-level dataset
    scores = [get_kmeans_score(data, k) for k in centers]

    return centers, scores
|
||||
Reference in New Issue
Block a user