Bagaimana cara mengelompokkan warna cluster K-Means di Python?

Halo, saya mencoba melakukan clustering K-Means menggunakan sklearn. Tetapi saat di-plot, warna tiap cluster tidak mengelompok/menyatu (jumlah data ±2000 baris). Mohon pencerahannya, terima kasih.

Berikut kode saya:
from copy import deepcopy
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from matplotlib import pyplot as plt
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_samples, silhouette_score
import matplotlib.cm as cm
plt.rcParams['figure.figsize'] = (16, 9)
plt.style.use('ggplot')
import sklearn.cluster as cluster
import scipy.spatial.distance as sdist

# Load the dataset and print a quick overview of its contents.
df = pd.read_csv('2000an.csv')
print("Input Data and Shape")
print(df.shape)
print(df.head())
print(df.describe())
print(df.columns.values)  # attribute (column) names
print(df.isna().head())  # missing-value mask: False = value present, True = value missing
print(df.isna().sum())  # number of missing values per column

# Fill missing values with the mean of each numeric column.
# numeric_only=True is needed because the frame also holds non-numeric columns
# (e.g. 'frame.time' is label-encoded further down); without it, pandas >= 2.0
# raises TypeError instead of silently skipping those columns.
df.fillna(df.mean(numeric_only=True), inplace=True)
print("\n")
print(df.isna().sum())  # number of missing values left after fillna()

# Build the feature table used for clustering by dropping the listed columns.
excluded_columns = ['ip.src', 'ip.dst', 'icmp.ident']
points = df.drop(excluded_columns, axis=1)

# Replace the non-numeric 'frame.time' values with integer class codes.
time_column = 'frame.time'
labelEncoder = LabelEncoder()
labelEncoder = labelEncoder.fit(points[time_column])  # fit() returns the encoder itself
encoded_time = labelEncoder.transform(points[time_column])
points[time_column] = encoded_time

# Convert the feature table to a float matrix and plot its first two columns.
# The builtin float is used because the np.float alias was removed in
# NumPy 1.24, and the converted array is kept instead of being thrown away.
X = points.values.astype(float)

# Raw (unclustered) view of the data: column 0 on the x-axis, column 1 on the y-axis.
plt.scatter(X[:, 0], X[:, 1], label='True Position', s=7)

# Cluster the feature table into two groups with a fixed seed and store the
# assigned cluster index of every row back on the original frame.
kmeans = cluster.KMeans(n_clusters=2, random_state=0)
kmeans.fit(points)
df['cluster'] = kmeans.labels_

# Distance from every row to every cluster centre, one 'dist_<k>' column per
# centre, aligned on the original frame's index.
centroids = kmeans.cluster_centers_
dist_columns = ['dist_{}'.format(k) for k, _ in enumerate(centroids)]
dist_matrix = sdist.cdist(points, centroids)
dists = pd.DataFrame(dist_matrix, index=df.index, columns=dist_columns)

df = pd.concat([df, dists], axis=1)
print(df)

# Export the rows ordered by cluster index.
ha = df.sort_values(by=['cluster'])
ha.to_csv('3okt.csv')
# Fit one seeded model and take BOTH the point labels and the centroids from
# it, so the colours and the centre markers describe the same clustering.
# An unseeded refit may number the clusters differently (or converge to a
# different solution) than the model whose centres are drawn.
kmeans = KMeans(n_clusters=2, random_state=0)
model = kmeans.fit(X)  # fit() returns the estimator itself; one fit is enough
centroids = model.cluster_centers_

# NOTE(review): the model clusters on every column of X, but only the first two
# columns are drawn. If other (or unscaled, large-valued) columns dominate the
# distances, the colours will look interleaved in this 2-D view. To get visually
# separated groups, cluster on the two plotted columns only, or scale the
# features and plot a 2-D projection (e.g. PCA) instead -- confirm which view
# is wanted.
plt.scatter(X[:, 0], X[:, 1], c=model.labels_, cmap='rainbow', s=50)
plt.scatter(centroids[:, 0], centroids[:, 1], color='black', s=200, alpha=0.5)
plt.show()

Ekspektasinya seperti pada visualisasi Weka ini:

avatar Meilindaeka
@Meilindaeka

1 Kontribusi 0 Poin

Diperbarui 5 tahun yang lalu

Belum ada Jawaban. Jadi yang pertama Jawaban

Login untuk ikut Jawaban