A blog for collecting diverse useful information

Clustering the iris dataset in R with k-means and hierarchical clustering

Posted on 2018-09-27 | Edited on 2018-07-24 | In machine learning

description: cluster iris data set by hierarchical clustering and k-means

iris data set

library(RWeka)
# Print the iris data frame: four numeric measurement columns plus the
# Species label. Only the first 11 rows of the output are reproduced below.
iris
#     Sepal.Length Sepal.Width Petal.Length Petal.Width    Species
# 1            5.1         3.5          1.4         0.2     setosa
# 2            4.9         3.0          1.4         0.2     setosa
# 3            4.7         3.2          1.3         0.2     setosa
# 4            4.6         3.1          1.5         0.2     setosa
# 5            5.0         3.6          1.4         0.2     setosa
# 6            5.4         3.9          1.7         0.4     setosa
# 7            4.6         3.4          1.4         0.3     setosa
# 8            5.0         3.4          1.5         0.2     setosa
# 9            4.4         2.9          1.4         0.2     setosa
# 10           4.9         3.1          1.5         0.1     setosa
# 11           5.4         3.7          1.5         0.2     setosa

delete class column

# Build the clustering input: every iris column except the Species class
# label, so only the four numeric measurements remain.
datairis <- iris[, setdiff(names(iris), "Species")]
datairis
# (first 11 rows of the output shown)
#     Sepal.Length Sepal.Width Petal.Length Petal.Width
# 1            5.1         3.5          1.4         0.2
# 2            4.9         3.0          1.4         0.2
# 3            4.7         3.2          1.3         0.2
# 4            4.6         3.1          1.5         0.2
# 5            5.0         3.6          1.4         0.2
# 6            5.4         3.9          1.7         0.4
# 7            4.6         3.4          1.4         0.3
# 8            5.0         3.4          1.5         0.2
# 9            4.4         2.9          1.4         0.2
# 10           4.9         3.1          1.5         0.1
# 11           5.4         3.7          1.5         0.2

partitional clustering: K means clustering

# Partition the four measurements into k = 3 clusters (one per species).
# k-means starts from randomly chosen centres, so a fixed seed makes the run
# repeatable, and nstart = 25 keeps the best of 25 random starts instead of
# relying on a single, possibly poor, initialisation.
set.seed(42)
kcluster <- kmeans(datairis, centers = 3, nstart = 25)
kcluster
# Example output (cluster numbers are arbitrary labels and may be permuted):
# K-means clustering with 3 clusters of sizes 38, 50, 62

# Cluster means:
#   Sepal.Length Sepal.Width Petal.Length Petal.Width
# 1     6.850000    3.073684     5.742105    2.071053
# 2     5.006000    3.428000     1.462000    0.246000
# 3     5.901613    2.748387     4.393548    1.433871

plot cluster with “Sepal.Width” and “Petal.Width”

# Scatter plot of Sepal.Width against Petal.Width, with each flower coloured
# by the k-means cluster it was assigned to.
plot(datairis[c("Sepal.Width", "Petal.Width")], col = kcluster$cluster)
# Overlay the three cluster centres as large stars (pch = 8), coloured 1:3 to
# match the cluster numbers used for the points.
points(kcluster$centers[, c("Sepal.Width", "Petal.Width")],
       col = 1:3, pch = 8, cex = 2)

plot cluster with “Sepal.Length” and “Petal.Length”

# Scatter plot of Sepal.Length against Petal.Length, with each flower coloured
# by the k-means cluster it was assigned to.
plot(datairis[c("Sepal.Length", "Petal.Length")], col = kcluster$cluster)
# Overlay the three cluster centres as large stars (pch = 8), coloured 1:3 to
# match the cluster numbers used for the points.
points(kcluster$centers[, c("Sepal.Length", "Petal.Length")],
       col = 1:3, pch = 8, cex = 2)

plot cluster with “Petal.Length” and “Petal.Width”

# Scatter plot of Petal.Length against Petal.Width, with each flower coloured
# by the k-means cluster it was assigned to. (Selecting Petal.Length twice
# would only plot the variable against itself.)
plot(datairis[c("Petal.Length", "Petal.Width")], col = kcluster$cluster)
# Overlay the three cluster centres as large stars (pch = 8), coloured 1:3 to
# match the cluster numbers used for the points.
points(kcluster$centers[, c("Petal.Length", "Petal.Width")],
       col = 1:3, pch = 8, cex = 2)

Check the result through a table

# Print the true species label of every flower for reference.
iris$Species
# Cross-tabulate the true species (rows) against the assigned k-means cluster
# (columns) to see how well the clusters line up with the species.
# In the sample output below all 50 setosa share one cluster, while 2
# versicolor and 14 virginica are grouped with the other species.
table(iris$Species,kcluster$cluster) 
#               1  2  3
#   setosa      0 50  0
#   versicolor  2  0 48
#   virginica  36  0 14

hierarchical clustering

# Number of rows (flowers) in iris.
nrow(iris)
# Draw 50 row indices at random from all rows of iris.
random <- sample(seq_len(nrow(iris)), 50)
# Keep only the sampled rows and leave out the Species class label, so the
# hierarchical clustering sees just the four numeric measurements.
datairis <- iris[random, setdiff(names(iris), "Species")]
datairis
# Example output (first rows only; the sampled rows differ from run to run):
#     Sepal.Length Sepal.Width Petal.Length Petal.Width
# 137          6.3         3.4          5.6         2.4
# 5            5.0         3.6          1.4         0.2
# 122          5.6         2.8          4.9         2.0
# 27           5.0         3.4          1.6         0.4
# 40           5.1         3.4          1.5         0.2
# 140          6.9         3.1          5.4         2.1
# 12           4.8         3.4          1.6         0.2
# 116          6.4         3.2          5.3         2.3
# 66           6.7         3.1          4.4         1.4

plot hierarchical clustering

# Agglomerative hierarchical clustering of the sampled flowers: pairwise
# distances from dist() are merged with average linkage.
hcluster <- hclust(dist(datairis), method = "average")
hcluster
# Draw the dendrogram. hang is the fraction of the plot height by which the
# labels hang below the rest of the plot; a negative value makes the labels
# hang down from 0.
plot(hcluster, hang = -1)

# Print the true species label of every flower for reference.
iris$Species
# Redraw the dendrogram, labelling each leaf with the species of the sampled
# row it represents (random holds the sampled row indices).
plot(hcluster, hang = -1, labels = iris$Species[random])

Post author: killfun
Post link: http://search4fan.github.io/post/r_AI_kmeans_hcluster.html
Copyright Notice: All articles in this blog are licensed under CC BY-NC-SA 4.0 unless stating additionally.