Using R to cluster the iris dataset with k-means and hierarchical clustering

Description: cluster the iris data set using hierarchical clustering and k-means.

iris data set

1
2
3
4
5
6
7
8
9
10
11
12
13
14
library(RWeka)
# Print the iris data frame: four numeric flower measurements plus the
# Species label. Only the first 11 rows of the output are reproduced below.
iris
# Sepal.Length Sepal.Width Petal.Length Petal.Width Species
# 1 5.1 3.5 1.4 0.2 setosa
# 2 4.9 3.0 1.4 0.2 setosa
# 3 4.7 3.2 1.3 0.2 setosa
# 4 4.6 3.1 1.5 0.2 setosa
# 5 5.0 3.6 1.4 0.2 setosa
# 6 5.4 3.9 1.7 0.4 setosa
# 7 4.6 3.4 1.4 0.3 setosa
# 8 5.0 3.4 1.5 0.2 setosa
# 9 4.4 2.9 1.4 0.2 setosa
# 10 4.9 3.1 1.5 0.1 setosa
# 11 5.4 3.7 1.5 0.2 setosa

Delete the class column (Species)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
# Keep every column of iris except the class label, so that the clustering
# only ever sees the four numeric measurements.
feature_cols <- setdiff(names(iris), "Species")
datairis <- iris[feature_cols]
datairis
# Output (first 11 rows shown):
# Sepal.Length Sepal.Width Petal.Length Petal.Width
# 1 5.1 3.5 1.4 0.2
# 2 4.9 3.0 1.4 0.2
# 3 4.7 3.2 1.3 0.2
# 4 4.6 3.1 1.5 0.2
# 5 5.0 3.6 1.4 0.2
# 6 5.4 3.9 1.7 0.4
# 7 4.6 3.4 1.4 0.3
# 8 5.0 3.4 1.5 0.2
# 9 4.4 2.9 1.4 0.2
# 10 4.9 3.1 1.5 0.1
# 11 5.4 3.7 1.5 0.2

partitional clustering: K means clustering

1
2
3
4
5
6
7
8
9
# Partition the measurements into k = 3 clusters, one per expected species.
# k-means starts from randomly chosen centres, so the seed is fixed to make
# the run reproducible, and nstart tries several random starts and keeps the
# best one instead of risking a poor local optimum from a single start.
set.seed(42)
kcluster <- kmeans(datairis, centers = 3, nstart = 25)
kcluster
# Example output (cluster numbers are arbitrary labels and may be permuted):
# K-means clustering with 3 clusters of sizes 38, 50, 62
#
# Cluster means:
# Sepal.Length Sepal.Width Petal.Length Petal.Width
# 1 6.850000 3.073684 5.742105 2.071053
# 2 5.006000 3.428000 1.462000 0.246000
# 3 5.901613 2.748387 4.393548 1.433871

Plot the clusters using “Sepal.Width” and “Petal.Width”

1
2
# Scatter plot of sepal width against petal width, one colour per k-means
# cluster, with the three cluster centres overlaid as large stars (pch = 8).
width_cols <- c("Sepal.Width", "Petal.Width")
plot(datairis[width_cols], col = kcluster$cluster)
points(kcluster$centers[, width_cols], col = 1:3, pch = 8, cex = 2)

Plot the clusters using “Sepal.Length” and “Petal.Length”

1
2
# Scatter plot of sepal length against petal length, one colour per k-means
# cluster, with the three cluster centres overlaid as large stars (pch = 8).
length_cols <- c("Sepal.Length", "Petal.Length")
plot(datairis[length_cols], col = kcluster$cluster)
points(kcluster$centers[, length_cols], col = 1:3, pch = 8, cex = 2)

Plot the clusters using “Petal.Length” and “Petal.Width”

1
2
# Scatter plot of petal length against petal width, one colour per k-means
# cluster. The two axes must be different measurements: selecting
# Petal.Length twice only draws a diagonal line and shows nothing about how
# the clusters separate.
petal_cols <- c("Petal.Length", "Petal.Width")
plot(datairis[petal_cols], col = kcluster$cluster)
# Overlay the three cluster centres as large stars, coloured to match.
points(kcluster$centers[, petal_cols], col = 1:3, pch = 8, cex = 2)

Check the result with a cross-tabulation of species against cluster

1
2
3
4
5
6
# Show the true species labels, then cross-tabulate them against the cluster
# each flower was assigned to (rows: species, columns: cluster number).
iris[["Species"]]
table(iris[["Species"]], kcluster[["cluster"]])
# Example output:
# 1 2 3
# setosa 0 50 0
# versicolor 2 0 48
# virginica 36 0 14

hierarchical clustering

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
# Number of observations (rows) in the iris data set.
nrow(iris)
# Draw 50 distinct row indices at random so the dendrogram stays readable.
# The seed is fixed so the same sample (and therefore the same tree) is
# obtained on every run; seq_len() is used instead of 1:n so the index range
# is built safely from the row count.
set.seed(42)
random <- sample(seq_len(nrow(iris)), 50)
# Keep only the sampled rows and drop the class label before clustering.
datairis <- iris[random, ]
datairis$Species <- NULL
datairis
# Example output from one random draw (the rows you see depend on the seed):
# Sepal.Length Sepal.Width Petal.Length Petal.Width
# 137 6.3 3.4 5.6 2.4
# 5 5.0 3.6 1.4 0.2
# 122 5.6 2.8 4.9 2.0
# 27 5.0 3.4 1.6 0.4
# 40 5.1 3.4 1.5 0.2
# 140 6.9 3.1 5.4 2.1
# 12 4.8 3.4 1.6 0.2
# 116 6.4 3.2 5.3 2.3
# 66 6.7 3.1 4.4 1.4

plot hierarchical clustering

1
2
3
# Agglomerative hierarchical clustering on the pairwise distances between the
# rows of datairis (dist() defaults to Euclidean distance), merging clusters
# by average linkage. The linkage name is written out in full instead of
# relying on partial matching of the abbreviation "ave".
hcluster <- hclust(dist(datairis), method = "average")
hcluster
# hang: the fraction of the plot height by which labels should hang below the
# rest of the plot; a negative value makes the labels hang down from 0.
plot(hcluster, hang = -1)

1
2
# Show the true species labels, then redraw the dendrogram with each leaf
# labelled by the species of the sampled flower rather than its row number.
iris[["Species"]]
species_labels <- iris[random, "Species"]
plot(hcluster, hang = -1, labels = species_labels)