Using R to build an artificial neural network (ANN) classifier for the UCI wine data set

Description: build an ANN classification model from the UCI wine data set in R.

Read the table data

1
2
3
4
# Download the UCI wine data set. The raw file has no header row, so the
# 14 column names are supplied explicitly; column 1 (Type) is the class label.
wine <- read.table("https://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data",
sep=",", # fields are comma-separated
col.names=c("Type","Alcohol","Malic","Ash","Alcalinity","Magnesium","Phenols","Flavanoids","Nonflavanoids","Proanthocyanins","Color","Hue","Dilution","Proline")
)

Preview the data

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
# Quick sanity check of the loaded data: contents, dimensions, and
# per-column summary statistics. Expected console output is shown in
# the comments below each call.
print(wine)
# Type Alcohol Malic Ash Alcalinity Magnesium Phenols Flavanoids
# 1 1 14.23 1.71 2.43 15.6 127 2.80 3.06
# 2 1 13.20 1.78 2.14 11.2 100 2.65 2.76
# 3 1 13.16 2.36 2.67 18.6 101 2.80 3.24
# 4 1 14.37 1.95 2.50 16.8 113 3.85 3.49
# 5 1 13.24 2.59 2.87 21.0 118 2.80 2.69
# ....

# 178 observations in total.
print(nrow(wine))
# [1] 178

# 14 columns: the Type label plus 13 chemical measurements.
print(ncol(wine))
# [1] 14

# Note: Type is still numeric here, so summary() reports a misleading
# mean/median for it; it is converted to a factor in a later step.
print(summary(wine))
# Type Alcohol Malic Ash
# Min. :1.000 Min. :11.03 Min. :0.740 Min. :1.360
# 1st Qu.:1.000 1st Qu.:12.36 1st Qu.:1.603 1st Qu.:2.210
# Median :2.000 Median :13.05 Median :1.865 Median :2.360
# Mean :1.938 Mean :13.00 Mean :2.336 Mean :2.367
# ...

Convert the Type attribute to categorical (factor) data

1
2
3
4
5
6
7
8
# Convert the integer class code to a factor so modelling functions treat
# Type as a categorical outcome rather than a number. Use `<-` for
# assignment (standard R style) instead of `=`.
wine$Type <- factor(wine$Type)
# summary() now reports class counts (59/71/48) for Type instead of a mean.
print(summary(wine))
# Type Alcohol Malic Ash Alcalinity
# 1:59 Min. :11.03 Min. :0.740 Min. :1.360 Min. :10.60
# 2:71 1st Qu.:12.36 1st Qu.:1.603 1st Qu.:2.210 1st Qu.:17.20
# 3:48 Median :13.05 Median :1.865 Median :2.360 Median :19.50
# Mean :13.00 Mean :2.336 Mean :2.367 Mean :19.49
# ...

data pre-processing and transformation

1
2
3
4
5
6
7
8
9
# Standardise every numeric predictor to mean 0 / sd 1 while keeping the
# Type label untouched; selecting the label column by name makes the
# intent explicit.
wine.scale <- cbind(wine["Type"], scale(wine[-1]))
print(summary(wine.scale))
# Verify that each scaled predictor now has a standard deviation of 1.
apply(wine.scale[-1], 2, sd)
# Type Alcohol Malic Ash
# 1:59 Min. :-2.42739 Min. :-1.4290 Min. :-3.66881
# 2:71 1st Qu.:-0.78603 1st Qu.:-0.6569 1st Qu.:-0.57051
# 3:48 Median : 0.06083 Median :-0.4219 Median :-0.02375
# Mean : 0.00000 Mean : 0.0000 Mean : 0.00000
# ...

Partition the data into training and test sets

1
2
3
4
5
6
7
8
# Split the scaled data 70/30 into training and test sets.
# floor() makes the truncation of the fractional size (178 * 0.7 = 124.6)
# explicit instead of relying on sample() silently truncating it, and
# seq_len() is the safe way to build the index sequence.
data.size <- nrow(wine.scale)
set.seed(1111) # fixed seed so the split is reproducible
samp <- sample(seq_len(data.size), floor(data.size * 0.7))
data.tr <- wine.scale[samp, ]
data.test <- wine.scale[-samp, ]

summary(data.tr)
summary(data.test)

Data mining using a neural network classification technique

1
2
3
4
5
6
7
8
9
10
11
12
13
# Fit a single-hidden-layer feed-forward neural network with the nnet package.
library(nnet)
model.nnet <- nnet(Type ~ .,
data = data.tr,
size=2, # 2 units in the (single) hidden layer
decay=5e-04, # weight-decay regularisation
maxit=200) # stop after at most 200 iterations
summary(model.nnet)
# NOTE(review): the iteration trace below is printed by nnet() itself while
# fitting; summary() prints the network structure and fitted weights.
# initial value 164.152084
# iter 10 value 7.066118
# iter 20 value 1.286566
# iter 30 value 0.542334
# iter 40 value 0.444873
# ...

pattern (model) evaluation

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
# Predict class labels for the held-out test set.
predicted <- predict(model.nnet,
data.test,
type="class")
predicted

# Confusion matrix: rows = actual class, columns = predicted class.
actual <- data.test$Type
model.confusion.matrix <- table(actual, predicted)
model.confusion.matrix

# Express each cell as a percentage of all test observations.
# Use `<-` for assignment and the full argument name `digits`
# (the original `digit=` only worked via partial argument matching).
confusion.matrix.rate <- prop.table(model.confusion.matrix) * 100
round(confusion.matrix.rate, digits=2)

# Row/column index pairs for the diagonal (correct predictions) of the
# 3x3 confusion matrix.
diag.index <- cbind(1:3, 1:3)

# Overall error rate = 100% minus the percentage on the diagonal.
error.overall <- sum(confusion.matrix.rate) - sum(confusion.matrix.rate[diag.index])
paste("Error Rate =", round(error.overall, digits=2), "%")

Data mining task using a nearest-neighbour approach

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
# Toy 2-D example for k-nearest-neighbour classification:
# three "lover" points (P1-P3) and three "hater" points (N1-N3).
# Use `<-` for assignment (standard R style) throughout.
P1 <- c(1,3)
P2 <- c(2,4)
P3 <- c(6,6)

N1 <- c(2,1)
N2 <- c(5,3)
N3 <- c(6,4)

train <- rbind(P1,P2,P3,N1,N2,N3) # build the classification matrix
plot(train)

# Class labels aligned row-by-row with 'train'.
# (The name shadows base::class in the global environment, but class()
# is not called after this point in the script.)
class <- factor(c(rep("lover",3),rep("hater",3))) # [1] lover lover lover hater hater hater
class

test <- c(1,4) # testing data to be classified (a single 2-D point)
test # 1 4

library(class)
# The k = 3 nearest training points vote on the test point's label.
summary(knn(train, test, class, k=3)) # hater 0 lover 1

test <- c(6,4) # this point coincides with training point N3
summary(knn(train, test, class, k=3)) # hater 1 lover 0