Using R to build an artificial neural network (ANN) classifier for the UCI wine data set

Description: build an ANN classification model from the UCI wine data set in R.

Read the table data

1
2
3
4
# Download the UCI wine data set. The raw file has no header row, so the
# 14 column names are supplied explicitly; column 1 (Type) is the class label.
wine <- read.table("https://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data",
sep=",", # fields are comma-separated
col.names=c("Type","Alcohol","Malic","Ash","Alcalinity","Magnesium","Phenols","Flavanoids","Nonflavanoids","Proanthocyanins","Color","Hue","Dilution","Proline")
)

Preview the data

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
# Quick sanity check of the loaded data: contents, dimensions, and
# per-column summary statistics. Expected console output is shown in
# the comments below each call.
print(wine)
# Type Alcohol Malic Ash Alcalinity Magnesium Phenols Flavanoids
# 1 1 14.23 1.71 2.43 15.6 127 2.80 3.06
# 2 1 13.20 1.78 2.14 11.2 100 2.65 2.76
# 3 1 13.16 2.36 2.67 18.6 101 2.80 3.24
# 4 1 14.37 1.95 2.50 16.8 113 3.85 3.49
# 5 1 13.24 2.59 2.87 21.0 118 2.80 2.69
# ....

# 178 observations in total.
print(nrow(wine))
# [1] 178

# 14 columns: the Type label plus 13 chemical measurements.
print(ncol(wine))
# [1] 14

# Note: Type is still numeric here, so summary() reports a misleading
# mean/median for it; it is converted to a factor in a later step.
print(summary(wine))
# Type Alcohol Malic Ash
# Min. :1.000 Min. :11.03 Min. :0.740 Min. :1.360
# 1st Qu.:1.000 1st Qu.:12.36 1st Qu.:1.603 1st Qu.:2.210
# Median :2.000 Median :13.05 Median :1.865 Median :2.360
# Mean :1.938 Mean :13.00 Mean :2.336 Mean :2.367
# ...

Convert the Type attribute to categorical (factor) data

1
2
3
4
5
6
7
8
# Convert the integer class code to a factor so modelling functions treat
# Type as a categorical outcome rather than a number. Use `<-` for
# assignment (standard R style) instead of `=`.
wine$Type <- factor(wine$Type)
# summary() now reports class counts (59/71/48) for Type instead of a mean.
print(summary(wine))
# Type Alcohol Malic Ash Alcalinity
# 1:59 Min. :11.03 Min. :0.740 Min. :1.360 Min. :10.60
# 2:71 1st Qu.:12.36 1st Qu.:1.603 1st Qu.:2.210 1st Qu.:17.20
# 3:48 Median :13.05 Median :1.865 Median :2.360 Median :19.50
# Mean :13.00 Mean :2.336 Mean :2.367 Mean :19.49
# ...

data pre-processing and transformation

1
2
3
4
5
6
7
8
9
# Standardise every numeric predictor to mean 0 / sd 1 while keeping the
# Type label untouched; selecting the label column by name makes the
# intent explicit.
wine.scale <- cbind(wine["Type"], scale(wine[-1]))
print(summary(wine.scale))
# Verify that each scaled predictor now has a standard deviation of 1.
apply(wine.scale[-1], 2, sd)
# Type Alcohol Malic Ash
# 1:59 Min. :-2.42739 Min. :-1.4290 Min. :-3.66881
# 2:71 1st Qu.:-0.78603 1st Qu.:-0.6569 1st Qu.:-0.57051
# 3:48 Median : 0.06083 Median :-0.4219 Median :-0.02375
# Mean : 0.00000 Mean : 0.0000 Mean : 0.00000
# ...

Partition the data into training and test sets

1
2
3
4
5
6
7
8
# Split the scaled data 70/30 into training and test sets.
# floor() makes the truncation of the fractional size (178 * 0.7 = 124.6)
# explicit instead of relying on sample() silently truncating it, and
# seq_len() is the safe way to build the index sequence.
data.size <- nrow(wine.scale)
set.seed(1111) # fixed seed so the split is reproducible
samp <- sample(seq_len(data.size), floor(data.size * 0.7))
data.tr <- wine.scale[samp, ]
data.test <- wine.scale[-samp, ]

summary(data.tr)
summary(data.test)

Data mining using a neural network classification technique

1
2
3
4
5
6
7
8
9
10
11
12
13
# Fit a single-hidden-layer feed-forward neural network with the nnet package.
library(nnet)
model.nnet <- nnet(Type ~ .,
data = data.tr,
size=2, # 2 units in the (single) hidden layer
decay=5e-04, # weight-decay regularisation
maxit=200) # stop after at most 200 iterations
summary(model.nnet)
# NOTE(review): the iteration trace below is printed by nnet() itself while
# fitting; summary() prints the network structure and fitted weights.
# initial value 164.152084
# iter 10 value 7.066118
# iter 20 value 1.286566
# iter 30 value 0.542334
# iter 40 value 0.444873
# ...

pattern (model) evaluation

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
# Predict class labels for the held-out test set.
predicted <- predict(model.nnet,
data.test,
type="class")
predicted

# Confusion matrix: rows = actual class, columns = predicted class.
actual <- data.test$Type
model.confusion.matrix <- table(actual, predicted)
model.confusion.matrix

# Express each cell as a percentage of all test observations.
# Use `<-` for assignment and the full argument name `digits`
# (the original `digit=` only worked via partial argument matching).
confusion.matrix.rate <- prop.table(model.confusion.matrix) * 100
round(confusion.matrix.rate, digits=2)

# Row/column index pairs for the diagonal (correct predictions) of the
# 3x3 confusion matrix.
diag.index <- cbind(1:3, 1:3)

# Overall error rate = 100% minus the percentage on the diagonal.
error.overall <- sum(confusion.matrix.rate) - sum(confusion.matrix.rate[diag.index])
paste("Error Rate =", round(error.overall, digits=2), "%")

Data mining task using a nearest-neighbour approach

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
# Toy 2-D example for k-nearest-neighbour classification:
# three "lover" points (P1-P3) and three "hater" points (N1-N3).
# Use `<-` for assignment (standard R style) throughout.
P1 <- c(1,3)
P2 <- c(2,4)
P3 <- c(6,6)

N1 <- c(2,1)
N2 <- c(5,3)
N3 <- c(6,4)

train <- rbind(P1,P2,P3,N1,N2,N3) # build the classification matrix
plot(train)

# Class labels aligned row-by-row with 'train'.
# (The name shadows base::class in the global environment, but class()
# is not called after this point in the script.)
class <- factor(c(rep("lover",3),rep("hater",3))) # [1] lover lover lover hater hater hater
class

test <- c(1,4) # testing data to be classified (a single 2-D point)
test # 1 4

library(class)
# The k = 3 nearest training points vote on the test point's label.
summary(knn(train, test, class, k=3)) # hater 0 lover 1

test <- c(6,4) # this point coincides with training point N3
summary(knn(train, test, class, k=3)) # hater 1 lover 0