# Clear the workspace so the analysis starts without leftover objects
rm(list = ls())

#******************
#ANALYSIS FOR DATA2
#******************

# Load the tab-separated data file "data2".
# NOTE(review): the working directory is a machine-specific absolute path;
# the relative file names read by this script are resolved against it.
setwd("D:/DataMining/Databases_for_mining/dataset_for_soft_dev_and_comparison/perceptron")
# stringsAsFactors = TRUE restores the read.table() default used before R 4.0:
# text columns are read as factors. The script treats Y as a class label
# (it requests type = "class" predictions), so Y must not stay character.
data2 <- read.table(
  file = "artificial2d_data2.txt",
  sep = "\t",
  dec = ".",
  header = TRUE,
  stringsAsFactors = TRUE
)

# How many instances does the data set contain?
n <- dim(data2)[1L]
print(n)

# Per-column descriptive statistics
print(summary(data2))

# Draw 1000 row indices at random (fixed seed) for the training sample;
# every remaining row goes to the test sample
set.seed(5)
index <- sample.int(n, size = 1000)
data2.train <- data2[index, ]
data2.test <- data2[-index, ]
print(summary(data2.train))
print(summary(data2.test))

# Load the nnet package, which provides nnet()
library(nnet)

# Multilayer perceptron with 2 neurons in the hidden layer, no skip-layer
# connections, at most 300 iterations. The seed is set right before fitting
# so that repeated runs start from the same random-number state.
set.seed(10)
model2 <- nnet(Y ~ ., skip = FALSE, size = 2, data = data2.train,
               maxit = 300, trace = FALSE)
print(model2)
print(summary(model2))

#evaluating the model
# Misclassification rate of a fitted classifier on a labelled data set.
#   test.data: data frame holding the true class labels in column Y
#   model:     fitted model whose predict() method accepts type = "class"
# Returns the proportion of tabulated instances whose predicted class differs
# from Y (NaN when no instance can be tabulated).
err.rate <- function(test.data, model) {
  # predicted and actual class labels, both as plain character vectors
  predicted <- as.character(predict(model, newdata = test.data, type = "class"))
  actual <- as.character(test.data$Y)
  # Use one common set of classes for the rows and the columns. Without it, a
  # model that never predicts one of the classes produces a non-square table
  # whose diagonal no longer pairs each actual class with the same predicted
  # class, and the error rate comes out wrong.
  classes <- sort(unique(c(actual, predicted)))
  # confusion matrix (rows: actual, columns: predicted)
  mc <- table(
    factor(actual, levels = classes),
    factor(predicted, levels = classes)
  )
  # error rate = 1 - share of instances on the diagonal
  1 - sum(diag(mc)) / sum(mc)
}

# Test-set error rate of the 2-neuron network
print(err.rate(test.data = data2.test, model = model2))

#******************
#ANALYSIS FOR DATA4
#******************

# Load the tab-separated data file "data4" (path relative to the working
# directory). stringsAsFactors = TRUE restores the read.table() default used
# before R 4.0, so the text class column is read as a factor.
data4 <- read.table(
  file = "artificial2d_data4.txt",
  sep = "\t",
  dec = ".",
  header = TRUE,
  stringsAsFactors = TRUE
)

# Split into train and test samples with the row indices held in `index`.
# `index` was sampled from the row count of data2, so make sure every index is
# a valid row of data4: an out-of-range index would otherwise silently add
# all-NA rows to the training sample.
stopifnot(all(index <= nrow(data4)))
data4.train <- data4[index, ]
data4.test <- data4[-index, ]

# Look for a suitable number of hidden neurons: fit one network per size
# 1..K on the training sample and record its error rate on the test sample.
K <- 20
res <- numeric(K)
for (k in seq_len(K)) {
  # same seed before every fit, so each size starts from the same RNG state
  set.seed(10)
  model <- nnet(Y ~ ., skip = FALSE, size = k, data = data4.train,
                maxit = 300, trace = FALSE)
  #print(model)
  error <- err.rate(data4.test, model)
  res[k] <- error
}

# Test error rate per hidden-layer size, as numbers and as a line plot
print(res)
plot(seq_len(K), res, type = "b",
     xlab = "Neurons into the hidden layer", ylab = "Test error rate")

# Scatter plot of the instances, filled by class: the first class level is
# drawn blue, the second red.
# as.factor() makes the colour lookup work whether Y was read as a factor or
# as character labels (the read.table() default since R 4.0); unclass() on a
# character vector would index the colour vector by name and give NA fills.
plot(data4$X1, data4$X2, pch = 21,
     bg = c("blue", "red")[as.integer(as.factor(data4$Y))],
     main = "Data4 problem")


