##########################################################################
# This program implements 2 classification techniques for the IRIS      #
# dataset (naive Bayes and multinomial logistic regression) and         #
# compares their accuracy on a held-out test split.                     #
##########################################################################
# Requires packages e1071 (naiveBayes) and VGAM (vglm, multinomial).
# The iris data set ships with base R (package datasets).
#
# The workspace is intentionally NOT cleared here: wiping the caller's
# variables is a destructive side effect a script should not have.
# The train/test split is random; call set.seed() before running this
# script if a reproducible split is needed.

library(e1071)
library(VGAM)

# Compute the accuracy of a classifier on a test set.
#
# predict1: predicted classes, either a factor or integer class codes
#           (1-based, in the order of the label's factor levels).
# iristest: data frame whose LAST column holds the true class labels.
#
# Returns the fraction of rows whose predicted class code equals the true
# class code (a number in [0, 1]; NaN when there are no rows).
calcerror <- function(predict1, iristest) {
  predicted_codes <- as.integer(predict1)
  # Use the test set's own last column rather than the global `iris`.
  actual_codes <- as.integer(iristest[, ncol(iristest)])
  stopifnot(length(predicted_codes) == length(actual_codes))
  mean(predicted_codes == actual_codes)
}

# Randomly select rows for training (75%) and testing (25%).
trainrows <- sample(
  seq_len(nrow(iris)),
  as.integer(0.75 * nrow(iris)),
  replace = FALSE
)
iristrain <- iris[trainrows, ]
iristest <- iris[-trainrows, ]

# Feature columns of the test set (label column removed).
testfeatures <- iristest[, -ncol(iristest)]

# Fit naive Bayes and predict the test set.
# The response must be named in the formula: with an expression such as
# iristrain[, 5] on the left-hand side, `.` would expand to ALL columns of
# `data`, including Species itself, leaking the label into the predictors.
model1 <- naiveBayes(Species ~ ., data = iristrain)
predict1 <- predict(model1, testfeatures)
accuracy1 <- calcerror(predict1, iristest)

# Fit the multinomial logit model and get per-class probabilities for the
# test set (one column per class, one row per test observation).
iris.vglm <- vglm(Species ~ ., family = multinomial, data = iristrain)
myprediction <- predict(iris.vglm, newdata = testfeatures, type = "response")

# Convert the probability matrix into a single column of class codes by
# taking the most probable class per row. Rounding the probabilities
# instead would leave rows unclassified whenever no class reaches 0.5.
# Column names are mapped back to the factor levels so the codes line up
# with as.integer() of the true labels.
class_levels <- levels(iristrain[, ncol(iristrain)])
best_column <- max.col(myprediction, ties.method = "first")
predict2 <- match(colnames(myprediction)[best_column], class_levels)

# Comparison of accuracy
accuracy2 <- calcerror(predict2, iristest)
accuracy1
accuracy2