# Simulated seven-segment digit classification.
#
# Builds noisy seven-segment encodings of the digits 0-9, then classifies them
# three ways: ten one-vs-rest logistic regressions, ten one-vs-rest regression
# trees, and a single multi-class tree. Results for 50 and 25 replications are
# stored in `s50` and `s25`, after which validate.txt is sourced.

setwd("c:/data/BUAN6357/HW_3"); source("prep.txt", echo = TRUE)

library(partykit)
library(data.table)
library(tidyverse)

# Configuration ----
# Kept as globals: prep.txt / validate.txt are not visible from here and may
# read them.
cols      <- 7
byRows    <- 1
byCols    <- 2
p         <- 0.9
classes   <- c(0, 1, 2, 3, 4, 5, 6, 7, 8, 9)
minDigit  <- min(classes)
maxDigit  <- max(classes)
numDigits <- length(classes)
seed      <- 777639282

# Simulate noisy seven-segment digits.
#
# reps     number of times the full set of `classes` is repeated.
# cols     number of segment columns in the output matrix.
# byRows, byCols
#          accepted for interface compatibility; not used in the body.
# p        each segment keeps its true value with probability p and is
#          flipped with probability 1 - p.
# classes  digit labels, repeated `reps` times in order.
# seed     passed to set.seed() so the simulation is reproducible.
#
# Returns a data.frame with columns digit, s1, ..., s7 and
# length(classes) * reps rows.
setup <- function(reps, cols = 7, byRows = 1, byCols = 2, p = 0.9,
                  classes = c(0, 1, 2, 3, 4, 5, 6, 7, 8, 9),
                  seed = 777639282) {
  n <- reps
  set.seed(seed)

  t1 <- rep(classes, n)

  # One row of seven segment indicators per digit, in the order 0..9.
  t2 <- c(1, 1, 1, 0, 1, 1, 1,
          0, 0, 1, 0, 0, 1, 0,
          1, 0, 1, 1, 1, 0, 1,
          1, 0, 1, 1, 0, 1, 1,
          0, 1, 1, 1, 0, 1, 0,
          1, 1, 0, 1, 0, 1, 1,
          0, 1, 0, 1, 1, 1, 1,
          1, 0, 1, 0, 0, 1, 0,
          1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 0, 1, 0)
  t3 <- rep(t2, n)

  # t4 == 1 marks the segments to flip (probability 1 - p each).
  t4 <- rbinom(length(t3), 1, 1 - p)
  t5 <- ifelse(t4 == 1, 1 - t3, t3)
  t5 <- matrix(data = t5, nrow = length(classes) * n, ncol = cols,
               byrow = TRUE)

  dim(t1) <- c(length(t1), 1)
  t6 <- cbind(t1, t5)

  simDigits <- as.data.frame(t6)
  colnames(simDigits) <- c("digit", "s1", "s2", "s3", "s4", "s5", "s6", "s7")
  simDigits
}

# Pick the best-scoring class for every row of a score matrix.
#
# df       matrix-like object with one row per observation and one column
#          per entry of `classes`.
# classes  class labels, in the same order as the columns of `df`.
# scale    if TRUE, the winning score is divided by the row total.
#
# Returns a data.table with columns `cat` (label of the column holding the row
# maximum, first one on ties as per which.max) and `p.value` (that maximum,
# optionally scaled).
mbr <- function(df, classes, scale = TRUE) {
  # Row-wise margin for apply(); previously read from the global `byRows`.
  row_margin <- 1L

  idx        <- apply(df, row_margin, which.max)
  best_class <- classes[idx]
  best_score <- apply(df, row_margin, max)

  if (scale) {
    row_total  <- apply(df, row_margin, sum)
    best_score <- best_score / row_total
  }

  data.table(cat = best_class, p.value = best_score)
}

# Fit one logistic regression per class (one-vs-rest).
#
# td            data.frame of predictors; a working column `y` is added to the
#               local copy and used as the response.
# classes       class labels; one model is fitted per label.
# digits        true label of each row of `td`.
# fitted.logit  pre-allocated matrix with nrow(td) rows and one column per
#               class; column i is filled with the fitted values of model i.
#
# Returns the filled `fitted.logit` matrix.
logit_10 <- function(td, classes, digits, fitted.logit) {
  for (i in seq_along(classes)) {
    d <- classes[i]
    td$y <- 0                  # start with "not this digit" everywhere
    td$y[digits == d] <- 1     # indicator for the current digit
    m <- glm(y ~ ., data = td, family = binomial())
    fitted.logit[, i] <- m$fitted.values
  }
  fitted.logit
}

# Fit one ctree per class (one-vs-rest) on a 0/1 indicator response.
#
# td       data.frame of predictors; a working column `y` is added to the
#          local copy and used as the response.
# classes  class labels; one tree is fitted per label.
# digits   true label of each row of `td`.
#
# Returns a matrix with nrow(td) rows and length(classes) columns holding
# predict(m) for each tree.
tree_10 <- function(td, classes, digits) {
  # Sized from the `classes` argument rather than the global `numDigits`, so
  # the function no longer depends on script-level state.
  fitted.tree10 <- matrix(NA_real_, nrow = nrow(td), ncol = length(classes))

  for (i in seq_along(classes)) {
    d <- classes[i]
    td$y <- 0
    td$y[digits == d] <- 1
    m <- ctree(y ~ ., data = td)
    fitted.tree10[, i] <- predict(m)
  }
  fitted.tree10
}

# Fit a single multi-class ctree on the digit label as a factor.
#
# td      data.frame of predictors; a working column `fDigits` is added to the
#         local copy and used as the response.
# digits  true label of each row of `td`.
#
# Returns predict(m, type = "prob") for the fitted tree.
tree_factor <- function(td, digits) {
  td$fDigits <- as.factor(digits)
  m <- ctree(fDigits ~ ., data = td)
  predict(m, type = "prob")
}

# Run: 50 replications ----
# The intermediate objects (td, digits, fitted.logit, t_l, t_10, t_1) stay at
# top level with their original names because validate.txt is sourced
# afterwards and may inspect them.
s50 <- data.table()
td <- setup(50)                      # working copy of the simulated data
fitted.logit <- matrix(NA, nrow = nrow(td), ncol = numDigits)
digits <- td$digit                   # kept separately for re-use
s50$digits <- td$digit
td$digit <- NULL                     # leave only the segment predictors
td$y <- NULL                         # setup() adds no `y`; defensive no-op

# Classifications and classification probabilities for each method.
t_l <- mbr(logit_10(td, classes, digits, fitted.logit), classes)
s50$lCl <- t_l$cat
s50$lPr <- t_l$p.value

t_10 <- mbr(tree_10(td, classes, digits), classes)
s50$t10Cl <- t_10$cat
s50$t10Pr <- t_10$p.value

# scale = FALSE: the winning value from predict(type = "prob") is stored
# without dividing by the row total.
t_1 <- mbr(tree_factor(td, digits), classes, scale = FALSE)
s50$t1Cl <- t_1$cat
s50$t1Pr <- t_1$p.value

# Run: 25 replications ----
s25 <- data.table()
td <- setup(25)
fitted.logit <- matrix(NA, nrow = nrow(td), ncol = numDigits)
digits <- td$digit
s25$digits <- td$digit
td$digit <- NULL
td$y <- NULL

# Classifications and classification probabilities for each method.
t_l <- mbr(logit_10(td, classes, digits, fitted.logit), classes)
s25$lCl <- t_l$cat
s25$lPr <- t_l$p.value

t_10 <- mbr(tree_10(td, classes, digits), classes)
s25$t10Cl <- t_10$cat
s25$t10Pr <- t_10$p.value

t_1 <- mbr(tree_factor(td, digits), classes, scale = FALSE)
s25$t1Cl <- t_1$cat
s25$t1Pr <- t_1$p.value

source("validate.txt", echo = TRUE)