# Uploaded by jalalpromos
#
# R hw
#
# advertisement
# ---- Environment and global configuration ----
# NOTE(review): the working directory is hard-coded; the relative paths used by
# source() in this script depend on it, so it is kept as-is.
setwd("c:/data/BUAN6357/HW_3")
source("prep.txt", echo = TRUE)

library(partykit)
library(data.table)
library(tidyverse)

# Global constants. One statement per line: R needs a newline or `;` between
# top-level assignments.
cols <- 7      # number of indicator columns per simulated digit
byRows <- 1    # apply() margin meaning "over rows"
byCols <- 2    # apply() margin meaning "over columns"
p <- 0.9       # probability that an indicator value is kept (not flipped)
classes <- c(0, 1, 2, 3, 4, 5, 6, 7, 8, 9)

minDigit <- min(classes)
maxDigit <- max(classes)
numDigits <- length(classes)
seed <- 777639282
# Simulate noisy 0/1 indicator patterns for each digit in `classes`.
#
# Each digit has a fixed pattern of `cols` indicators (the table below looks
# like seven-segment display encodings -- TODO confirm). The per-digit patterns
# are repeated `reps` times and every indicator is flipped independently with
# probability `1 - p`.
#
# Arguments:
#   reps    number of times the full set of digits is repeated
#   cols    number of indicator columns per digit (the pattern table has 7)
#   byRows  unused; kept for interface compatibility
#   byCols  unused; kept for interface compatibility
#   p       probability that an indicator keeps its true value
#   classes digit labels, one per row of the pattern table
#   seed    value passed to set.seed() so the simulation is reproducible
#
# Returns a data.frame with `reps * length(classes)` rows and the columns
# "digit", "s1", ..., "s7".
#
# Side effect: calls set.seed(seed), which resets the global RNG state.
setup <- function(reps, cols = 7, byRows = 1, byCols = 2, p = 0.9,
                  classes = c(0, 1, 2, 3, 4, 5, 6, 7, 8, 9),
                  seed = 777639282) {
  n <- reps
  set.seed(seed)

  t1 <- rep(classes, n)

  # True indicator pattern, one row of 7 values per digit 0..9.
  t2 <- c(1, 1, 1, 0, 1, 1, 1,
          0, 0, 1, 0, 0, 1, 0,
          1, 0, 1, 1, 1, 0, 1,
          1, 0, 1, 1, 0, 1, 1,
          0, 1, 1, 1, 0, 1, 0,
          1, 1, 0, 1, 0, 1, 1,
          0, 1, 0, 1, 1, 1, 1,
          1, 0, 1, 0, 0, 1, 0,
          1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 0, 1, 0)

  # The pattern table is hard-coded; any other shape would silently pair
  # digits with the wrong indicator rows.
  if (length(classes) * cols != length(t2)) {
    stop("length(classes) * cols must equal ", length(t2),
         " to match the built-in pattern table", call. = FALSE)
  }

  t3 <- rep(t2, n)
  t4 <- rbinom(length(t3), 1, 1 - p)      # 1 marks an indicator to flip
  t5 <- ifelse(t4 == 1, 1 - t3, t3)
  t5 <- matrix(data = t5, nrow = length(classes) * n, ncol = cols,
               byrow = TRUE)

  dim(t1) <- c(length(t1), 1)
  t6 <- cbind(t1, t5)

  simDigits <- as.data.frame(t6)
  colnames(simDigits) <- c("digit", "s1", "s2", "s3", "s4", "s5", "s6", "s7")
  simDigits
}
# For every row of a score table, pick the column with the largest value.
#
# Arguments:
#   df      matrix or data.frame of scores, one column per class
#   classes class labels; column k of `df` is reported as classes[k]
#   scale   if TRUE, divide the winning score by the sum of its row
#
# Returns a data.table with columns `cat` (the label of the winning column;
# which.max() picks the first one on ties) and `p.value` (the winning score,
# optionally scaled).
mbr <- function(df, classes, scale = TRUE) {
  # Row margin for apply(); kept local so the function does not depend on a
  # global `byRows` being defined.
  by_rows <- 1L

  idx <- apply(df, by_rows, which.max)
  labels <- classes[idx]
  pcat <- apply(df, by_rows, max)

  if (scale) {
    sc <- apply(df, by_rows, sum)
    pcat <- pcat / sc
  }

  data.table(cat = labels, p.value = pcat)
}
# Fit one logistic regression per class (each class against all others) and
# collect the fitted probabilities.
#
# Arguments:
#   td           data.frame of predictors; every column is used via `y ~ .`,
#                so it must not contain the digit label itself
#   classes      class labels; one model is fitted per element
#   digits       true label for each row of `td`
#   fitted.logit preallocated matrix with nrow(td) rows and one column per
#                class; column i is overwritten with the fitted values of the
#                model for classes[i]
#
# Returns `fitted.logit` with its columns filled in.
logit_10 <- function(td, classes, digits, fitted.logit) {
  # seq_along() yields an empty loop for empty `classes`; 1:length() would
  # iterate over c(1, 0).
  for (i in seq_along(classes)) {
    # 0/1 indicator for the current class.
    td$y <- as.numeric(digits %in% classes[i])
    m <- glm(y ~ ., data = td, family = binomial())
    fitted.logit[, i] <- m$fitted.values
  }
  fitted.logit
}
# Fit one conditional inference tree per class (each class against all others)
# on a 0/1 indicator response and collect the predictions.
#
# Arguments:
#   td      data.frame of predictors; every column is used via `y ~ .`
#   classes class labels; one tree is fitted per element
#   digits  true label for each row of `td`
#
# Returns a matrix with nrow(td) rows and one column per class; column i holds
# predict() of the tree fitted for classes[i].
tree_10 <- function(td, classes, digits) {
  # Size the result from `classes` itself rather than from a global count.
  fitted.tree10 <- matrix(NA_real_, nrow = nrow(td), ncol = length(classes))

  for (i in seq_along(classes)) {
    # 0/1 indicator for the current class.
    td$y <- as.numeric(digits %in% classes[i])
    m <- ctree(y ~ ., data = td)
    fitted.tree10[, i] <- predict(m)
  }

  fitted.tree10
}
# function: 1 tree classification
#
# Fit a single conditional inference tree with the digit label as a factor
# response and return the predicted class probabilities.
#
# Arguments:
#   td     data.frame of predictors; every column is used via `fDigits ~ .`
#   digits true label for each row of `td`
#
# Returns the result of predict(m, type = "prob").
# NOTE(review): the probability columns presumably follow the factor levels of
# `digits`; callers that map columns back to labels should confirm the order.
tree_factor <- function(td, digits) {
  td$fDigits <- as.factor(digits)
  m <- ctree(fDigits ~ ., data = td)
  # Only the probabilities are returned, so no separate class prediction is
  # computed.
  predict(m, type = "prob")
}
# ---- Run with 50 repetitions of the digit set ----
s50 <- data.table()
td <- setup(50) # temporary copy
fitted.logit <- matrix(rep(NA, nrow(td) * numDigits), nrow = nrow(td))
digits <- td$digit # for re-use
s50$digits <- td$digit
# Drop the label so that only the indicator columns act as predictors.
td$digit <- NULL
td$y <- NULL

# classifications and classification probabilities
t_l <- mbr(logit_10(td, classes, digits, fitted.logit), classes)
s50$lCl <- t_l$cat
s50$lPr <- t_l$p.value

t_10 <- mbr(tree_10(td, classes, digits), classes)
s50$t10Cl <- t_10$cat
s50$t10Pr <- t_10$p.value

# The single tree's scores are passed through unscaled.
t_1 <- mbr(tree_factor(td, digits), classes, scale = FALSE)
s50$t1Cl <- t_1$cat
s50$t1Pr <- t_1$p.value

# ---- Run with 25 repetitions of the digit set ----
s25 <- data.table()
td <- setup(25)
fitted.logit <- matrix(rep(NA, nrow(td) * numDigits), nrow = nrow(td))
digits <- td$digit
s25$digits <- td$digit
td$digit <- NULL
td$y <- NULL

# classifications and classification probabilities
t_l <- mbr(logit_10(td, classes, digits, fitted.logit), classes)
s25$lCl <- t_l$cat
s25$lPr <- t_l$p.value

t_10 <- mbr(tree_10(td, classes, digits), classes)
s25$t10Cl <- t_10$cat
s25$t10Pr <- t_10$p.value

t_1 <- mbr(tree_factor(td, digits), classes, scale = FALSE)
s25$t1Cl <- t_1$cat
s25$t1Pr <- t_1$p.value

source("validate.txt", echo = TRUE)
# Download