############################################################ # # Simulation to investigate size and power of the t-test # ############################################################ # function to view the first k lines of a data frame view <- function(dat,k){ message <- paste("First",k,"rows") krows <- dat[1:k,] cat(message,"\n","\n") print(krows) } # # function to generate S data sets of size n from normal distribution with mean mu and variance sigma^2 generate.normal <- function(S,n,mu,sigma){ dat <- matrix(rnorm(n*S,mu,sigma),ncol=n,byrow=T) # # # # # # Note: for this very simple data generation, we can get the data in one step like this, which requires no looping. In more complex statistical models, looping is often required to set up each data set, because the scenario is much more complicated. Here is a loop to get the same data as above; try running the program and see how much longer it takes! # # # # # # # # dat <- NULL for (i in 1:S){ Y <- rnorm(n,mu,sigma) dat <- rbind(dat,Y) } out <- list(dat=dat) return(out) } # # function to generate S data sets of size n from gamma distribution with mean mu, variance sigma^2 mu^2 generate.gamma <- function(S,n,mu,sigma){ a <- 1/(sigma^2) s <- mu/a dat <- matrix(rgamma(n*S,shape=a,scale=s),ncol=n,byrow=T) # # # # # # # # # Alternative loop dat <- NULL for (i in 1:S){ Y <- rgamma(n,shape=a,scale=s) dat <- rbind(dat,Y) } out <- list(dat=dat) return(out) } # # # function to generate S data sets of size n from a t distribution with df degrees of freedom centered at the value mu (a t distribution has mean 0 and variance df/(df-2) for df>2) generate.t <- function(S,n,mu,df){ dat <- matrix(mu + rt(n*S,df),ncol=n,byrow=T) # Alternative loop # # # # # # # # dat <- NULL for (i in 1:S){ Y <- mu + rt(n,df) dat <- rbind(dat,Y) } out <- list(dat=dat) return(out) } # set the seed for the simulation set.seed(3) # set number of simulated data sets and sample size S <- 10000 n <- 15 # # generate data --Distribution choices are normal with mu,sigma (rnorm), gamma (rgamma) and student t (rt) # # mu0 is the value of mu under the null hypothesis mu is the actual value for the true distribution of the data # # # if mu0=mu, then we are investigating size of the test if mu0 is different from mu, then we are investigating power of the test at the departure mu from the null hypothesis value mu0 mu0 <- 1 mu <- 1.75 sigma <- sqrt(5/3) # # out <- generate.normal(S,n,mu,sigma) # generate normal samples out <- generate.gamma(S,n,mu,sigma) # generate gamma samples out <- generate.t(S,n,mu,5) # generate t_5 samples # get the sample means from each data set outsampmean <- apply(out$dat,1,mean) # get the estimated standard errors from each data set sampmean.ses <- sqrt(apply(out$dat,1,var)/n) # form the t-statistics for each data set under the null ttests <- (outsampmean-mu0)/sampmean.ses # critical value for test with 0.05 significance level t05 <- qt(0.975,n-1) power <- sum(abs(ttests)>t05)/S