# Assignment 2, due March 14, 2006. HW 2: sample simulation
#
############################################################
#
# Simulation to investigate size and power of the t-test
#
############################################################
#
# function to view the first k lines of a data frame
#
# Arguments:
#   dat  a data frame (a matrix also works)
#   k    number of leading rows to display
#
# Prints a "First k rows" banner followed by the selected rows. The
# displayed rows are returned invisibly (the return value of print()).
view <- function(dat, k) {
  banner <- paste("First", k, "rows")
  # seq_len() avoids the 1:0 trap (1:0 is c(1, 0), which would select
  # row 1); clamping to nrow(dat) avoids padding the output with NA rows.
  rows <- seq_len(max(0, min(k, nrow(dat))))
  # drop = FALSE keeps a one-column input two-dimensional.
  krows <- dat[rows, , drop = FALSE]
  cat(banner, "\n", "\n")
  print(krows)
}
#
#
# function to generate S data sets of size n from normal
# distribution with mean mu and variance sigma^2
#
# Arguments:
#   S      number of simulated data sets (rows of the result)
#   n      sample size of each data set (columns of the result)
#   mu     mean of the normal distribution
#   sigma  standard deviation of the normal distribution
#
# Returns a list with a single component, dat: an S x n matrix whose
# i-th row is the i-th simulated data set.
generate.normal <- function(S, n, mu, sigma) {
  # Draw all n*S values in one vectorized call and fill the matrix row
  # by row, so each row is one data set.
  dat <- matrix(rnorm(n * S, mu, sigma), nrow = S, ncol = n, byrow = TRUE)
  #
  # Note: for this very simple data generation, we can get the data
  # in one step like this, which requires no looping. In more complex
  # statistical models, looping is often required to set up each
  # data set, because the scenario is much more complicated. Here is
  # a loop to get the same kind of data as above; try running it
  # instead and see how much longer it takes! It is left commented out
  # because it would otherwise overwrite the result above.
  #
  #   dat <- NULL
  #   for (i in 1:S){
  #     Y <- rnorm(n,mu,sigma)
  #     dat <- rbind(dat,Y)
  #   }
  #
  list(dat = dat)
}
#
#
# function to generate S data sets of size n from gamma
# distribution with mean mu, variance sigma^2 mu^2
#
# Arguments:
#   S      number of simulated data sets (rows of the result)
#   n      sample size of each data set (columns of the result)
#   mu     mean of the gamma distribution
#   sigma  coefficient of variation (standard deviation / mean)
#
# Returns a list with a single component, dat: an S x n matrix whose
# i-th row is the i-th simulated data set.
generate.gamma <- function(S, n, mu, sigma) {
  # shape * scale = mu and shape * scale^2 = sigma^2 * mu^2
  a <- 1 / (sigma^2)
  s <- mu / a
  dat <- matrix(rgamma(n * S, shape = a, scale = s),
                nrow = S, ncol = n, byrow = TRUE)
  #
  # Alternative loop. It is much slower because the matrix is regrown
  # on every iteration, and is left commented out because it would
  # otherwise overwrite the result above.
  #
  #   dat <- NULL
  #   for (i in 1:S){
  #     Y <- rgamma(n,shape=a,scale=s)
  #     dat <- rbind(dat,Y)
  #   }
  #
  list(dat = dat)
}
#
#
#
# function to generate S data sets of size n from a t distribution
# with df degrees of freedom centered at the value mu (a t distribution
# has mean 0 and variance df/(df-2) for df>2)
#
# Arguments:
#   S   number of simulated data sets (rows of the result)
#   n   sample size of each data set (columns of the result)
#   mu  location shift added to every draw
#   df  degrees of freedom of the t distribution
#
# Returns a list with a single component, dat: an S x n matrix whose
# i-th row is the i-th simulated data set.
generate.t <- function(S, n, mu, df) {
  dat <- matrix(mu + rt(n * S, df), nrow = S, ncol = n, byrow = TRUE)
  #
  # Alternative loop. It is much slower because the matrix is regrown
  # on every iteration, and is left commented out because it would
  # otherwise overwrite the result above.
  #
  #   dat <- NULL
  #   for (i in 1:S){
  #     Y <- mu + rt(n,df)
  #     dat <- rbind(dat,Y)
  #   }
  #
  list(dat = dat)
}
#
# set the seed for the simulation
set.seed(3)
#
# set number of simulated data sets and sample size
S <- 10000
n <- 15
#
#
# generate data -- distribution choices are normal with mu, sigma
# (rnorm), gamma (rgamma) and Student t (rt)
#
#
# mu0 is the value of mu under the null hypothesis
# mu is the actual value for the true distribution of the data
#
# if mu0 = mu, then we are investigating size of the test
# if mu0 is different from mu, then we are investigating power
# of the test at the departure mu from the null hypothesis value mu0
mu0 <- 1
mu <- 1.75
# sigma^2 = 5/3 is the variance of a t distribution with 5 degrees of
# freedom (df/(df-2)), so the normal option has the same variance as
# the t_5 option.
sigma <- sqrt(5/3)
#
#
# Exactly one generator call should be active. Previously all three ran
# in sequence, each overwriting `out`, so only the t_5 samples were
# ever analyzed. Uncomment a different line (and comment out the active
# one) to study another distribution.
# out <- generate.normal(S,n,mu,sigma) # generate normal samples
# out <- generate.gamma(S,n,mu,sigma)  # generate gamma samples
out <- generate.t(S,n,mu,5)            # generate t_5 samples
#
# get the sample means from each data set (one per row of out$dat)
outsampmean <- apply(out$dat,1,mean)
#
# get the estimated standard errors from each data set
sampmean.ses <- sqrt(apply(out$dat,1,var)/n)
#
# form the t-statistics for each data set under the null
ttests <- (outsampmean-mu0)/sampmean.ses
#
# critical value for two-sided test with 0.05 significance level
t05 <- qt(0.975,n-1)
#
# proportion of data sets in which the null hypothesis is rejected:
# the estimated power when mu differs from mu0, the estimated size
# when mu equals mu0
power <- sum(abs(ttests)>t05)/S
#