Standard error of slope coefficient estimate: A simulation demonstration for a simple linear model.

$$\hat{\sigma}^2 = s^2 = \frac{\sum (y_i - \hat{y}_i)^2}{df_{residual}}, \qquad df_{residual} = n - 2$$

i.e.

$$SE_{\hat{\beta}} = \hat{\sigma} \sqrt{\operatorname{diag}\left[ (X^T X)^{-1} \right]}$$

For the slope in simple linear least squares this is:

$$\hat{\sigma}_{\hat{\beta}_1} = \hat{\sigma} \sqrt{\frac{1}{S_{xx}}} \quad \text{where} \quad S_{xx} = \sum (x_i - \bar{x})^2$$

> ########## A simulation demonstration of
> ########## the standard error of the slope
> ########## coefficient from a simple linear model.
> set.seed(510)
> par(mfrow=c(3,1)) # graphs as 3 rows, 1 column per page
> beta0 <- 4 # intercept
> beta1 <- 1.5 # slope
> x <- seq(from=1, to=8, by=.25) # x data to be used
> n <- length(x)
> # Create data from theory perfect model
> y <- beta0 + beta1*x + rnorm(n,mean=0,sd=5) # sd of error is 5
> plot(x,y, main="A single dataset")
>
> fit.lm <- lm( y ~ x )
> fit.summary <- summary( fit.lm )
> fit.summary # Note the standard error for slope

Call:
lm(formula = y ~ x)

Residuals:
     Min       1Q   Median       3Q      Max
-11.8529  -2.9665  -0.2306   2.5988  17.8405

Coefficients:
            Estimate Std. Error t value Pr(>|t|)
(Intercept)   4.8708     2.6034   1.871   0.0722 .
x             1.3895     0.5246   2.649   0.0133 *
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 5.909 on 27 degrees of freedom
Multiple R-squared: 0.2062, Adjusted R-squared: 0.1768
F-statistic: 7.015 on 1 and 27 DF, p-value: 0.01334

> X <- cbind(1,x); 5*sqrt( diag( solve( t(X)%*%X ) )[2]) # Theoretical SD for slope
        x
0.4438968
> abline( fit.lm )
> names( fit.summary )
 [1] "call"          "terms"         "residuals"     "coefficients"
 [5] "aliased"       "sigma"         "df"            "r.squared"
 [9] "adj.r.squared" "fstatistic"    "cov.unscaled"
> fit.summary$coef
            Estimate Std. Error  t value   Pr(>|t|)
(Intercept) 4.870752  2.6033769 1.870936 0.07222911
x           1.389484  0.5246249 2.648528 0.01333858
> clas( fit.summary$coef ) # fit.summary$coef is an object of class matrix
Error: could not find function "clas"
> colnames( fit.summary$coef ) # FYI: Matrices can be given row and column names
[1] "Estimate" "Std. 
Error" "t value" "Pr(>|t|)"
> fit.summary$coef[,"Estimate"] # Can use row or column names
(Intercept)           x
   4.870752    1.389484
> # fit.summary$coef[,1] is the equivalent
>
> # Do N datasets and estimate slope for each simulation
> N <- 1000
> plot( x=c(0,8), y=c(-5,30), type="n", xlab="X", ylab="Y" )
> title( main="Regression lines for first 20 simulated datasets")
> title( main=expression(paste(beta[0]==4,", ",beta[1]==1.5," and ",sigma==5)), line=1)
>
> simBeta1Hat <- rep(NA,N)
> for( i in 1:N )
+ {
+ ysim <- beta0 + beta1*x + rnorm(n,mean=0,sd=5) # simulated data
+ fit.sim <- lm( ysim~x )
+ if (i<=20) abline(fit.sim) # Graph regression lines for first 20
+ simBeta1Hat[i] <- fit.sim$coef[2]
+ }
> # For colors see http://www.stat.columbia.edu/~tzheng/files/Rcolor.pdf
> abline(a=beta0, b=beta1, col="mediumvioletred")
> hist(simBeta1Hat, main=paste("Estimates of slope from",N,"simulations"),
+ xlab=expression(hat(beta)[1]) )
> title(sub=paste("SD=",signif(sd(simBeta1Hat), digits=3),
+ " Mean=",signif(mean(simBeta1Hat),3)) )

########## A simulation demonstration of
########## the standard error of the slope
########## coefficient from a simple linear model.
# Script: simulate data from a known simple linear model, fit it by least
# squares, and compare the reported standard error of the slope with (a) the
# theoretical SD of the slope estimator and (b) the empirical SD of the slope
# estimates over many simulated datasets.

set.seed(510) # fixed seed so the handout's numbers are reproducible
par(mfrow = c(3, 1)) # graphs as 3 rows, 1 column per page

# True model parameters used to generate every dataset below
beta0 <- 4 # intercept
beta1 <- 1.5 # slope
sigma <- 5 # sd of the error term
x <- seq(from = 1, to = 8, by = 0.25) # x data to be used
n <- length(x)

# Create one dataset from the theoretical model
y <- beta0 + beta1 * x + rnorm(n, mean = 0, sd = sigma)
plot(x, y, main = "A single dataset")

fit.lm <- lm(y ~ x)
fit.summary <- summary(fit.lm)
fit.summary # Note the standard error for slope

# Theoretical SD for slope: sigma * sqrt of the slope's diagonal entry of
# (X'X)^-1, where X is the design matrix (a column of 1s, then x).
X <- cbind(1, x)
sigma * sqrt(diag(solve(crossprod(X)))[2])
abline(fit.lm)

names(fit.summary)
fit.summary$coefficients
class(fit.summary$coefficients) # the coefficient table is a matrix
colnames(fit.summary$coefficients) # FYI: Matrices can be given row and column names
fit.summary$coefficients[, "Estimate"] # Can use row or column names
# fit.summary$coefficients[, 1] is the equivalent

# Do N datasets and estimate slope for each simulation
N <- 1000
plot(x = c(0, 8), y = c(-5, 30), type = "n", xlab = "X", ylab = "Y")
title(main = "Regression lines for first 20 simulated datasets")
# bquote() substitutes the current parameter values into the plotmath title,
# so the subtitle cannot drift out of sync with beta0, beta1 and sigma.
title(
  main = bquote(paste(
    beta[0] == .(beta0), ", ", beta[1] == .(beta1), " and ", sigma == .(sigma)
  )),
  line = 1
)

simBeta1Hat <- rep(NA_real_, N) # preallocated slope estimates
for (i in seq_len(N)) {
  ysim <- beta0 + beta1 * x + rnorm(n, mean = 0, sd = sigma) # simulated data
  fit.sim <- lm(ysim ~ x)
  if (i <= 20) {
    abline(fit.sim) # Graph regression lines for first 20
  }
  simBeta1Hat[i] <- coef(fit.sim)[["x"]]
}

# For colors see http://www.stat.columbia.edu/~tzheng/files/Rcolor.pdf
abline(a = beta0, b = beta1, col = "mediumvioletred") # the true line

# The SD of the simulated slopes should be close to the theoretical SD above.
hist(
  simBeta1Hat,
  main = paste("Estimates of slope from", N, "simulations"),
  xlab = expression(hat(beta)[1])
)
title(sub = paste(
  "SD=", signif(sd(simBeta1Hat), digits = 3),
  " Mean=", signif(mean(simBeta1Hat), 3)
))