Standard error of slope coefficient estimate: A simulation

advertisement
Standard error of slope coefficient estimate:
A simulation demonstration for a simple linear model.
πœŽΜ‚ 2 = π‘ πœ€2 =
∑(𝑦𝑖 − 𝑦̂)2 𝑆𝑆 π‘Ÿπ‘’π‘ π‘–π‘‘π‘’π‘Žπ‘™π‘ 
=
𝑛−2
𝑑. 𝑓.
$$SE_{\hat{\beta}} = \hat{\sigma}\,\sqrt{\operatorname{diag}\left[(X^{T}X)^{-1}\right]}$$
For the slope in simple linear least squares this is:
πœŽΜ‚π›½Μ‚1 = πœŽΜ‚πœ€ √
1
𝑠π‘₯π‘₯
where 𝑠π‘₯π‘₯ = ∑(π‘₯𝑖 − π‘₯Μ… )2
> ########## A simulation demonstration of
> ########## the standard error of the slope
> ########## coefficient from a simple linear model.
> set.seed(510)
> par(mfrow=c(3,1)) # graphs as 3 rows, 1 column per page
> beta0 <- 4   # intercept
> beta1 <- 1.5 # slope
> x <- seq(from=1, to=8, by=.25) # x data to be used
> n <- length(x)
> # Create data from theory perfect model
> y <- beta0 + beta1*x + rnorm(n,mean=0,sd=5) # sd of error is 5
> plot(x,y, main="A single dataset")
>
> fit.lm <- lm( y ~ x )
> fit.summary <- summary( fit.lm )
> fit.summary # Note the standard error for slope
Call:
lm(formula = y ~ x)
Residuals:
     Min       1Q   Median       3Q      Max
-11.8529  -2.9665  -0.2306   2.5988  17.8405
Coefficients:
            Estimate Std. Error t value Pr(>|t|)
(Intercept)   4.8708     2.6034   1.871   0.0722 .
x             1.3895     0.5246   2.649   0.0133 *
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 5.909 on 27 degrees of freedom
Multiple R-squared: 0.2062, Adjusted R-squared: 0.1768
F-statistic: 7.015 on 1 and 27 DF, p-value: 0.01334
> X <- cbind(1,x); 5*sqrt( diag( solve( t(X)%*%X ) )[2]) # Theoretical SD for slope
        x
0.4438968
> abline( fit.lm )
1
> names( fit.summary )
[1] "call"
"terms"
"residuals"
"coefficients"
[5] "aliased"
"sigma"
"df"
"r.squared"
[9] "adj.r.squared" "fstatistic"
"cov.unscaled"
> fit.summary$coef
Estimate Std. Error t value
Pr(>|t|)
(Intercept) 4.870752 2.6033769 1.870936 0.07222911
x
1.389484 0.5246249 2.648528 0.01333858
> clas( fit.summary$coef ) # fit.summary$coef is an object of class matrix
Error: could not find function "clas"
> colnames( fit.summary$coef )
# FYI: Matrices can be given row and column names
[1] "Estimate"
"Std. Error" "t value"
"Pr(>|t|)"
> fit.summary$coef[,"Estimate"] # Can use row or column names
(Intercept)
x
4.870752
1.389484
> # fit.summary$coef[,1] is the equivalent
>
> # Do N datasets and estimate slope for each simulation
> N <- 1000
> plot( x=c(0,8), y=c(-5,30), type="n", xlab="X", ylab="Y" )
> title( main="Regression lines for first 20 simulated datasets")
> title( main=expression(paste(beta[0]==4,", ",beta[1]==1.5," and ",sigma==5)), line=1)
>
> simBeta1Hat <- rep(NA,N)
> for( i in 1:N )
+ {
+   ysim <- beta0 + beta1*x + rnorm(n,mean=0,sd=5) # simulated data
+   fit.sim <- lm( ysim~x )
+   if (i<=20) abline(fit.sim) # Graph regression lines for first 20
+   simBeta1Hat[i] <- fit.sim$coef[2]
+ }
> # For colors see http://www.stat.columbia.edu/~tzheng/files/Rcolor.pdf
> abline(a=beta0, b=beta1, col="mediumvioletred")
> hist(simBeta1Hat, main=paste("Estimates of slope from",N,"simulations"),
+
xlab=expression(hat(beta)[1]) )
> title(sub=paste("SD=",signif(sd(simBeta1Hat), digits=3),
+
" Mean=",signif(mean(simBeta1Hat),3))
)
2
# ---- Simulation demo: standard error of the slope coefficient ----
# ---- in a simple linear model (y = beta0 + beta1 * x + error)  ----

# True model parameters used to generate every dataset in this script.
beta0 <- 4    # intercept
beta1 <- 1.5  # slope

# Stack the figures produced by this script as 3 rows, 1 column per page.
par(mfrow = c(3, 1))

# Fixed seed so the random draws below are reproducible.
set.seed(510)

# Predictor values 1, 1.25, ..., 8 and the resulting sample size.
x <- seq(1, 8, by = 0.25)
n <- length(x)

# One dataset from the true line plus normal errors with mean 0 and sd 5.
y <- beta0 + beta1 * x + rnorm(n, mean = 0, sd = 5)
plot(x, y, main = "A single dataset")

# Least-squares fit of y on x, and its summary object for later inspection.
fit.lm <- lm(y ~ x)
fit.summary <- summary(fit.lm)
3
# ---- Inspect the fitted model and compare with the theoretical SD ----

# Print the summary; note the estimated standard error reported for the slope.
fit.summary # Note the standard error for slope

# Theoretical SD of the slope estimate: the error SD (5, the value used to
# simulate y) times the square root of the slope's diagonal entry of
# (X'X)^{-1}, where X is the design matrix with an intercept column.
X <- cbind(1, x)
5 * sqrt(diag(solve(t(X) %*% X))[2]) # Theoretical SD for slope

# Add the fitted least-squares line to the current plot.
abline(fit.lm)

# Components stored in the summary object.
names(fit.summary)

# `$coef` partially matches the "coefficients" component of the summary.
fit.summary$coef

# Fixed: the original called the non-existent function `clas`, which stopped
# the script with 'could not find function "clas"'.
class(fit.summary$coef) # fit.summary$coef is an object of class matrix

# FYI: Matrices can be given row and column names
colnames(fit.summary$coef)
fit.summary$coef[, "Estimate"] # Can use row or column names
# fit.summary$coef[,1] is the equivalent
# ---- Repeat the experiment: N simulated datasets, one slope estimate each ----
N <- 1000

# Empty plotting frame (type = "n") on which the fitted lines are drawn.
plot(x = c(0, 8), y = c(-5, 30), type = "n", xlab = "X", ylab = "Y")
title(main = "Regression lines for first 20 simulated datasets")
# Second title at margin line 1 stating the parameter values of the true model.
title(
  main = expression(paste(beta[0] == 4, ", ", beta[1] == 1.5, " and ", sigma == 5)),
  line = 1
)

# Slot i receives the slope estimated from the i-th simulated dataset.
simBeta1Hat <- rep(NA, N)
for (i in seq_len(N)) {
  # New responses from the same true line with fresh normal errors (sd 5).
  ysim <- beta0 + beta1 * x + rnorm(n, mean = 0, sd = 5)
  fit.sim <- lm(ysim ~ x)

  # Graph regression lines for the first 20 simulations only.
  if (i <= 20) {
    abline(fit.sim)
  }

  # Second coefficient of the fit is the slope on x.
  simBeta1Hat[i] <- coef(fit.sim)[2]
}

# Overlay the true line beta0 + beta1 * x in a distinct colour.
# For colors see http://www.stat.columbia.edu/~tzheng/files/Rcolor.pdf
abline(a = beta0, b = beta1, col = "mediumvioletred")

# Histogram of the N slope estimates, with their SD and mean in the subtitle
# (both rounded to 3 significant digits).
hist(
  simBeta1Hat,
  main = paste("Estimates of slope from", N, "simulations"),
  xlab = expression(hat(beta)[1])
)
title(
  sub = paste(
    "SD=", signif(sd(simBeta1Hat), digits = 3),
    " Mean=", signif(mean(simBeta1Hat), 3)
  )
)
4
Download