Simple Linear Model summary() output

advertisement
Introduction to summary( ) output for a simple linear model
> trillium <- read.table(file="http://users.humboldt.edu/rizzardi/Data.dir/trillium",
+                        header=T, skip=9)
> class(trillium) # A data frame, not a matrix
[1] "data.frame"
> head(trillium) # See first few rows of trillium
  leaf stem flower site
1 11.6  3.4      p    4
2  8.2  2.9      w    4
3 12.7  4.6      w    4
4 10.6  4.2      w    4
5 10.1  3.4      p    4
6 13.7  4.7      p    4
> nrow( trillium ) #582 observations
[1] 582
>
> ls()
[1] "trillium"
> leaf # leaf not present because it's hidden in trillium data frame
Error: object 'leaf' not found
> search()
[1] ".GlobalEnv"        "package:stats"     "package:graphics"
[4] "package:grDevices" "package:utils"     "package:datasets"
[7] "package:methods"   "Autoloads"         "package:base"
> attach(trillium) # tell R to look inside trillium data frame
> search()
 [1] ".GlobalEnv"        "trillium"          "package:stats"
 [4] "package:graphics"  "package:grDevices" "package:utils"
 [7] "package:datasets"  "package:methods"   "Autoloads"
[10] "package:base"
> mean(leaf) # leaf can now be found inside trillium data frame
[1] 12.78847
> # To no longer have trillium in search path do: detach(trillium)
>
> class(leaf)
[1] "numeric"
> class(flower) # categorical data
[1] "factor"
> levels(flower)
[1] "p" "s" "w"
> class(site) # Really it's a factor, but entered as a number
[1] "integer"
> unique(site)
[1] 4 2 3 1
>
> fit1 <- lm( leaf ~ stem, data=trillium )
> # "data=trillium" is superfluous because trillium is already attached
>
> dev.new()
> plot(stem,leaf)
> abline( fit1 )
>
1
> summary( fit1 )
Call:
lm(formula = leaf ~ stem, data = trillium)
Residuals:
    Min      1Q  Median      3Q     Max
-8.1176 -1.2707  0.0406  1.2171  8.8456
Coefficients:
            Estimate Std. Error t value Pr(>|t|)
(Intercept)  7.41981    0.29575   25.09   <2e-16 ***
stem         1.16323    0.06155   18.90   <2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 1.986 on 580 degrees of freedom
Multiple R-squared:  0.3811,    Adjusted R-squared:  0.38
F-statistic: 357.2 on 1 and 580 DF, p-value: < 2.2e-16
> # t-value=t-stat for Ho:beta1=0
> (1.16323 - 0 )/0.06155
[1] 18.89894
>
> # p-value for two-tailed test Ho: beta1=0 vs Ha:beta1!=0
> 2*pt(18.90, df=(582-2), lower.tail=FALSE)
[1] 1.948522e-62
>
> # Suppose we wanted to test Ho: beta1= 1 vs Ha:beta1>1
> pt( (1.16323-1)/0.06155, df=580, lower.tail=F ) #P-val small, so reject Ho
[1] 0.004110392
>
> # 95% CI for beta1
> tmult <- qt( 0.975, df=580 )
> 1.16323 + tmult * 0.06155
[1] 1.284118
> 1.16323 - tmult * 0.06155
[1] 1.042342
>
2
> # Let 1 subscript denote Model 1 which is nested within Model 2
> # Likewise 2 subscript denotes Model 2
> # Let c1=number of coefficients estimated for model 1
> # Let c2=number of coefficients estimated for model 2
> # Let n = number of observations
> # Let RSS denote residual sum of squares
> # F-stat = [(RSS1-RSS2)/(c2-c1)] / [ RSS2/(n-c2) ]
> # If model 2 no improvement over model 1, then
> # F-stat a random variable from F(df1=c2-c1, df2=n-c2)
> RSS1 <- sum( (leaf-mean(leaf))^2 )
> RSS1
[1] 3694.735
> RSS2 <- sum( residuals(fit1)^2 )
> RSS2
[1] 2286.613
> Fstat <- ( (RSS1-RSS2)/(2-1) ) / ( RSS2/(582-2) )
> Fstat
[1] 357.1705
> pf( Fstat, df1=1, df2=580, lower.tail=FALSE)
[1] 1.972571e-62
>
> # Trivia: For simple linear regression: t_{p}^2 = F(1,p)
> 18.89894^2
[1] 357.1699
>
> # Also note for R^2
> 1- ( var(residuals(fit1))/var(leaf) )
[1] 0.3811158
>
#################################################
# Load the trillium measurements and explore how R finds the columns.
# skip = 9 drops the first nine lines of the remote file; the next line is
# used as the header. stringsAsFactors = TRUE keeps `flower` a factor on
# R >= 4.0.0, where read.table() otherwise leaves text columns as character
# (so class(flower) / levels(flower) below would no longer behave as shown).
trillium <- read.table(
  file = "http://users.humboldt.edu/rizzardi/Data.dir/trillium",
  header = TRUE, skip = 9, stringsAsFactors = TRUE
)
class(trillium) # A data frame, not a matrix
head(trillium) # See first few rows of trillium
nrow(trillium) # 582 observations
ls()
# leaf is not visible yet because it is hidden inside the trillium data frame.
# try() reports the "object 'leaf' not found" error without aborting the
# script when it is run through source().
try(leaf)
search()
# attach() is used here only to demonstrate the search path; in real analyses
# prefer trillium$leaf, with(trillium, ...), or the data= argument.
attach(trillium) # tell R to look inside trillium data frame
search()
mean(leaf) # leaf can now be found inside trillium data frame
# To no longer have trillium in search path do: detach(trillium)
class(leaf)
class(flower) # categorical data
levels(flower)
class(site) # Really it's a factor, but entered as a number
unique(site)
# Fit the simple linear regression of leaf on stem and draw the fitted line
# over the scatterplot.
fit1 <- lm(leaf ~ stem, data = trillium)
# Passing data = trillium explicitly means neither the fit nor the plot
# depends on trillium having been attach()ed beforehand.
dev.new()
plot(leaf ~ stem, data = trillium) # stem on the x-axis, leaf on the y-axis
abline(fit1) # add the least-squares line from fit1
summary(fit1)
# Reproduce the slope's t-test and confidence interval "by hand".
# The estimate, standard error and residual df are read from the fitted
# model rather than retyped from the printed summary, so no rounding error
# creeps in and the code stays correct if the data change.
slope_row <- coef(summary(fit1))["stem", ]
beta1_hat <- slope_row[["Estimate"]]
beta1_se <- slope_row[["Std. Error"]]
resid_df <- df.residual(fit1) # n - 2 for simple linear regression

# t-value = t-stat for Ho: beta1 = 0
t_stat <- (beta1_hat - 0) / beta1_se
t_stat
# p-value for two-tailed test Ho: beta1 = 0 vs Ha: beta1 != 0
# abs() keeps the two-tailed calculation valid for a negative slope as well.
2 * pt(abs(t_stat), df = resid_df, lower.tail = FALSE)
# Suppose we wanted to test Ho: beta1 = 1 vs Ha: beta1 > 1
# (one-sided upper-tail p-value; reject Ho when it is small)
pt((beta1_hat - 1) / beta1_se, df = resid_df, lower.tail = FALSE)
# 95% CI for beta1
tmult <- qt(0.975, df = resid_df)
beta1_hat + tmult * beta1_se
beta1_hat - tmult * beta1_se
# Overall F-test computed "by hand" as a comparison of two nested models.
# Let 1 subscript denote Model 1 which is nested within Model 2
# Likewise 2 subscript denotes Model 2
# Let c1 = number of coefficients estimated for model 1
# Let c2 = number of coefficients estimated for model 2
# Let n = number of observations
# Let RSS denote residual sum of squares
# F-stat = [(RSS1-RSS2)/(c2-c1)] / [ RSS2/(n-c2) ]
# If model 2 is no improvement over model 1, then
# F-stat is a random variable from F(df1 = c2-c1, df2 = n-c2)
y <- trillium$leaf # response; read from the data frame, no attach() needed
n <- length(y)
c1 <- 1 # Model 1: intercept only
c2 <- length(coef(fit1)) # Model 2: intercept and slope
# RSS of the intercept-only model is the total sum of squares about the mean.
RSS1 <- sum((y - mean(y))^2)
RSS1
RSS2 <- sum(residuals(fit1)^2)
RSS2
Fstat <- ((RSS1 - RSS2) / (c2 - c1)) / (RSS2 / (n - c2))
Fstat
pf(Fstat, df1 = c2 - c1, df2 = n - c2, lower.tail = FALSE)
# Trivia: For simple linear regression: t_{p}^2 = F(1,p)
# Using the unrounded slope t-statistic makes this match Fstat.
coef(summary(fit1))["stem", "t value"]^2
# Also note for R^2
# (both variances use the same n - 1 divisor, so the ratio equals RSS2/RSS1)
1 - (var(residuals(fit1)) / var(y))
4
Download