Introduction to summary( ) output for a simple linear model

> trillium <- read.table(file="http://users.humboldt.edu/rizzardi/Data.dir/trillium",
+ header=T, skip=9)
> class(trillium) # A data frame, not a matrix
[1] "data.frame"
> head(trillium) # See first few rows of trillium
  leaf stem flower site
1 11.6  3.4      p    4
2  8.2  2.9      w    4
3 12.7  4.6      w    4
4 10.6  4.2      w    4
5 10.1  3.4      p    4
6 13.7  4.7      p    4
> nrow( trillium ) #582 observations
[1] 582
>
> ls()
[1] "trillium"
> leaf # leaf not present because it's hidden in trillium data frame
Error: object 'leaf' not found
> search()
[1] ".GlobalEnv"        "package:stats"     "package:graphics"
[4] "package:grDevices" "package:utils"     "package:datasets"
[7] "package:methods"   "Autoloads"         "package:base"
> attach(trillium) # tell R to look inside trillium data frame
> search()
 [1] ".GlobalEnv"        "trillium"          "package:stats"
 [4] "package:graphics"  "package:grDevices" "package:utils"
 [7] "package:datasets"  "package:methods"   "Autoloads"
[10] "package:base"
> mean(leaf) # leaf can now be found inside trillium data frame
[1] 12.78847
> # To no longer have trillium in search path do: detach(trillium)
>
> class(leaf)
[1] "numeric"
> class(flower) # categorical data
[1] "factor"
> levels(flower)
[1] "p" "s" "w"
> class(site) # Really it's a factor, but entered as a number
[1] "integer"
> unique(site)
[1] 4 2 3 1
>
> fit1 <- lm( leaf ~ stem, data=trillium )
> # "data=trillium" is superfluous because trillium is already attached
>
> dev.new()
> plot(stem,leaf)
> abline( fit1 )
>
> summary( fit1 )

Call:
lm(formula = leaf ~ stem, data = trillium)

Residuals:
    Min      1Q  Median      3Q     Max
-8.1176 -1.2707  0.0406  1.2171  8.8456

Coefficients:
            Estimate Std. Error t value Pr(>|t|)
(Intercept)  7.41981    0.29575   25.09   <2e-16 ***
stem         1.16323    0.06155   18.90   <2e-16 ***
---
Signif.
codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 1.986 on 580 degrees of freedom
Multiple R-squared: 0.3811, Adjusted R-squared: 0.38
F-statistic: 357.2 on 1 and 580 DF, p-value: < 2.2e-16

> # t-value=t-stat for Ho:beta1=0
> (1.16323 - 0 )/0.06155
[1] 18.89894
>
> # p-value for two-tailed test Ho: beta1=0 vs Ha:beta1!=0
> 2*pt(18.90, df=(582-2), lower.tail=FALSE)
[1] 1.948522e-62
>
> # Suppose we wanted to test Ho: beta1= 1 vs Ha:beta1>1
> pt( (1.16323-1)/0.06155, df=580, lower.tail=F ) #P-val small, so reject Ho
[1] 0.004110392
>
> # 95% CI for beta1
> tmult <- qt( 0.975, df=580 )
> 1.16323 + tmult * 0.06155
[1] 1.284118
> 1.16323 - tmult * 0.06155
[1] 1.042342
>
> # Let 1 subscript denote Model 1 which is nested within Model 2
> # Likewise 2 subscript denotes Model 2
> # Let c1=number of coefficients estimated for model 1
> # Let c2=number of coefficients estimated for model 2
> # Let n = number of observations
> # Let RSS denote residual sum of squares
> # F-stat = [(RSS1-RSS2)/(c2-c1)] / [ RSS2/(n-c2) ]
> # If model 2 no improvement over model 1, then
> # F-stat a random variable from F(df1=c2-c1, df2=n-c2)
> RSS1 <- sum( (leaf-mean(leaf))^2 )
> RSS1
[1] 3694.735
> RSS2 <- sum( residuals(fit1)^2 )
> RSS2
[1] 2286.613
> Fstat <- ( (RSS1-RSS2)/(2-1) ) / ( RSS2/(582-2) )
> Fstat
[1] 357.1705
> pf( Fstat, df1=1, df2=580, lower.tail=FALSE)
[1] 1.972571e-62
>
> # Trivia: For simple linear regression: t_{p}^2 = F(1,p)
> 18.89894^2
[1] 357.1699
>
> # Also note for R^2
> 1- ( var(residuals(fit1))/var(leaf) )
[1] 0.3811158
>
#################################################
trillium <- read.table(file="http://users.humboldt.edu/rizzardi/Data.dir/trillium", header=T, skip=9)
class(trillium) # A data frame, not a matrix
head(trillium) # See first few rows of trillium
nrow( trillium ) #582 observations
ls()
leaf # leaf not present because it's hidden in trillium data frame
search()
attach(trillium) # tell R to look inside trillium data frame
search()    # "trillium" should now be listed in the search path
mean(leaf)  # leaf can now be found inside trillium data frame
# To no longer have trillium in search path do: detach(trillium)

class(leaf)
# Categorical data. Since R 4.0.0, read.table() keeps strings as character
# unless stringsAsFactors = TRUE, so this may print "character" instead of
# "factor".
class(flower)
# factor() makes the level listing work whether flower is a factor or a
# character vector (levels() of a character vector is NULL).
levels(factor(flower))
class(site)  # Really it's a factor, but entered as a number
unique(site)

# Simple linear regression of leaf on stem.
fit1 <- lm(leaf ~ stem, data = trillium)
# "data = trillium" is superfluous because trillium is already attached

dev.new()
plot(stem, leaf)
abline(fit1)  # overlay the fitted regression line

summary(fit1)

# The numeric constants below (1.16323, 0.06155, 18.90, 580 = 582 - 2) are
# copied from the summary(fit1) printout, to show where each number in that
# printout comes from.

# t-value = t-stat for Ho: beta1 = 0
(1.16323 - 0) / 0.06155

# p-value for two-tailed test Ho: beta1 = 0 vs Ha: beta1 != 0
2 * pt(18.90, df = (582 - 2), lower.tail = FALSE)

# Suppose we wanted to test Ho: beta1 = 1 vs Ha: beta1 > 1
# P-val small, so reject Ho
pt((1.16323 - 1) / 0.06155, df = 580, lower.tail = FALSE)

# 95% CI for beta1
tmult <- qt(0.975, df = 580)
1.16323 + tmult * 0.06155
1.16323 - tmult * 0.06155

# Let 1 subscript denote Model 1 which is nested within Model 2
# Likewise 2 subscript denotes Model 2
# Let c1 = number of coefficients estimated for model 1
# Let c2 = number of coefficients estimated for model 2
# Let n = number of observations
# Let RSS denote residual sum of squares
# F-stat = [(RSS1 - RSS2) / (c2 - c1)] / [RSS2 / (n - c2)]
# If model 2 no improvement over model 1, then
# F-stat a random variable from F(df1 = c2 - c1, df2 = n - c2)

# Model 1 here is the intercept-only model (c1 = 1): its residuals are the
# deviations of leaf from its mean. Model 2 is fit1 (c2 = 2).
RSS1 <- sum((leaf - mean(leaf))^2)
RSS1
RSS2 <- sum(residuals(fit1)^2)
RSS2
Fstat <- ((RSS1 - RSS2) / (2 - 1)) / (RSS2 / (582 - 2))
Fstat
pf(Fstat, df1 = 1, df2 = 580, lower.tail = FALSE)

# Trivia: For simple linear regression: t_{p}^2 = F(1,p)
18.89894^2

# Also note for R^2
1 - (var(residuals(fit1)) / var(leaf))