January 29 R code: Simple Linear Regression Demo
##########################
# A simple demonstration of solving least-squares regression
# using R and matrix algebra
##########################
# Create a blank plot to get the right framing
plot(c(0,6),c(0,10), type="n", xlab="X", ylab="Y",
main="Simple linear regression" )
# The fake data
Y <- c(4,5,9)
x1 <- c(2,4,5)
points( x1, Y )
# Least-squares regression in R, lm=linear model
fitA <- lm( Y ~ x1 )
fitA
Call:
lm(formula = Y ~ x1)

Coefficients:
(Intercept)           x1
        0.5          1.5
> names( fitA )
 [1] "coefficients"  "residuals"     "effects"       "rank"
 [5] "fitted.values" "assign"        "qr"            "df.residual"
 [9] "xlevels"       "call"          "terms"         "model"
> # abline() draws a straight line, many ways to enter input for function
> abline( fitA )
> # coefficients
> fitA$coef
(Intercept)          x1
        0.5         1.5
> # predicted values
> Yhat <- predict(fitA)
> Yhat
  1   2   3
3.5 6.5 8.0
> # Place predicted values on graph
> points( x1, Yhat, pch=8 )
> # Draw residuals
> segments( x1,Y, x1,Yhat, lty=2 )
> # see help(legend)
> legend(0,8, legend=c("Observed values","Prediction line",
+               "Predicted values","Residual"),
+        pch=c(1,-1,8,-1), lty=c(-1,1,-1,2) )
>
> # residuals from lm() object
> residuals(fitA)
   1    2    3
 0.5 -1.5  1.0
> # Doing it the longer way
> Y - Yhat
   1    2    3
 0.5 -1.5  1.0
>
> # Residual Sum of Squares (RSS)
> # The RSS is what was minimized by the coefficients
> sum( residuals(fitA)^2 )
[1] 3.5
>
>
# Use matrix algebra to do the same thing
# cbind() = column bind, rep()=repeat
X <- cbind( rep(1,3), x1 )
X
       x1
[1,] 1  2
[2,] 1  4
[3,] 1  5
> # Show how with matrix algebra you can get predicted
> # y values if you have the coefficients
> # %*% is matrix multiplication
> coef( fitA )
(Intercept)          x1
        0.5         1.5
> X %*% coef(fitA)
[,1]
[1,] 3.5
[2,] 6.5
[3,] 8.0
> predict(fitA)
  1   2   3
3.5 6.5 8.0
> 1*.5 + 2*1.5
[1] 3.5
> 1*.5 + 4*1.5
[1] 6.5
> 1*.5 + 5*1.5
[1] 8
>
> # transpose a matrix
> t(X)
   [,1] [,2] [,3]
      1    1    1
x1    2    4    5
>
> XtX <- t(X) %*% X
> XtX
       x1
    3  11
x1 11  45
> # get the inverse of a matrix
> XtX.inv <- solve( XtX )
> XtX.inv
               x1
    3.2142857 -0.7857143
x1 -0.7857143  0.2142857
> # Demonstrate an inverse of a matrix times the matrix is I
> # I is usually called the "identity matrix
> XtX.inv %*% XtX
      x1
    1  0
x1  0  1
>
> # Do "by hand"
> 3.2142857*3 + -0.7857143*11 # =1
[1] 0.9999998
> 3.2142857*11 + -0.7857143*45 # =0
[1] -8e-07
> -0.7857143*3 + 0.2142857*11 # =0
[1] -2e-07
> -0.7857143*11 + 0.2142857*45 # =1
[1] 0.9999992
>
> # get the estimated coefficents using matrix algebra
> XtX.inv %*% t(X) %*% Y
   [,1]
    0.5
x1  1.5
> fitA$coef
(Intercept)          x1
        0.5         1.5
####################################################################
##########################
# A simple demonstration of solving least-squares regression
# using R and matrix algebra
##########################

# Create a blank plot to get the right framing
plot(c(0, 6), c(0, 10), type = "n", xlab = "X", ylab = "Y",
     main = "Simple linear regression")

# The fake data: three (x, y) observations
Y  <- c(4, 5, 9)
x1 <- c(2, 4, 5)
points(x1, Y)

# Least-squares regression in R, lm = linear model
fitA <- lm(Y ~ x1)
fitA                 # fitted line: intercept 0.5, slope 1.5
names(fitA)          # the components stored inside an lm object

# abline() draws a straight line, many ways to enter input for function
abline(fitA)

# Coefficients
fitA$coef

# Predicted (fitted) values
Yhat <- predict(fitA)
Yhat

# Place predicted values on graph
points(x1, Yhat, pch = 8)

# Draw residuals as dashed vertical segments from observed to fitted
segments(x1, Y, x1, Yhat, lty = 2)

# see help(legend); pch = -1 / lty = -1 suppress the symbol or line
# for entries where it does not apply
legend(0, 8,
       legend = c("Observed values", "Prediction line",
                  "Predicted values", "Residual"),
       pch = c(1, -1, 8, -1), lty = c(-1, 1, -1, 2))

# Residuals from lm() object
residuals(fitA)
# Doing it the longer way
Y - Yhat

# Residual Sum of Squares (RSS)
# The RSS is what was minimized by the coefficients
sum(residuals(fitA)^2)

# Use matrix algebra to do the same thing
# cbind() = column bind, rep() = repeat
X <- cbind(rep(1, 3), x1)   # design matrix: column of 1s (intercept) + x1
X

# Show how with matrix algebra you can get predicted
# y values if you have the coefficients
# %*% is matrix multiplication
coef(fitA)
X %*% coef(fitA)
predict(fitA)

# The same predictions "by hand", one row of X at a time
1 * .5 + 2 * 1.5
1 * .5 + 4 * 1.5
1 * .5 + 5 * 1.5   # fixed: original repeated the x = 4 row here

# Transpose a matrix
t(X)

XtX <- t(X) %*% X
XtX

# Get the inverse of a matrix
XtX.inv <- solve(XtX)
XtX.inv

# Demonstrate an inverse of a matrix times the matrix is I
# I is usually called the "identity matrix"
XtX.inv %*% XtX

# Do "by hand" (tiny errors come from the truncated decimals above)
3.2142857 * 3  + -0.7857143 * 11   # = 1
3.2142857 * 11 + -0.7857143 * 45   # = 0
-0.7857143 * 3 + 0.2142857 * 11    # = 0
-0.7857143 * 11 + 0.2142857 * 45   # = 1

# Get the estimated coefficients using matrix algebra:
# beta-hat = (X'X)^{-1} X'Y  -- matches lm()'s answer
XtX.inv %*% t(X) %*% Y
fitA$coef
# End of demo