# Script 7.1 - First example source("C:/R/functions.txt") library(car)
# Reading the data chap7.ex1 <- read.table("c:/rbook/chap6.ex1.txt", header = TRUE) attach(chap7.ex1)
# Drawing Figure 7.5 plot(jitter(wkdaded, factor = 1), jitter(wkmomed, factor = 1),
pch = 20, xlim = c(0, 10), ylim = c(0, 10))
# Drawing Figure 7.6 plot(jitter(wkdaded,factor=1),jitter(wkmomed,factor=1),pch=20,xlim=c(0,10),ylim=c(0,10
)) abline(3.0,.5) abline(0,1) text(2,7, "Line 1") text(8,4, "Line 2") arrows(1.9, 6.2, 1.9, 4, code = 2, length = .15, angle = 20) arrows(8, 4.5, 8, 6.7, code = 2, length = .15, angle = 20)
# Finding the regression equation reg <- lm(wkmomed~wkdaded) reg
# Adding the predicted values and residuals to the data frame chap7.ex1$pred <- fitted(reg) chap7.ex1$res <- resid(reg) names(chap7.ex1) attach(chap7.ex1) pred res
# Looking at the intercorrelations cor.mat <- cov2cor(cov(chap7.ex1)) print(cor.mat, digits = 3)
# Reversing the IV and DV reg.rev <- lm(wkdaded~wkmomed) reg.rev
# Plotting the confidence and prediction intervals: Figure 7.10 interval.frame <- data.frame(wkdaded = 0:10) pp <- predict(reg, int = "p", newdata = interval.frame) pc <- predict(reg, int = "c", newdata = interval.frame) plot(jitter(wkdaded), jitter(wkmomed), ylim = c(0, 10),
xlim = c(0, 10), pch = 16) pred.wkmomed <- interval.frame$wkdaded matlines(pred.wkmomed, pc, lty = c(1, 2, 2), col = "black") matlines(pred.wkmomed, pp, lty = c(1, 3, 3), col = "black") chap7.ex1$z.wkmomed <- scale(wkmomed) chap7.ex1$z.wkdaded <- scale(wkdaded) attach(chap7.ex1) z.reg <- lm(z.wkmomed~z.wkdaded) z.reg
# Script 7.2 - Plotting Figure 7.7, the SSE as a function of intercept and slope
# Clean out workspace
1
rm(list = ls()) library(MASS) library(lattice) errors <- read.table("c:/rbook/sse.txt", header = TRUE) names(errors) attach(errors) wireframe(sse ~ b0*b1, scales = list(arrows=FALSE,
cex = .45,
font = 3, tck = 1),
screen = list(z = -245, x = -50, y = 0),shade = FALSE, aspect = c(87/87, 1.0))
# Script 7.3 - Complete example with Males and Females
# Clear out workspace rm(list = ls()) library(car) source("C:/R/functions.txt")
# Read in the data ecls200 <- read.table("c:/rbook/ecls200.txt", header = TRUE) attach(ecls200) names(ecls200)
# Create factor variable with labels ecls200$f.gender <- factor(gender, levels = 1:2, labels =
c("Male", "Female"), ordered = TRUE)
# Create separate data frames by gender ecls200.female <- subset(ecls200, f.gender == "Female") ecls200.male <- subset(ecls200, f.gender == "Male") attach(ecls200.male) attach(ecls200.female) attach(ecls200)
# Looking at General Knowledge measured Fall-K tapply(c1rgscal, f.gender, length) tapply(c1rgscal, f.gender, table) tapply(c1rgscal, f.gender, summary) tapply(c1rgscal, f.gender, sd) tapply(c1rgscal, f.gender, skewness) tapply(c1rgscal, f.gender, SEsk) tapply(c1rgscal, f.gender, kurtosis) tapply(c1rgscal, f.gender, SEku) boxplot(c1rgscal ~ f.gender, ylab = "Fall-K General Knowledge Scores")
# Looking at Reading Scores measured Spring-1 tapply(c4rrscal, f.gender, length) tapply(c4rrscal, f.gender, table) tapply(c4rrscal, f.gender, summary) tapply(c4rrscal, f.gender, sd) tapply(c4rrscal, f.gender, skewness) tapply(c4rrscal, f.gender, SEsk) tapply(c4rrscal, f.gender, kurtosis) tapply(c4rrscal, f.gender, SEku) boxplot(c4rrscal ~ f.gender, ylab = "Spring-1 Reading Scores")
# Produce a scatterplot with subgroups
# Set plotting character to 1 for all cases pchval <- rep(1, length = length(f.gender))
# Set plotting character to 18 for Males pchval[f.gender == "Male"] <- 18
2
plot(jitter(c1rgscal, factor = 1), jitter(c4rrscal, factor = 1),
pch = pchval) legend(32, 28, c("Male", "Female"), pch = c(18, 1)) legend(32, 40, c("Male", "Female"), lty = c(1, 2)) with(ecls200.male,abline(lm(c4rrscal ~ c1rgscal), lty = 1)) with(ecls200.female,abline(lm(c4rrscal~c1rgscal), lty = 2)) attach(ecls200.female) mod.female <- lm (c4rrscal ~ c1rgscal) mod.female ecls200.female$res <- resid(mod.female) attach(ecls200.female) ecls200.female$z.res <- scale(res) attach(ecls200.female) range(z.res)
# Look at residuals) plot(z.res, ylim = c(-5, 5)) abline(h=2.5, lty = 2) abline(h= - 2.5, lty = 2) which(z.res < -2) which(z.res > 2)
# Look at Cook's D
# cook.d() function updated to cooks.distance() denom <- length(res) - 2 plot(cooks.distance(mod.female)) abline(h = 4/denom, lty = 2) which(cooks.distance(mod.female) > (4/denom)) detach(ecls200.female) attach(ecls200.male) mod.male <- lm (c4rrscal ~ c1rgscal) mod.male ecls200.male$res <- resid(mod.male) attach(ecls200.male) ecls200.male$z.res <- scale(res) attach(ecls200.male) range(z.res)
# Look at residuals) plot(z.res, ylim = c(-5, 5)) abline(h = 2.5, lty = 2) abline(h = - 2.5, lty = 2) which(z.res < -2) which(z.res > 2)
# Look at Cook's D denom <- length(res) - 2 plot(cooks.distance(mod.male)) abline(h=4/denom, lty = 2) which(cooks.distance(mod.male) > (4/denom)) detach(ecls200.male)
# Looking at the two groups as one group attach(ecls200) mod.ecls200 <- lm (c4rrscal ~ c1rgscal) mod.ecls200 ecls200$res <- resid(mod.ecls200) attach(ecls200) ecls200$z.res <- scale(res) attach(ecls200)
3
range(z.res)
# Look at residuals plot(z.res, ylim = c(-5,5)) abline(h = 2.5, lty = 2) abline(h= - 2.5, lty = 2) which(z.res < -2) which(z.res > 2)
# Look at Cook's D denom <- length(res) - 2 plot(cooks.distance(mod.ecls200)) abline(h = 4/denom, lty = 2) which(cooks.distance(mod.ecls200) > (4/denom))
# Script 7.4 - Solution to Exercises rm(list = ls()) library(car) source("C:/R/functions.txt")
# Read the data from Exercise 6.3 ex7.3 <- read.table("c:/rbook/chap6.ex3.txt", header = TRUE) attach(ex7.3)
# Calculate predicted values ex7.3$yhat <- -5.69 + .0677*iq attach(ex7.3)
# Estimate errors ex7.3$error <- gpa - yhat attach(ex7.3)
# Find squared errors ex7.3$error.sq <- error^2 attach(ex7.3)
# Examining the results ex7.3
# Finding the sums sum(gpa) sum(yhat) sum(error) sum(error.sq)
# Using lm function in R to do analysis model <- lm(gpa ~ iq) summary(model) anova(model)
4