R Scripts for Chapter 15

# Script 15.1 - Simulating the F-distribution
# Rerun with changes in n1 and n2
# Clean out workspace
rm(list = ls())
# Suppress levels of significance
options(show.signif.stars = FALSE)
library(car)
library(MASS)
source("C:/R/functions.txt")
n1 <- 5
n2 <- 10
N <- 100000
var1 <- numeric(N)
var2 <- numeric(N)
stat5.10 <- numeric(N)
for (i in 1:N) {
  sam1 <- rnorm(n1)
  var1[i] <- var(sam1)
  sam2 <- rnorm(n2)
  var2[i] <- var(sam2)
  stat5.10[i] <- var1[i]/var2[i]
}
mean(var1)
mean(var2)
min(stat5.10)
median(stat5.10)
mean(stat5.10)
max(stat5.10)
var.pop(stat5.10)   # population variance, from functions.txt
skewness(stat5.10)  # from functions.txt
kurtosis(stat5.10)  # from functions.txt
hist(stat5.10, prob=TRUE, breaks = 60, ylim = c(0,.8),
main = "Sample Sizes (5,10)")
lines(density(stat5.10), lwd = 2)
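# Supplementary check (not in the original script): because stat5.10 is a
# ratio of sample variances from standard normals, it follows an
# F-distribution with n1 - 1 and n2 - 1 df; overlaying the theoretical
# density on the histogram should track the simulated density closely
curve(df(x, df1 = n1 - 1, df2 = n2 - 1), add = TRUE, lty = 2, lwd = 2)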
# Script 15.2 - Expected value and variance of the F-distribution
# Rerun with changes in n1 and n2
# Clean out workspace
rm(list = ls())
# Suppress levels of significance
options(show.signif.stars = FALSE)
library(car)
source("C:/R/functions.txt")
n1 <- 15
n2 <- 30
v1 <- n1 - 1
v2 <- n2 - 1
ev <- v2/(v2 - 2)
# Name the variance var.F so as not to mask the base var() function
var.F <- (2*v2^2*(v1 + v2 - 2))/(v1*(v2 - 2)^2*(v2 - 4))
ev; var.F
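# Supplementary check (not in the original script): compare these formulas
# with the mean and variance of a large sample of simulated F-variates
f.sim <- rf(100000, df1 = v1, df2 = v2)
mean(f.sim)  # should be close to ev
var(f.sim)   # should be close to var.F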
# Script 15.3 - Depicting F-Distributions
nval <- pretty(c(0,15), 1000)
plot(nval,df(nval, df1 = 4, df2 = 9), type = "l", lty = 1, lwd = 1,
xlim = c(0,8), ylim = c(0,1.0), xlab = "Values of F", ylab = "Density")
title(main = "Three Different F-Distributions")
lines(nval, df(nval, df1 = 9, df2 = 19), type = "l", lty = 2, lwd = 1)
lines(nval, df(nval, df1 = 14, df2 = 29), type = "l", lty = 3, lwd = 1)
abline(h = 0)
legend(4, .95, cex = 1.0, legend = c("df1 = 4, df2 = 9",
"df1 = 9, df2 = 19", "df1 = 14, df2 = 29"), lty=1:3)
# Script 15.4 - Testing for the Equality of Two Variances
# Clean out workspace
rm(list = ls())
# Suppress levels of significance
options(show.signif.stars = FALSE)
library(car)
source("C:/R/functions.txt")
# Read in the data
white <- c(15,24,21,16,20,29,32,14,24,24,20,27,26,29,33,19,23,23,17,
14,24,37,20,13,22)
asian <- c(12,26,56,11,48,19,17,40,31,12,33,20,34,54,21,24,66,24,31,
33,16,21,27,20,62)
# Find the two variances
var(white)
var(asian)
# Complete the F-test
var.test(white, asian)
# Examine possible nonnormality
skewness(white)
kurtosis(white)
shapiro.test(white)
skewness(asian)
kurtosis(asian)
shapiro.test(asian)
# Build a data frame
nw <- length(white)
na <- length(asian)
g1 <- rep(1, nw)
g2 <- rep(2, na)
read <- c(white, asian)
group <- c(g1, g2)
group <- factor(group)
data.var <- data.frame(read, group)
# Levene's test
leveneTest(read ~ group, data = data.var)
# Fligner-Killeen test
fligner.test(read ~ group)
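# Supplementary check (not in the original script): the two-sided critical
# values implied by var.test() for 24 and 24 df, used again in Script 15.5
qf(c(.025, .975), df1 = 24, df2 = 24)  # approximately 0.441 and 2.269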
# Script 15.5 - Depicting decision rule for F-test example (Figure 15.3)
nval <- pretty(c(0, 4), 1000)
plot(nval, df(nval, df1 = 24, df2 = 24), type = "l", lty = 1,
lwd = 1, xlim = c(0, 4), ylim= c(0, 1.1), xlab = "Values of F",
ylab = "Density")
title(main = "F-Distribution for 24 and 24 degrees of freedom")
abline(h = 0)
lines(x = c(.441, .441), y = c(0, df(.441, 24, 24)))
lines(x = c(2.269, 2.269), y = c(0, df(2.269, 24, 24)))
# Shading the upper tail
fu0 <- nval[nval >= 2.269]
fu0 <- c(fu0[1], fu0)
pu0 <- df(nval[nval >= 2.269], 24, 24)
pu0 <- c(0, pu0)
polygon(fu0, pu0, col = "gray")
# Shading the lower tail
fl0 <- nval[nval <= .441]
fl0 <- c(fl0[1], fl0)
pl0 <- df(nval[nval <= .441], 24, 24)
pl0 <- c(pl0, 0)
polygon(fl0, pl0, col = "gray")
arrows(.2, 1, .441, 0, length = .1)
text(x = .2, y = 1.05, label = "0.441", cex = .85)
arrows(2.6, 1, 2.269, 0, length = .1)
text(x = 2.6, y = 1.05, label = "2.269", cex = .85)
text(x = 1.3, y = .1, label = "Region of\nNon-Rejection", cex = .85)
# Script 15.6 - Levene's test as a t
# Clean out workspace
rm(list = ls())
# Suppress levels of significance
options(show.signif.stars = FALSE)
library(car)
source("C:/R/functions.txt")
# Read in the data
white <- c(15,24,21,16,20,29,32,14,24,24,20,27,26,29,33,19,23,23,17,
14,24,37,20,13,22)
asian <- c(12,26,56,11,48,19,17,40,31,12,33,20,34,54,21,24,66,24,31,
33,16,21,27,20,62)
medw <- median(white)
meda <- median(asian)
devw <- abs(white - medw)
deva <- abs(asian - meda)
t.test(devw, deva, var.equal = TRUE)
# From the output, we note the observed t-statistic is -3.04243
# Squaring the observed t to get the F
(-3.04243)^2
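# Supplementary check (not in the original script): car's leveneTest(),
# which centers at the median by default, reports this squared t as its F
read <- c(white, asian)
group <- factor(c(rep(1, length(white)), rep(2, length(asian))))
leveneTest(read ~ group)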
# Script 15.7 - Welch/Satterthwaite approach
# Clean out workspace
rm(list = ls())
# Suppress levels of significance
options(show.signif.stars = FALSE)
library(car)
source("C:/R/functions.txt")
# Define the groups
low <- c(13,13,21,16,22,15,18,20,22,14)
high <- c(40,28,18,32,17,27,19,21,16,30)
t.test(low, high, var.equal = FALSE)
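# Supplementary check (not in the original script): the Welch/Satterthwaite
# degrees of freedom reported by t.test() can be computed directly
v.low <- var(low)/length(low)
v.high <- var(high)/length(high)
(v.low + v.high)^2/(v.low^2/(length(low) - 1) + v.high^2/(length(high) - 1))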
# Script 15.8 - Two Independent Correlations
# Clean out workspace
rm(list = ls())
# Suppress levels of significance
options(show.signif.stars = FALSE)
library(car)
source("C:/R/functions.txt")
# If working with summary data, skip down
# Working from raw data in text file
ecls200 <- read.table("c:/rbook/ecls200.txt", header = TRUE)
# Create the two subfiles
ecls200.white <- subset(ecls200, race == "1",
select = c(c1rrscal, c4rmscal))
ecls200.aa <- subset(ecls200, race == "2",
select = c(c1rrscal, c4rmscal))
r1 <- with(ecls200.white, cor(c1rrscal, c4rmscal))
r1
n1 <- with(ecls200.white, length(c1rrscal))
n1
r2 <- with(ecls200.aa, cor(c1rrscal, c4rmscal))
r2
n2 <- with(ecls200.aa, length(c1rrscal))
n2
# If working from summary data, start here...
# Remove the # from each line and substitute your values
# r1 <- ??
# n1 <- ??
# r2 <- ??
# n2 <- ??
# Employ Fisher's r-to-z transformation (inverse hyperbolic tangent)
r1.prime <- atanh(r1)
r1.prime
r2.prime <- atanh(r2)
r2.prime
# Calculate the test statistic
z.obs <- (r1.prime - r2.prime)/sqrt((1/(n1 - 3)) + (1/(n2 - 3)))
z.obs
# Two-tailed p-value
2*(1 - pnorm(abs(z.obs)))
# One-tailed p-value, confirm direction
(1 - pnorm(abs(z.obs)))
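# Supplementary note (not in the original script): atanh() is Fisher's
# r-to-z transformation, equivalent to the logarithmic form
0.5*log((1 + r1)/(1 - r1))  # should match r1.prime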
# Script 15.9 - Comparing two dependent correlations for African Americans
# Clean out the workspace
rm(list = ls())
# If working with summary data, skip down
# Reading raw data
ecls200 <- read.table("c:/rbook/ecls200.txt", header = TRUE)
attach(ecls200)
names(ecls200)
ecls200.aa <- subset(ecls200, race == "2",
select = c(c1rrscal, c1rmscal, c4rmscal))
attach(ecls200.aa)
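# Note: attaching ecls200.aa masks the same-named columns attached from
# ecls200, so the calculations below use the African American subset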
n <- length(c1rrscal)
n
rxz <- cor(c1rrscal, c4rmscal)
rxz
ryz <- cor(c1rmscal, c4rmscal)
ryz
rxy <- cor(c1rrscal, c1rmscal)
rxy
# If working with summarized data, start here, removing the #
# from the next 12 lines and substituting your values
# n <- 100
# rxz <- .6049761
# ryz <- .6492096
# rxy <- .7433614
# detR <- 1 - rxz*rxz - ryz*ryz - rxy*rxy + 2*rxz*ryz*rxy
# t.obs <- (rxz - ryz)*sqrt(((n - 1)*(1 + rxy))/(2*((n - 1)/
# (n - 3))*detR + (rxz + ryz)^2/4 *(1 - rxy)^3))
# t.obs
# # Two-tailed p-value
# 2*(1 - pt(abs(t.obs), n - 3))
# # One-tailed p-value, confirm direction
# (1 - pt(abs(t.obs), n - 3))
# Doing the test with R
mat <- cbind(c1rrscal, c1rmscal, c4rmscal)
r.mat <- cor(mat)
r.mat
detR <- det(r.mat)
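# det(r.mat) equals 1 - rxz^2 - ryz^2 - rxy^2 + 2*rxz*ryz*rxy from above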
t.obs <- (rxz - ryz)*sqrt(((n - 1)*(1 + rxy))/(2*((n - 1)/
(n - 3))*detR + (rxz + ryz)^2/4 *(1 - rxy)^3))
t.obs
# Two-tailed p-value
2*(1 - pt(abs(t.obs), n - 3))
# One-tailed p-value, confirm direction
(1 - pt(abs(t.obs), n - 3))
# Script 15.10 - Two Independent Regression Slopes
# Clean out workspace
rm(list = ls())
# Suppress levels of significance
options(show.signif.stars = FALSE)
library(car)
source("C:/R/functions.txt")
# Working from raw data in text file
ecls200 <- read.table("c:/rbook/ecls200.txt", header = TRUE)
names(ecls200)
# Create the two subfiles
ecls200.white <- subset(ecls200, race == "1",
select = c(c1rrscal, c4rmscal))
ecls200.aa <- subset(ecls200, race == "2",
select = c(c1rrscal, c4rmscal))
n.1 <- with(ecls200.white, length(c4rmscal))
n.1
lm.1 <- with(ecls200.white, lm(c4rmscal ~ c1rrscal))
sum.1 <- summary(lm.1)
sum.1
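# The coefficients component is a 2 x 4 matrix; with column-major indexing,
# element [2] is the slope estimate and [4] is its standard error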
b.1 <- sum.1$coefficients[2]
b.1
se.1 <- sum.1$coefficients[4]
se.1
n.2 <- with(ecls200.aa, length(c1rrscal))
lm.2 <- with(ecls200.aa, lm(c4rmscal ~ c1rrscal))
sum.2 <- summary(lm.2)
sum.2
b.2 <- sum.2$coefficients[2]
b.2
se.2 <- sum.2$coefficients[4]
se.2
# If working from summary data, start here
# Remove the # from each of the next six lines and supply the values
# b.1 <- 0.4143225
# se.1 <- 0.0823185
# n.1 <- 100
# b.2 <- 0.6218583
# se.2 <- 0.08267727
# n.2 <- 100
se.pooled <- sqrt(se.1^2 + se.2^2)
se.pooled
t.obs <- (b.1 - b.2)/se.pooled
t.obs
# Two-tailed p-value
2*(1 - pt(abs(t.obs), n.1 + n.2 - 4))
# One-tailed p-value
(1 - pt(abs(t.obs), n.1 + n.2 - 4))
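# Alternative sketch (not in the original script): the slope difference can
# also be tested as the group-by-predictor interaction in a single model;
# the t-value differs slightly because this pools the residual variance
ecls12 <- subset(ecls200, race %in% c("1", "2"))
ecls12$race <- factor(ecls12$race)
summary(lm(c4rmscal ~ c1rrscal * race, data = ecls12))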
# Script 15.11 - Exercise 15.3
# Clean out workspace
rm(list = ls())
# Suppress levels of significance
options(show.signif.stars = FALSE)
library(car)
source("C:/R/functions.txt")
# Read in the data
white <- c(29,32,23,23,20,24,17,20,33,29,14,24)
asian <- c(12,26,56,11,48,19,17,40,31,12,33,20,34,54,21,24,66,24,31,
33,16,21,27,20,62)
# Build a data frame
nw <- length(white)
na <- length(asian)
g1 <- rep(1, nw)
g2 <- rep(2, na)
read <- c(white, asian)
group <- c(g1, g2)
f.group <- factor(group, levels = 1:2, labels = c("white", "asian"),
ordered = TRUE)
data.var <- data.frame(read, f.group)
# Descriptive statistics
table(f.group)
tapply(read, f.group, mean)
tapply(read, f.group, var)
tapply(read, f.group, sd)
tapply(read, f.group, skewness)
tapply(read, f.group, SEsk)
tapply(read, f.group, kurtosis)
tapply(read, f.group, SEku)
boxplot(read ~ f.group)
# Testing assumptions
tapply(read, f.group, shapiro.test)
# Levene's test
leveneTest(read ~ f.group, data = data.var)
# Fligner-Killeen test
fligner.test(read ~ f.group)
t.test(read ~ f.group, var.equal = TRUE)
t.test(read ~ f.group, var.equal = FALSE)
wilcox.test(read ~ f.group)