R Scripts for Chapter 15

# Script 15.1 - Simulating the F-distribution
# Rerun with changes in n1 and n2
# Clean out workspace
rm(list = ls())
# Suppress levels of significance
options(show.signif.stars = FALSE)
library(car)
library(MASS)
source("C:/R/functions.txt")
n1 <- 5
n2 <- 10
N <- 100000
var1 <- numeric(N)
var2 <- numeric(N)
stat5.10 <- numeric(N)
for (i in 1:N) {
  sam1 <- rnorm(n1)
  var1[i] <- var(sam1)
  sam2 <- rnorm(n2)
  var2[i] <- var(sam2)
  stat5.10[i] <- var1[i]/var2[i]
}
mean(var1)
mean(var2)
min(stat5.10)
median(stat5.10)
mean(stat5.10)
max(stat5.10)
var.pop(stat5.10)
skewness(stat5.10)
kurtosis(stat5.10)
hist(stat5.10, prob = TRUE, breaks = 60, ylim = c(0, .8),
     main = "Sample Sizes (5,10)")
lines(density(stat5.10), lwd = 2)

# Script 15.2 - Expected value and variance of the F-distribution
# Rerun with changes in n1 and n2
# Clean out workspace
rm(list = ls())
# Suppress levels of significance
options(show.signif.stars = FALSE)
library(car)
source("C:/R/functions.txt")
n1 <- 15
n2 <- 30
v1 <- n1 - 1
v2 <- n2 - 1
ev <- v2/(v2 - 2)
var <- (2*v2^2*(v1 + v2 - 2))/(v1*(v2 - 2)^2*(v2 - 4))
ev; var

# Script 15.3 - Depicting F-Distributions
nval <- pretty(c(0, 15), 1000)
plot(nval, df(nval, df1 = 4, df2 = 9), type = "l", lty = 1, lwd = 1,
     xlim = c(0, 8), ylim = c(0, 1.0), xlab = "Values of F", ylab = "Density")
title(main = "Three Different F-Distributions")
lines(nval, df(nval, df1 = 9, df2 = 19), type = "l", lty = 2, lwd = 1)
lines(nval, df(nval, df1 = 14, df2 = 29), type = "l", lty = 3, lwd = 1)
abline(h = 0)
legend(4, .95, cex = 1.0,
       legend = c("df1 = 4, df2 = 9", "df1 = 9, df2 = 19", "df1 = 14, df2 = 29"),
       lty = 1:3)

# Script 15.4 - Testing for the Equality of Two Variances
# Clean out workspace
rm(list = ls())
# Suppress levels of significance
options(show.signif.stars = FALSE)
library(car)
source("C:/R/functions.txt")
# Read in the data
white <- c(15,24,21,16,20,29,32,14,24,24,20,27,26,29,33,19,23,23,17,
           14,24,37,20,13,22)
asian <- c(12,26,56,11,48,19,17,40,31,12,33,20,34,54,21,24,66,24,31,
           33,16,21,27,20,62)
# Find the two variances
var(white)
var(asian)
# Complete the F-test
var.test(white, asian)
# Examine possible nonnormality
skewness(white)
kurtosis(white)
shapiro.test(white)
skewness(asian)
kurtosis(asian)
shapiro.test(asian)
# Build a data frame
nw <- length(white)
na <- length(asian)
g1 <- rep(1, nw)
g2 <- rep(2, na)
read <- c(white, asian)
group <- c(g1, g2)
group <- factor(group)
data.var <- data.frame(read, group)
# Levene's test
leveneTest(read ~ group, data = data.var)
# Fligner-Killeen test
fligner.test(read ~ group)

# Script 15.5 - Depicting decision rule for F-test example (Figure 15.3)
nval <- pretty(c(0, 4), 1000)
plot(nval, df(nval, df1 = 24, df2 = 24), type = "l", lty = 1, lwd = 1,
     xlim = c(0, 4), ylim = c(0, 1.1), xlab = "Values of F", ylab = "Density")
title(main = "F-Distribution for 24 and 24 degrees of freedom")
abline(h = 0)
lines(x = c(.441, .441), y = c(0, df(.441, 24, 24)))
lines(x = c(2.269, 2.269), y = c(0, df(2.269, 24, 24)))
# Shading the upper tail
fu0 <- nval[nval >= 2.269]
fu0 <- c(fu0[1], fu0)
pu0 <- df(nval[nval >= 2.269], 24, 24)
pu0 <- c(0, pu0)
polygon(fu0, pu0, col = "gray")
# Shading the lower tail
fl0 <- nval[nval <= .441]
fl0 <- c(fl0[1], fl0)
pl0 <- df(nval[nval <= .441], 24, 24)
pl0 <- c(pl0, 0)
polygon(fl0, pl0, col = "gray")
arrows(.2, 1, .441, 0, length = .1)
text(x = .2, y = 1.05, label = "0.441", cex = .85)
arrows(2.6, 1, 2.269, 0, length = .1)
text(x = 2.6, y = 1.05, label = "2.269", cex = .85)
text(x = 1.3, y = .1, label = "Region of Non-Rejection", cex = .85)
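
# Optional check (a sketch, not part of Script 15.5): the critical values
# 0.441 and 2.269 marked in the figure above can be reproduced with qf(),
# assuming a two-tailed test at alpha = .05 with 24 and 24 degrees of freedom.
qf(.025, df1 = 24, df2 = 24)   # lower critical value, approximately 0.441
qf(.975, df1 = 24, df2 = 24)   # upper critical value, approximately 2.269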
Non-Rejection",cex=.85) # Script 15.6 - Levene's test as a t # Clean out workspace rm(list = ls()) # Suppress levels of significance options(show.signif.stars = FALSE) library(car) source("C:/R/functions.txt") # Read in the data white <- c(15,24,21,16,20,29,32,14,24,24,20,27,26,29,33,19,23,23,17, 14,24,37,20,13,22) asian <- c(12,26,56,11,48,19,17,40,31,12,33,20,34,54,21,24,66,24,31, 33,16,21,27,20,62) medw <- median(white) meda <- median(asian) devw <- abs(white - medw) deva <- abs(asian - meda) t.test(devw, deva, var.equal = TRUE) # From the output, we note the observed t-statistic is -3.04243 # Squaring the observed t to get the F (-3.04243)^2 # Script 15.7 - Welch/Satterthwaite approach # Clean out workspace rm(list = ls()) #Suppress levels of significance options(show.signif.stars = FALSE) library(car) source("C:/R/functions.txt") # Define the groups low <- c(13,13,21,16,22,15,18,20,22,14) high <- c(40,28,18,32,17,27,19,21,16,30) t.test(low, high, var.equal = FALSE) # Script 15.8 Two Independent Correlations # Clean out workspace rm(list = ls()) # Suppress levels of significance options(show.signif.stars = FALSE) library(car) source("C:/R/functions.txt") # If working with summary data, skip down # Working from raw data in text file ecls200 <- read.table("c:/rbook/ecls200.txt", header = TRUE) # Create the two subfiles ecls200.white <- subset(ecls200, race == "1", select = c(c1rrscal, c4rmscal)) ecls200.aa <- subset(ecls200, race == "2", select=c(c1rrscal, c4rmscal)) r1 <- with(ecls200.white, cor(c1rrscal, c4rmscal)) r1 n1 <- with(ecls200.white, length(c1rrscal)) n1 r2 <- with(ecls200.aa, cor(c1rrscal, c4rmscal)) r2 n2 <- with(ecls200.aa, length(c1rrscal)) n2 # If working from summary data, start here... # Remove the # from each line and substitute your values # r1 <- ?? # n1 <- ?? # r2 <- ?? # n2 <- ?? 
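
# A brief aside (a sketch, not part of the original script): atanh(), used in
# the next step, is Fisher's r-to-z transformation and is equivalent to
# 0.5*log((1 + r)/(1 - r)). The value .5 below is an arbitrary illustration.
atanh(.5)                      # 0.5493061
0.5*log((1 + .5)/(1 - .5))     # 0.5493061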

# Employ Fisher's r-to-z transformation
r1.prime <- atanh(r1)
r1.prime
r2.prime <- atanh(r2)
r2.prime
# Calculate the test statistic
z.obs <- (r1.prime - r2.prime)/sqrt((1/(n1 - 3)) + (1/(n2 - 3)))
z.obs
# Two tailed p-value
2*(1 - pnorm(abs(z.obs)))
# One tailed p-value, confirm direction
(1 - pnorm(abs(z.obs)))

# Script 15.9 - Comparing two dependent correlations for African Americans
# Clean out the workspace
rm(list = ls())
# If working with summary data, skip down
# Reading raw data
ecls200 <- read.table("c:/rbook/ecls200.txt", header = TRUE)
attach(ecls200)
names(ecls200)
ecls200.aa <- subset(ecls200, race == "2",
                     select = c(c1rrscal, c1rmscal, c4rmscal))
attach(ecls200.aa)
n <- length(c1rrscal)
n
rxz <- cor(c1rrscal, c4rmscal)
rxz
ryz <- cor(c1rmscal, c4rmscal)
ryz
rxy <- cor(c1rrscal, c1rmscal)
rxy
# If working with summarized data, start here, removing the #
# from the next 12 lines and substituting your values
# n <- 100
# rxz <- .6049761
# ryz <- .6492096
# rxy <- .7433614
# detR <- 1 - rxz*rxz - ryz*ryz - rxy*rxy + 2*rxz*ryz*rxy
# t.obs <- (rxz - ryz)*sqrt(((n - 1)*(1 + rxy))/(2*((n - 1)/
#   (n - 3))*detR + (rxz + ryz)^2/4 *(1 - rxy)^3))
# t.obs
# # Two tailed p-value
# 2*(1 - pt(abs(t.obs), n - 3))
# # One tailed p-value, confirm direction
# (1 - pt(abs(t.obs), n - 3))
# Doing the test with R
mat <- cbind(c1rrscal, c1rmscal, c4rmscal)
r.mat <- cor(mat)
r.mat
detR <- det(r.mat)
t.obs <- (rxz - ryz)*sqrt(((n - 1)*(1 + rxy))/(2*((n - 1)/
  (n - 3))*detR + (rxz + ryz)^2/4 *(1 - rxy)^3))
t.obs
# Two tailed p-value
2*(1 - pt(abs(t.obs), n - 3))
# One tailed p-value, confirm direction
(1 - pt(abs(t.obs), n - 3))

# Script 15.10 - Two Independent Regression Slopes
# Clean out workspace
rm(list = ls())
# Suppress levels of significance
options(show.signif.stars = FALSE)
library(car)
source("C:/R/functions.txt")
# Working from raw data in text file
ecls200 <- read.table("c:/rbook/ecls200.txt", header = TRUE)
names(ecls200)
# Create the two subfiles
ecls200.white <- subset(ecls200, race == "1", select = c(c1rrscal, c4rmscal))
ecls200.aa <- subset(ecls200, race == "2", select = c(c1rrscal, c4rmscal))
n.1 <- with(ecls200.white, length(c4rmscal))
n.1
lm.1 <- with(ecls200.white, lm(c4rmscal ~ c1rrscal))
sum.1 <- summary(lm.1)
sum.1
b.1 <- sum.1$coefficients[2]
b.1
se.1 <- sum.1$coefficients[4]
se.1
n.2 <- with(ecls200.aa, length(c1rrscal))
lm.2 <- with(ecls200.aa, lm(c4rmscal ~ c1rrscal))
sum.2 <- summary(lm.2)
sum.2
b.2 <- sum.2$coefficients[2]
b.2
se.2 <- sum.2$coefficients[4]
se.2
# If working from summary data, start here
# Remove the # from each of the next six lines and supply the values
# b.1 <- 0.4143225
# se.1 <- 0.0823185
# n.1 <- 100
# b.2 <- 0.6218583
# se.2 <- 0.08267727
# n.2 <- 100
se.pooled <- sqrt(se.1^2 + se.2^2)
se.pooled
t.obs <- (b.1 - b.2)/se.pooled
t.obs
# Two tailed p-value
2*(1 - pt(abs(t.obs), n.1 + n.2 - 4))
# One tailed p-value
(1 - pt(abs(t.obs), n.1 + n.2 - 4))

# Script 15.11 - Exercise 15.3
# Clean out workspace
rm(list = ls())
# Suppress levels of significance
options(show.signif.stars = FALSE)
library(car)
source("C:/R/functions.txt")
# Read in the data
white <- c(29,32,23,23,20,24,17,20,33,29,14,24)
asian <- c(12,26,56,11,48,19,17,40,31,12,33,20,34,54,21,24,66,24,31,
           33,16,21,27,20,62)
# Build a data frame
nw <- length(white)
na <- length(asian)
g1 <- rep(1, nw)
g2 <- rep(2, na)
read <- c(white, asian)
group <- c(g1, g2)
f.group <- factor(group, levels = 1:2, labels = c("white", "asian"),
                  ordered = TRUE)
data.var <- data.frame(read, group)
# Descriptive statistics
table(f.group)
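# An alternative sketch (not part of the original script): the group means
# obtained below with tapply() could also be collected with base R's
# aggregate(); shown here for the mean only.
aggregate(read, by = list(group = f.group), FUN = mean)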
tapply(read, f.group, mean)
tapply(read, f.group, var)
tapply(read, f.group, sd)
tapply(read, f.group, skewness)
tapply(read, f.group, SEsk)
tapply(read, f.group, kurtosis)
tapply(read, f.group, SEku)
boxplot(read ~ f.group)
# Testing assumptions
tapply(read, f.group, shapiro.test)
# Levene's test
leveneTest(read ~ f.group, data = data.var)
# Fligner-Killeen test
fligner.test(read ~ f.group)
t.test(read ~ f.group, var.equal = TRUE)
t.test(read ~ f.group, var.equal = FALSE)
wilcox.test(read ~ f.group)
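
# Optional check (a sketch, not part of the original script): the
# Welch-Satterthwaite degrees of freedom reported by
# t.test(read ~ f.group, var.equal = FALSE) can be reproduced by hand
# from the Script 15.11 objects white, asian, nw, and na.
vw <- var(white)/nw
va <- var(asian)/na
df.welch <- (vw + va)^2/(vw^2/(nw - 1) + va^2/(na - 1))
df.welch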