# R Scripts for Chapter 14

# Independent-samples t-test on two small groups: summary statistics and the
# pooled-variance t statistic computed by hand, the same test via t.test(),
# a few power calculations, and the same test run from a data file.

# Clean out workspace
rm(list = ls())
# Suppress levels of significance
options(show.signif.stars = FALSE)
library(car)
library(pwr)
source("C:/R/functions.txt")

# Define the groups
low <- c(13, 13, 21, 16, 22, 15, 18, 20, 22, 14)
high <- c(40, 28, 18, 32, 17, 27, 19, 21, 16, 30)

# Develop the summary statistics
n1 <- length(low)
n2 <- length(high)
sum1 <- sum(low)
sum2 <- sum(high)
sumsq1 <- sum(low * low)
sumsq2 <- sum(high * high)
m1 <- mean(low)
m2 <- mean(high)
# Sums of squared deviations via the computational formula
ss1 <- sumsq1 - (sum1 * sum1) / n1
ss2 <- sumsq2 - (sum2 * sum2) / n2
sd1 <- sd(low)
sd2 <- sd(high)
n1; n2
sum1; sum2
sumsq1; sumsq2
m1; m2
ss1; ss2
sd1; sd2

# Calculate t
diff <- m1 - m2
s2.pooled <- (ss1 + ss2) / (n1 + n2 - 2)
t.obs <- diff / sqrt(s2.pooled * ((1 / n1) + (1 / n2)))
t.obs
df <- n1 + n2 - 2
# Two-tailed p-value; -abs() keeps it valid whichever sign t.obs has
p.value <- 2 * pt(-abs(t.obs), df)
p.value
t.crit <- qt(c(.025, .975), df)
t.crit

# Finding t the easy way
t.test(low, high, var.equal = TRUE)

# Playing with power
# power.t.test() takes the raw mean difference (delta) and sd;
# pwr.t.test() takes the standardized effect size d.
power.t.test(
  n = 10, delta = 7.4, sd = 6.14, sig.level = 0.05, power = NULL,
  type = "two.sample", alternative = "two.sided"
)
pwr.t.test(
  n = 10, d = 1.2, sig.level = .05,
  type = "two.sample", alternative = "two.sided"
)
# NOTE(review): 1.22 looks like a standardized effect size (7.4 / 6.14 is
# about 1.2) but is passed here as the raw difference together with
# sd = 6.14 -- confirm whether delta = 7.4 (or sd = 1) was intended.
power.t.test(
  n = NULL, delta = 1.22, sd = 6.14, sig.level = .05, power = .8,
  type = "two.sample", alternative = "two.sided"
)

# Another Way
# Read in text file with data
# Group defined as 1 = Low, 2 = High
ses.data <- read.table("c:/rbook/twoindmeans.txt", header = TRUE)
ses.data$f.group <- factor(
  ses.data$group,
  levels = 1:2, labels = c("Low.SES", "High.SES"), ordered = TRUE
)
# var.equal = TRUE requests the pooled-variance test, matching the hand
# calculation above; columns are looked up through data = instead of attach()
t.test(read1 ~ f.group, data = ses.data, var.equal = TRUE)

# Script 14.2: The Wilcoxon/Mann-Whitney test.
# Rank the pooled observations, sum the ranks per group, then run the
# Wilcoxon/Mann-Whitney test with wilcox.test() and PASWR's wilcoxE.test().

# Clean out workspace
rm(list = ls())
# Suppress levels of significance
options(show.signif.stars = FALSE)
library(car)
library(pwr)
source("C:/R/functions.txt")

# Define the groups
low <- c(13, 13, 21, 16, 22, 15, 18, 20, 22, 14)
n1 <- length(low)
high <- c(40, 28, 18, 32, 17, 27, 19, 21, 16, 30)
n2 <- length(high)

# Rank all observations together; tied values share their average rank
both <- c(low, high)
rank.both <- rank(both, ties.method = "average")
rank.both

# The first n1 ranks belong to `low`, the remaining n2 to `high`
ranks.low <- rank.both[seq_len(n1)]
first.2 <- n1 + 1
last.2 <- n1 + n2
ranks.high <- rank.both[first.2:last.2]
ranks.high

# Rank sums for the two groups
r1 <- sum(ranks.low)
r2 <- sum(ranks.high)
r1; r2

library(PASWR)
# NOTE(review): the data contain tied values; wilcox.test() may warn and
# fall back from the exact p-value requested with exact = TRUE -- confirm.
wilcox.test(
  low, high,
  alternative = "two.sided", exact = TRUE, correct = FALSE
)
wilcoxE.test(low, high, conf.level = .95)

# Another Way
# Read in text file with data
# Group defined as 1 = Low, 2 = High
ses.data <- read.table("c:/rbook/twoindmeans.txt", header = TRUE)
ses.data$f.group <- factor(
  ses.data$group,
  levels = 1:2, labels = c("Low.SES", "High.SES"), ordered = TRUE
)
# Columns are looked up through data = rather than attach()
wilcox.test(
  read1 ~ f.group,
  data = ses.data,
  alternative = "two.sided", exact = TRUE, correct = FALSE
)
# Note: wilcoxE.test can only compare two vectors/groups

# Script 14.3: Two-sample permutation test for means.
# Permutation test for the difference between two means
# By squaring the difference, we are doing a two-tailed test; otherwise
# we would compare the "random" differences to the actual difference,
# in the way predicted by the alternate hypothesis

# Read in the data
low <- c(13, 13, 21, 16, 22, 15, 18, 20, 22, 14)
n1 <- length(low)
high <- c(40, 28, 18, 32, 17, 27, 19, 21, 16, 30)
n2 <- length(high)
mean.l <- mean(low)
mean.h <- mean(high)
diff <- mean.l - mean.h
diff
diff2 <- diff^2

# Finding first and last index for each group
f1 <- 1
l1 <- n1
f2 <- n1 + 1
l2 <- n1 + n2

pop <- c(low, high)
N <- 100000
stat <- numeric(N)
stat2 <- numeric(N)
counter <- 0
for (i in seq_len(N)) {
  # Shuffle the pooled data and split it back into two pseudo-groups
  perm <- sample(pop, l2)
  lgp <- perm[f1:l1]
  hgp <- perm[f2:l2]
  mean1p <- mean(lgp)
  mean2p <- mean(hgp)
  stat[i] <- mean1p - mean2p
  stat2[i] <- stat[i]^2
  # For two tailed test
  if (stat2[i] >= diff2) {
    counter <- counter + 1
  }
}
p.value <- counter / N
cat("The p-value is", p.value,"\n")
hist(stat, probability = TRUE)
lines(density(stat))
# Percentiles of the permutation distribution of the mean difference
q <- round(quantile(stat, probs = c(.005, .025, .05, .95, .975, .995)), 3)
cat("The quartiles for using CI approach are:\n"); q

# Script 14.4 - Two-sample test for proportions
# Clean out workspace
rm(list = ls())
# Suppress levels of significance
options(show.signif.stars = FALSE)
library(car)
library(MASS)
library(pwr)
source("C:/R/functions.txt")

# Define favor as the two values for "in favor"
# Define survey as the number in each group respectively
favor <- c(31, 19)
survey <- c(50, 50)
prop.test(favor, survey, correct = FALSE, alternative = "greater")

# Script 14.5 - Two sample test permutation test for proportions
# Clean out workspace
rm(list = ls())
# Suppress levels of significance
options(show.signif.stars = FALSE)
source("C:/R/functions.txt")

# Provide data
n1 <- 50
s1 <- 31
n2 <- 50
s2 <- 19
p1 <- s1 / n1
p1
p2 <- s2 / n2
p2
diff1 <- p1 - p2
diff2 <- diff1^2
n.success <- s1 + s2
n.total <- n1 + n2
n.fail <- n.total - n.success

# Establish ones and zeros
success <- rep(1, n.success)
failure <- rep(0, n.fail)
perm.data <- c(success, failure)

# Set first and last index for each group
f1 <- 1
l1 <- n1
f2 <- n1 + 1
l2 <- n1 + n2

N <- 100000
# stat1 for one-tailed test
stat1 <- numeric(N)
# stat2 for two-tailed test
stat2 <- numeric(N)
# counter1 counts the one-tailed comparison, counter2 the two-tailed one
counter1 <- 0
counter2 <- 0
for (i in seq_len(N)) {
  # Shuffle the pooled successes/failures and split into two pseudo-groups
  perm <- sample(perm.data, l2)
  g1.perm <- perm[f1:l1]
  g2.perm <- perm[f2:l2]
  p1.perm <- mean(g1.perm)
  p2.perm <- mean(g2.perm)
  stat1[i] <- p1.perm - p2.perm
  stat2[i] <- stat1[i]^2
  if (stat1[i] >= diff1) {
    counter1 <- counter1 + 1
  }
  if (stat2[i] >= diff2) {
    counter2 <- counter2 + 1
  }
}
p.value1 <- counter1 / N
p.value2 <- counter2 / N
# Only the one-tailed p-value is reported here; p.value2 holds the
# two-tailed result
cat("The observed difference is",diff1,"and the p-value is",p.value1,"\n")
# Percentiles of the permutation distribution of the difference
q <- round(quantile(stat1, probs = c(.005, .025, .05, .95, .975, .995)), 3)
cat("The quartiles for using CIs for hypothesis testing are: \n"); q

# Script 14.6: Testing the difference between two medians
# The median test
# Clean out workspace
rm(list = ls())
source("C:/R/functions.txt")
# Suppress levels of significance
options(show.signif.stars = FALSE)

# House prices taken from Allen Tate ad in N&O 1 May 2010
tate.price <- c(
  1210000, 1085000, 999950, 850000, 700000, 600000, 575000, 550000,
  550000, 524900, 515000, 449900, 399900, 365000, 339900, 295000, 274900,
  229900, 182500, 179900, 169900, 134900, 108000, 895000, 650000, 650000,
  599900, 527500, 475000, 322000, 286000, 260000, 215900, 197000, 179900,
  167000
)
# House prices taken from Chapel Hill News, 15 August 2010 - The Home Team
team.price <- c(
  659000, 399000, 222500, 599000, 350000, 295000, 495000, 384900,
  219000, 599000, 349000, 269500, 489900, 335000, 189000, 439750, 335000,
  268500, 475000, 264500, 189500, 375000, 300000, 199000, 449000, 252500,
  178900, 369000, 299000
)

# Combine data
house.price <- c(tate.price, team.price)

# Find the three medians
med.all <- median(house.price)
med.tate <- median(tate.price)
med.team <- median(team.price)
med.all
med.tate
med.team

# Finding the cell entries: counts strictly above / strictly below the
# pooled median (a value equal to the median falls in neither cell)
c11 <- sum(tate.price > med.all)
c12 <- sum(tate.price < med.all)
c21 <- sum(team.price > med.all)
c22 <- sum(team.price < med.all)
median.table <- rbind(c(c11, c12), c(c21, c22))
rownames(median.table) <- c("Tate.Raleigh", "Team.CH")
colnames(median.table) <- c("Above", "Below")
median.table

# Complete the median test
fisher.test(median.table)

# Permutation test for the difference between two medians
# (sample() draws without replacement, so each draw is a reshuffle of the
# pooled prices rather than a bootstrap resample)
diff1 <- med.tate - med.team
diff2 <- diff1^2
n1 <- length(tate.price)
n2 <- length(team.price)
N <- 100000

# Permutation Test from pooled data
counter <- 0
stat1 <- numeric(N)
stat2 <- numeric(N)
f1 <- 1
l1 <- n1
f2 <- n1 + 1
l2 <- n1 + n2
# For a two-tailed test, we track diff2
for (i in seq_len(N)) {
  perm <- sample(house.price, l2)
  g1.perm <- perm[f1:l1]
  g2.perm <- perm[f2:l2]
  median1p <- median(g1.perm)
  median2p <- median(g2.perm)
  stat1[i] <- median1p - median2p
  stat2[i] <- stat1[i]^2
  if (stat2[i] >= diff2) {
    counter <- counter + 1
  }
}
p.value <- counter / N
# Report the result (it was previously computed but never displayed)
p.value

# Script 14.7 - Test of location for dependent samples (Parametric,
# nonparametric, & resampling).
# Testing the difference between two dependent samples on location
# Clean out workspace
rm(list = ls())
source("C:/R/functions.txt")
# Suppress levels of significance
options(show.signif.stars = FALSE)

# Comparing the reading and math skills of 10 Native American students
read <- c(58, 47, 38, 35, 43, 47, 34, 56, 53, 44)
math <- c(43, 49, 40, 45, 38, 49, 38, 64, 52, 49)
diff <- read - math
diff
mean(diff)
sum(diff)
sum(diff * diff)
t.test(read, math, paired = TRUE)
qt(.025, 9)

# As though we had done an independent samples t-test
t.test(read, math, var.equal = TRUE)

# Doing binomial (Sign) test
# Zero differences carry no sign, so they are left out of the trial count
pos <- sum(diff > 0)
n.nonzero <- sum(diff != 0)
binom.test(pos, n.nonzero, alternative = "two.sided")

library(PASWR)
wilcox.test(read, math, paired = TRUE, correct = FALSE)
wilcoxE.test(read, math, paired = TRUE)

# A permutation test of the differences in the pairs
N <- 100000
diff <- read - math
n <- length(diff)
mdiff <- mean(diff)
mdiff
stat <- numeric(n)
counter1 <- 0
for (i in seq_len(N)) {
  # Flip the sign of each paired difference with probability one half
  flip <- runif(n) < .5
  stat <- ifelse(flip, -diff, diff)
  # Absolute values comparison makes it a two-tailed test
  if (abs(mean(stat)) >= abs(mdiff)) {
    counter1 <- counter1 + 1
  }
}
pvalue2 <- counter1 / N
cat("The mean difference is",mdiff,"and the p-value is",pvalue2,"\n")

# A different way to obtain the data
dep.data <- read.table("c:/rbook/twodepmeans.txt", header = TRUE)
# with() evaluates the call against the data frame's columns, so no
# attach() is needed
with(dep.data, t.test(read1, math1, paired = TRUE))

# Script 14.8: Testing the difference between two dependent proportions
# Clean out workspace
rm(list = ls())
source("C:/R/functions.txt")
# Suppress levels of significance
options(show.signif.stars = FALSE)

# Provide the observed counts
obs <- c(18, 8)
sum(obs)
# Find the expected probabilities for each cell
ratios <- c(13, 13)
prob <- ratios / sum(ratios)
result <- chisq.test(obs, p = prob)
result
qchisq(.95, 1)

# Using the mcnemar.test function
# Provide the observed counts in table form, bind the two rows
# of two columns
item.diff <- rbind(c(12, 18), c(8, 12))
colnames(item.diff) <- c("2.Correct", "2.Incorrect")
rownames(item.diff) <- c("1.Correct", "1.Incorrect")
item.diff
mcnemar.test(item.diff, correct = FALSE)

# Script 14.9 - Exercise14.6: Comparing two medians.
# Testing the difference between two medians
# Clean out workspace
rm(list = ls())
source("C:/R/functions.txt")
# Suppress levels of significance
options(show.signif.stars = FALSE)

# Read salaries for Boston Red Sox
brs <- c(
  420000, 12100000, 650000, 7750000, 14000000, 8525000, 18700000, 12500000,
  7700000, 2750000, 9350000, 1155000, 5500000, 3000000, 9375000
)
# Read salaries for New York Yankees
nyy <- c(
  435650, 9000000, 487975, 5500000, 22600000, 4000000, 1200000, 11750000,
  15000000, 33000000, 6850000, 900000, 1100000
)
n1 <- length(brs)
n2 <- length(nyy)
med1 <- median(brs)
med2 <- median(nyy)
n1; n2; med1; med2
salaries <- c(brs, nyy)
med <- median(salaries)
med

# Median test
# Finding the cell entries: counts strictly above / strictly below the
# pooled median for each team
c11 <- sum(brs > med)
c12 <- sum(brs < med)
c21 <- sum(nyy > med)
c22 <- sum(nyy < med)
median.table <- rbind(c(c11, c12), c(c21, c22))
rownames(median.table) <- c("Red Sox", "Yankees")
colnames(median.table) <- c("Above", "Below")
median.table
# Now do the chisq.test on the table
chisq.test(median.table, correct = FALSE)

# Permutation Test from pooled data - Two Tailed
diff1 <- med1 - med2
diff2 <- diff1^2
N <- 100000
counter <- 0
stat1 <- numeric(N)
stat2 <- numeric(N)
f1 <- 1
l1 <- n1
f2 <- n1 + 1
l2 <- n1 + n2
for (i in seq_len(N)) {
  # Shuffle the pooled salaries and split into two pseudo-teams
  perm <- sample(salaries, l2)
  g1.perm <- perm[f1:l1]
  g2.perm <- perm[f2:l2]
  median1p <- median(g1.perm)
  median2p <- median(g2.perm)
  stat1[i] <- median1p - median2p
  stat2[i] <- stat1[i]^2
  if (stat2[i] >= diff2) {
    counter <- counter + 1
  }
}
p.value <- counter / N
cat("The observed difference is",diff1,"and the p-value is",p.value,"\n")

# Script14.10 - Exercise14.10: Dependent proportions.
# Testing the difference between two dependent proportions
# Two routes to the same comparison: a chi-square goodness-of-fit test on
# the two observed counts against equal probabilities, then
# mcnemar.test() on the full 2 x 2 table.

# Clean out workspace
rm(list = ls())
source("C:/R/functions.txt")
# Suppress levels of significance
options(show.signif.stars = FALSE)

# Provide the observed counts
obs <- c(4, 8)
expect <- sum(obs) / 2
# Find the expected probabilities for each cell
# (equal expected counts, so each probability is one half)
ratios <- c(expect, expect)
prob <- ratios / sum(ratios)
result <- chisq.test(obs, p = prob)
result
# Critical chi-square value at the .05 level with 1 degree of freedom
qchisq(.95, 1)

# Using the mcnemar.test function
# Provide the observed counts in table form, bind the two rows
# of two columns
approve <- rbind(c(40, 4), c(8, 28))
rownames(approve) <- c("1.For", "2.Against")
colnames(approve) <- c("1.For", "2.Against")
approve
mcnemar.test(approve, correct = FALSE)