# R Scripts for Chapter 3
#
# Six teaching scripts, run top to bottom. Each "Clean out workspace" step
# starts a fresh script.
# NOTE(review): "C:/R/functions.txt" is not visible here. It presumably
# defines the helpers used below that are not in base R or stats
# (a statistical mode() masking base::mode, var.pop, AD, skewness, SEsk,
# kurtosis, SEku) -- confirm against that file.

# Script3.1.Sampling Stability ----

# Clean out workspace
rm(list = ls())
library(car)
source("C:/R/functions.txt")
# Suppress levels of significance
options(show.signif.stars = FALSE)

# Comparing the stability of the mode, median, and mean
# Set number of samples and reserve memory
N <- 20000
mo <- numeric(N)
med <- numeric(N)
mn <- numeric(N)
for (i in seq_len(N)) {
  data <- sample(1:100, 100, replace = TRUE)
  # mode() may return several values; their median is kept as "the" mode
  mod <- mode(data)
  modal <- median(mod)
  mo[i] <- modal
  med[i] <- median(data)
  mn[i] <- mean(data)
}
summary(mo)
summary(med)
summary(mn)

# Histogram of the means with the three sampling densities overlaid
hist(mn, probability = TRUE, xlim = c(10, 90))
lines(density(mn), lty = 1, lwd = 2)
lines(density(med), lty = 2, lwd = 2)
lines(density(mo), lty = 3, lwd = 2)
legend(10, .13,
  legend = c("Mean", "Median", "Mode"),
  title = "Densities for Mean, Median, and Mode",
  lty = c(1, 2, 3), cex = .7
)

# Accounting for the skew of the mode
for (i in seq_len(N)) {
  data <- sample(1:100, 100, replace = TRUE)
  mod <- mode(data)
  modal <- median(mod)
  mo[i] <- modal
}
hist(mo)

# Script3.2.SimulatingSampleVariance ----

# Clean out workspace
rm(list = ls())
library(car)
source("C:/R/functions.txt")
# Suppress levels of significance
options(show.signif.stars = FALSE)

data <- c(1:30)
sigma2 <- var.pop(data)
sigma2

# Specify desired sample size
# Repeat for n = 10, 15, 20, 25, 30, 40, and 50
n <- 5
N <- 100000
stat1 <- numeric(N)
stat2 <- numeric(N)
for (i in seq_len(N)) {
  boot <- sample(data, n, replace = TRUE)
  devs <- (boot - mean(boot))
  devs2 <- devs^2
  ss <- sum(devs2)
  # stat1: sum of squares divided by n; stat2: var(), which divides by n - 1
  varhat <- ss / n
  stat1[i] <- varhat
  stat2[i] <- var(boot)
}
# Value to compare with mean(stat1): sigma2 * (n - 1) / n
ev.bias <- sigma2 - sigma2 / n
ev.bias
mean(stat1)
mean(stat2)
hist(stat1)
hist(stat2)

# Script3.3.ChapterCaseStudy ----

# Clean out workspace
rm(list = ls())
library(car)
source("C:/R/functions.txt")
# Suppress levels of significance
options(show.signif.stars = FALSE)

# Read in the data
ecls200 <- read.table("c:/rbook/ecls200.txt", header = TRUE)
# subset() evaluates `race` inside ecls200, so no attach() is needed here
ecls200.white <- subset(ecls200, race == "1",
  select = c(c1rrtsco, c1rmscal)
)
ecls200.aa <- subset(ecls200, race == "2",
  select = c(c1rrtsco, c1rmscal)
)

# Looking at the data for Whites
attach(ecls200.white)
table(c1rmscal)
# Open new graphic window
windows()
hist(c1rmscal,
  probability = TRUE, breaks = seq(4.5, 49.5, 5),
  xlab = "Pre-K IRT Math Scores for Whites", ylim = c(0, .06)
)
lines(density(c1rmscal))
rug(jitter(c1rmscal))

# Descriptive Statistics
mode(c1rmscal)
median(c1rmscal)
mean(c1rmscal)
range(c1rmscal)
IQR(c1rmscal)
AD(c1rmscal)
var.pop(c1rmscal)
var(c1rmscal)
sd(c1rmscal)
quantile(c1rmscal)
summary(c1rmscal)
fivenum(c1rmscal)

# Switch to file for African Americans
detach(ecls200.white)
attach(ecls200.aa)
table(c1rmscal)
# Open new graphic window
windows()
# Axis label names the group actually plotted (was a copy of the Whites label)
hist(c1rmscal,
  probability = TRUE, breaks = seq(4.5, 49.5, 5),
  xlab = "Pre-K IRT Math Scores for African Americans", ylim = c(0, .07)
)
lines(density(c1rmscal))
rug(jitter(c1rmscal))

# Descriptive Statistics
mode(c1rmscal)
median(c1rmscal)
mean(c1rmscal)
range(c1rmscal)
IQR(c1rmscal)
AD(c1rmscal)
var.pop(c1rmscal)
var(c1rmscal)
sd(c1rmscal)
quantile(c1rmscal)
summary(c1rmscal)
fivenum(c1rmscal)

# Describing Shape
detach(ecls200.aa)
attach(ecls200.white)
skewness(c1rmscal)
SEsk(c1rmscal)
kurtosis(c1rmscal)
SEku(c1rmscal)
detach(ecls200.white)
attach(ecls200.aa)
skewness(c1rmscal)
SEsk(c1rmscal)
kurtosis(c1rmscal)
SEku(c1rmscal)
detach(ecls200.aa)

# Boxplot by race
attach(ecls200)
boxplot(c1rmscal ~ race)
# Leave the search path clean for the scripts that follow
detach(ecls200)

# Script3.4.SumYSumY2 ----
# Finding preliminary values for sum of Y and sum of Y2

# Define the population
pop <- 1:20
y <- sample(pop, size = 10, replace = TRUE)
y
sum.y <- sum(y)
sum.y
sum.y2 <- sum(y * y)
sum.y2

# Script3.5.Exercise3.6 ----

source("C:/R/functions.txt")
# NOTE(review): the Chapter 3 exercise reads a file named chap2.ex2.txt --
# presumably the data set is reused from Chapter 2; confirm the path.
data.chap3.ex6 <- read.table("c:/rbook/chap2.ex2.txt", header = TRUE)
attach(data.chap3.ex6)
length(jobsat)
table(jobsat)
mode(jobsat)
median(jobsat)
mean(jobsat)
range(jobsat)
IQR(jobsat)
var.pop(jobsat)
var(jobsat)
sd(jobsat)
quantile(jobsat)
summary(jobsat)
fivenum(jobsat)
skewness(jobsat)
SEsk(jobsat)
kurtosis(jobsat)
SEku(jobsat)

# Same data with two different bin widths (2, then 3)
hist(jobsat,
  probability = TRUE, breaks = seq(3.5, 29.5, 2),
  xlab = "Job Satisfaction Scores"
)
lines(density(jobsat))
rug(jitter(jobsat))
windows()
hist(jobsat,
  probability = TRUE, breaks = seq(2.5, 29.5, 3),
  xlab = "Job Satisfaction Scores"
)
lines(density(jobsat))
rug(jitter(jobsat))

# Boxplot annotated with the five-number summary
boxplot(jobsat)
f <- fivenum(jobsat)
text(rep(1.3, 5), f,
  labels = c("minimum", "lower hinge", "median", "upper hinge", "maximum")
)
# rm(list = ls()) below does not detach data frames, so do it explicitly
detach(data.chap3.ex6)

# Script3.6.AdditionalExercises ----
# Describing the distributions of the Asian and Hispanic Students

# Clean out workspace
rm(list = ls())
library(car)
# The "car" package has many useful procedures
# Read the functions from the text file
source("C:/R/functions.txt")

# Read the Asian data
asian <- read.table("c:/r684/asianpkread.txt", header = TRUE)
attach(asian)
names(asian)
# Find out how many Asian scores there are
na <- length(c1rrscal)
na

# Finding the measures of location/central tendency
mode(c1rrscal)
median(c1rrscal)
mean(c1rrscal)

# Finding the measures of dispersion
range(c1rrscal)
# The "right" answer
rnge <- max(c1rrscal) - min(c1rrscal)
rnge
IQR(c1rrscal)

# Finding the average deviation
m <- mean(c1rrscal)
devs <- c1rrscal - m
AD <- mean(abs(devs))
AD

# Finding the variance and standard deviation of a population
var.pop(c1rrscal)
sqrt(var.pop(c1rrscal))

# Finding the variance and standard deviation of a sample
var(c1rrscal)
sqrt(var(c1rrscal))
sd(c1rrscal)

# Now let's look at shape
skewness(c1rrscal)
SEsk(c1rrscal)
kurtosis(c1rrscal)
SEku(c1rrscal)

# Some aggregated information
summary(c1rrscal)
boxplot(c1rrscal)
detach(asian)

# Read in data for Hispanic Children
hisp <- read.table("c:/r684/hisppkread.txt", header = TRUE)
attach(hisp)
# Find out how many Hispanic scores there are
nh <- length(c1rrscal)
nh

# Finding the measures of location/central tendency
mode(c1rrscal)
median(c1rrscal)
mean(c1rrscal)

# Finding the measures of dispersion
range(c1rrscal)
# The "right" answer
rnge <- max(c1rrscal) - min(c1rrscal)
rnge
IQR(c1rrscal)

# Finding the average deviation
m <- mean(c1rrscal)
devs <- c1rrscal - m
AD <- mean(abs(devs))
AD

# Finding the variance and standard deviation of a population
var.pop(c1rrscal)
sqrt(var.pop(c1rrscal))

# Finding the variance and standard deviation of a sample
var(c1rrscal)
sqrt(var(c1rrscal))
sd(c1rrscal)

# Now let's look at shape
skewness(c1rrscal)
SEsk(c1rrscal)
kurtosis(c1rrscal)
SEku(c1rrscal)

# Some aggregated information
summary(c1rrscal)
boxplot(c1rrscal)
# hisp is the frame attached at this point (asian was detached above)
detach(hisp)

# Stack both groups with a group indicator (1 = Asian, 2 = Hispanic)
# and compare the group means
asian$g <- rep(1, length(asian$c1rrscal))
hisp$g <- rep(2, length(hisp$c1rrscal))
both <- rbind(asian, hisp)
with(both, tapply(c1rrscal, g, mean))