# R Scripts

# R Scripts for Chapter 3
# Script3.1.Sampling Stability
# Simulation comparing how much the mode, median, and mean vary from one
# random sample to the next.

# Start from an empty workspace, then load the car package and the helper
# functions kept in the text file
rm(list = ls())
library(car)
source("C:/R/functions.txt")

# Turn off significance stars in printed output
options(show.signif.stars = FALSE)

# Number of simulated samples, with one pre-allocated slot per sample for
# each of the three statistics
N <- 20000
mn <- numeric(N)
med <- numeric(N)
mo <- numeric(N)

# Every replication draws 100 values from 1:100 with replacement.
# NOTE(review): mode() is presumably the helper loaded from functions.txt,
# not base::mode(); the median of whatever it returns is what gets stored,
# which looks like a way to reduce tied modes to one number - confirm.
for (rep_idx in seq_len(N)) {
  draw <- sample(1:100, 100, replace = TRUE)
  mo[rep_idx] <- median(mode(draw))
  med[rep_idx] <- median(draw)
  mn[rep_idx] <- mean(draw)
}

# Numerical summaries of the three sampling distributions
summary(mo)
summary(med)
summary(mn)

# Histogram of the means on the density scale, overlaid with the estimated
# densities of the mean (lty 1), median (lty 2), and mode (lty 3)
hist(mn, probability = TRUE, xlim = c(10, 90))
overlays <- list(mn, med, mo)
for (k in seq_along(overlays)) {
  lines(density(overlays[[k]]), lty = k, lwd = 2)
}
legend(10, .13,
       legend = c("Mean", "Median", "Mode"),
       title = "Densities for Mean, Median, and Mode",
       lty = c(1, 2, 3), cex = .7)

# Accounting for the skew of the mode
# A fresh set of N samples is drawn; only the mode-based value is kept
for (rep_idx in seq_len(N)) {
  mo[rep_idx] <- median(mode(sample(1:100, 100, replace = TRUE)))
}
hist(mo)
# Script3.2.SimulatingSampleVariance
# Simulation contrasting two variance estimates computed from resamples of
# a known population: sum of squares divided by n, and var().

# Empty the workspace, then load the car package and the helper functions
rm(list = ls())
library(car)
source("C:/R/functions.txt")

# No significance stars in printed output
options(show.signif.stars = FALSE)

# Population of the integers 1 to 30 and its variance
# (var.pop() is presumably supplied by functions.txt - confirm)
data <- 1:30
sigma2 <- var.pop(data)
sigma2

# Sample size for this run
# Repeat for n = 10, 15, 20, 25, 30, 40, and 50
n <- 5

# Number of resamples, and storage for the two estimates from each one
N <- 100000
stat1 <- numeric(N)
stat2 <- numeric(N)

for (draw_idx in seq_len(N)) {
  resample <- sample(data, n, replace = TRUE)
  centered <- resample - mean(resample)
  # Sum of squared deviations divided by n
  stat1[draw_idx] <- sum(centered^2) / n
  # Built-in sample variance
  stat2[draw_idx] <- var(resample)
}

# Reference value sigma2 - sigma2/n, shown next to the simulated averages
(ev.bias <- sigma2 - sigma2 / n)
mean(stat1)
mean(stat2)

# Distributions of the two estimates
hist(stat1)
hist(stat2)
# Script3.3.ChapterCaseStudy
# Describes the Pre-K IRT math scores (c1rmscal) separately for the two
# race groups coded 1 (Whites) and 2 (African Americans) in ecls200, then
# compares the groups in a single boxplot.
# NOTE(review): mode(), AD(), var.pop(), SEsk() and SEku() are not base R
# statistics; presumably they are defined in functions.txt (skewness() and
# kurtosis() may also come from there or from a package) - confirm.

# Clean out workspace
rm(list = ls())
library(car)
source("C:/R/functions.txt")
# Suppress levels of significance
options(show.signif.stars = FALSE)

# Read in the data
ecls200 <- read.table("c:/rbook/ecls200.txt", header = TRUE)

# Split by race code. subset() evaluates `race` and the selected columns
# inside ecls200 itself, so the full data frame is never attached; this
# avoids masking messages and a data frame left behind on the search path.
ecls200.white <- subset(ecls200, race == "1",
                        select = c(c1rrtsco, c1rmscal))
ecls200.aa <- subset(ecls200, race == "2",
                     select = c(c1rrtsco, c1rmscal))

# Looking at the data for Whites
attach(ecls200.white)
table(c1rmscal)
# Open new graphic window (windows() is available on Windows only)
windows()
# Density-scale histogram with a kernel density curve and a jittered rug
hist(c1rmscal, probability = TRUE, breaks = seq(4.5, 49.5, 5),
     xlab = "Pre-K IRT Math Scores for Whites",
     ylim = c(0, .06))
lines(density(c1rmscal))
rug(jitter(c1rmscal))
# Descriptive Statistics
mode(c1rmscal)
median(c1rmscal)
mean(c1rmscal)
range(c1rmscal)
IQR(c1rmscal)
AD(c1rmscal)
var.pop(c1rmscal)
var(c1rmscal)
sd(c1rmscal)
quantile(c1rmscal)
summary(c1rmscal)
fivenum(c1rmscal)

# Switch to file for African Americans
detach(ecls200.white)
attach(ecls200.aa)
table(c1rmscal)
# Open new graphic window
windows()
# Same display as above; the axis label now names the group actually plotted
hist(c1rmscal, probability = TRUE, breaks = seq(4.5, 49.5, 5),
     xlab = "Pre-K IRT Math Scores for African Americans",
     ylim = c(0, .07))
lines(density(c1rmscal))
rug(jitter(c1rmscal))
# Descriptive Statistics
mode(c1rmscal)
median(c1rmscal)
mean(c1rmscal)
range(c1rmscal)
IQR(c1rmscal)
AD(c1rmscal)
var.pop(c1rmscal)
var(c1rmscal)
sd(c1rmscal)
quantile(c1rmscal)
summary(c1rmscal)
fivenum(c1rmscal)

# Describing Shape
# Skewness and kurtosis with their standard errors, Whites first
detach(ecls200.aa)
attach(ecls200.white)
skewness(c1rmscal)
SEsk(c1rmscal)
kurtosis(c1rmscal)
SEku(c1rmscal)
# ... then African Americans
detach(ecls200.white)
attach(ecls200.aa)
skewness(c1rmscal)
SEsk(c1rmscal)
kurtosis(c1rmscal)
SEku(c1rmscal)
detach(ecls200.aa)

# Boxplot by race
# The formula is resolved through `data =`, so nothing stays attached
boxplot(c1rmscal ~ race, data = ecls200)
# Script3.4.SumYSumY2
# Preliminary quantities: the sum of Y and the sum of squared Y

# The population is the integers 1 through 20
pop <- seq_len(20)

# Draw ten values with replacement and display them
(y <- sample(x = pop, size = 10, replace = TRUE))

# Sum of the sampled values
(sum.y <- sum(y))

# Sum of the squared sampled values
(sum.y2 <- sum(y * y))
# Script3.5.Exercise3.6
# Descriptive statistics, histograms, and an annotated boxplot for the job
# satisfaction scores (jobsat).
# NOTE(review): mode(), var.pop(), SEsk() and SEku() are presumably the
# helpers defined in functions.txt - confirm.
source("C:/R/functions.txt")
# NOTE(review): the exercise reads the file named chap2.ex2.txt; presumably
# the same data are reused from Chapter 2 - confirm the file name.
data.chap3.ex6 <- read.table("c:/rbook/chap2.ex2.txt", header = TRUE)
attach(data.chap3.ex6)

# Sample size and frequency table
length(jobsat)
table(jobsat)

# Location
mode(jobsat)
median(jobsat)
mean(jobsat)

# Dispersion
range(jobsat)
IQR(jobsat)
var.pop(jobsat)
var(jobsat)
sd(jobsat)

# Aggregated summaries
quantile(jobsat)
summary(jobsat)
fivenum(jobsat)

# Shape, with standard errors
skewness(jobsat)
SEsk(jobsat)
kurtosis(jobsat)
SEku(jobsat)

# Density-scale histogram using bins of width 2, with a kernel density
# curve and a jittered rug
hist(jobsat, probability = TRUE, breaks = seq(3.5, 29.5, 2),
     xlab = "Job Satisfaction Scores")
lines(density(jobsat))
rug(jitter(jobsat))

# Same display in a new graphics window, this time with bins of width 3
windows()
hist(jobsat, probability = TRUE, breaks = seq(2.5, 29.5, 3),
     xlab = "Job Satisfaction Scores")
lines(density(jobsat))
rug(jitter(jobsat))

# Boxplot with each of the five summary values labelled at x = 1.3
boxplot(jobsat)
f <- fivenum(jobsat)
text(rep(1.3, 5), f, labels = c("minimum", "lower hinge", "median",
                                "upper hinge", "maximum"))

# Remove the data frame from the search path; otherwise it stays attached
# even after a later rm(list = ls()), which only clears the workspace
detach(data.chap3.ex6)
# Script3.6.AdditionalExercises
# Describing the distributions of the Asian and Hispanic Students
# The same set of descriptive statistics is computed on c1rrscal for each
# group, and the two group means are then compared from a combined data frame.
# NOTE(review): mode(), var.pop(), SEsk() and SEku() are presumably the
# helpers defined in functions.txt - confirm.

# Clean out workspace
rm(list = ls())
library(car)
# The "car" package has many useful procedures
# Read the functions from the text file
source("C:/R/functions.txt")

# Read the Asian data
asian <- read.table("c:/r684/asianpkread.txt", header = TRUE)
attach(asian)
names(asian)
# Find out how many Asian scores there are
na <- length(c1rrscal)
na
# Finding the measures of location/central tendency
mode(c1rrscal)
median(c1rrscal)
mean(c1rrscal)
# Finding the measures of dispersion
# range() reports the minimum and maximum ...
range(c1rrscal)
# ... the "right" answer is the single number max - min
rnge <- max(c1rrscal) - min(c1rrscal)
rnge
IQR(c1rrscal)
# Finding the average deviation (mean absolute deviation from the mean)
m <- mean(c1rrscal)
devs <- c1rrscal - m
AD <- mean(abs(devs))
AD
# Finding the variance and standard deviation of a population
var.pop(c1rrscal)
sqrt(var.pop(c1rrscal))
# Finding the variance and standard deviation of a sample
var(c1rrscal)
sqrt(var(c1rrscal))
sd(c1rrscal)
# Now let's look at shape
skewness(c1rrscal)
SEsk(c1rrscal)
kurtosis(c1rrscal)
SEku(c1rrscal)
# Some aggregated information
summary(c1rrscal)
boxplot(c1rrscal)
detach(asian)

# Read in data for Hispanic Children
hisp <- read.table("c:/r684/hisppkread.txt", header = TRUE)
attach(hisp)
# Find out how many Hispanic scores there are
nh <- length(c1rrscal)
nh
# Finding the measures of location/central tendency
mode(c1rrscal)
median(c1rrscal)
mean(c1rrscal)
# Finding the measures of dispersion
# range() reports the minimum and maximum ...
range(c1rrscal)
# ... the "right" answer is the single number max - min
rnge <- max(c1rrscal) - min(c1rrscal)
rnge
IQR(c1rrscal)
# Finding the average deviation (mean absolute deviation from the mean)
m <- mean(c1rrscal)
devs <- c1rrscal - m
AD <- mean(abs(devs))
AD
# Finding the variance and standard deviation of a population
var.pop(c1rrscal)
sqrt(var.pop(c1rrscal))
# Finding the variance and standard deviation of a sample
var(c1rrscal)
sqrt(var(c1rrscal))
sd(c1rrscal)
# Now let's look at shape
skewness(c1rrscal)
SEsk(c1rrscal)
kurtosis(c1rrscal)
SEku(c1rrscal)
# Some aggregated information
summary(c1rrscal)
boxplot(c1rrscal)
# hisp is the data frame attached at this point (asian was detached
# earlier), so hisp is the one to release
detach(hisp)

# Tag each row with a group code (1 = asian, 2 = hisp) and stack the two
# data frames
asian$g <- rep(1, length(asian$c1rrscal))
hisp$g <- rep(2, length(hisp$c1rrscal))
both <- rbind(asian, hisp)
# Mean of c1rrscal within each group code; with() looks the columns up in
# `both` directly, so nothing has to be attached
with(both, tapply(c1rrscal, g, mean))
# Download