Uploaded by Cxrtis Bovell

Assignment 1 econ 322

advertisement
---
title: "Assignment 1 Econ 322"
author: "Curtis Bovell"
date: "`r Sys.Date()`"
output: word_document
---
Question A)
# Question A: split the wage column of `data` into four subsamples and report
# the mean wage of each.
#   Wm  - wages of married workers (married == 1)
#   Wnm - wages of unmarried workers (married == 0)
#   Wl  - wages of workers with a KWW score below 30
#   Wh  - wages of workers with a KWW score above 40
# Rows with a KWW score from 30 to 40 inclusive fall into neither Wl nor Wh.
Wm <- with(data, wage[married == 1])
Wnm <- with(data, wage[married == 0])
Wl <- with(data, wage[KWW < 30])
Wh <- with(data, wage[KWW > 40])

# Sample mean of each subsample (values recorded from the original run)
print(mean(Wm))  # 977.0479
print(mean(Wnm)) # 798.44
print(mean(Wl))  # 809.355
print(mean(Wh))  # 1148.517
# There are more individuals with a high KWW score than with a low one, and more
# workers who are married than unmarried.
# The sample Wh has the largest mean.
Question B)
# Question B: sample standard deviation of wage within each subsample
# (values recorded from the original run).
print(sd(Wm))  # 407.0803
print(sd(Wnm)) # 343.2095
print(sd(Wl))  # 337.6642
print(sd(Wh))  # 473.9471
# Therefore the Wh sample is the most volatile, since it has the highest standard deviation.
Question C)
# Question C: descriptive-statistics table for the main variables.
library(stargazer)

# Columns of `data` to summarise, in the order they appear in the table
summary_vars <- c("wage", "hours", "IQ", "KWW", "educ")

# type = "text" prints the table as plain text
stargazer(data[summary_vars], type = "text")
Question D)
# Question D: for each wage subsample, draw a density-scaled histogram and
# overlay the normal density that has the same mean and standard deviation.
L <- list(Wh = Wh, Wl = Wl, Wm = Wm, Wnm = Wnm)
for (sample_name in names(L)) {
  y <- L[[sample_name]]
  # freq = FALSE puts the bars on the density scale so the curve is comparable
  hist(y, main = paste("Sample", sample_name), xlab = "Samples", freq = FALSE)
  mu <- mean(y)
  sigma <- sd(y)
  # Dashed red reference curve: normal density fitted to this sample
  curve(dnorm(x, mean = mu, sd = sigma), add = TRUE, col = 2, lty = 2, lwd = 2)
}
## Therefore, the most normal-looking distribution among the histograms is the Wnm
## sample, since its deviation is largely spread out.
Question E)
# Question E: repeat the histogram-plus-normal-curve comparison on the natural
# log of each wage subsample.
R <- list(
  "Log of Wh" = log(Wh),
  "Log of Wl" = log(Wl),
  "Log of Wm" = log(Wm),
  "Log of Wnm" = log(Wnm)
)
for (sample_name in names(R)) {
  o <- R[[sample_name]]
  # freq = FALSE puts the bars on the density scale so the curve is comparable
  hist(o, main = paste("Sample", sample_name), xlab = "Samples", freq = FALSE)
  mu <- mean(o)
  sigma <- sd(o)
  # Dashed red reference curve: normal density fitted to the logged sample
  curve(dnorm(x, mean = mu, sd = sigma), add = TRUE, col = 2, lty = 2, lwd = 2)
}
## There is a difference between these histograms and those from the previous question; however, the sample
## Wnm still has the most normal-looking distribution.
Question F)
# Question F (levels): normal QQ plot of each wage subsample, with the
# reference line added by qqline().
L <- list(Wh = Wh, Wl = Wl, Wm = Wm, Wnm = Wnm)
for (sample_name in names(L)) {
  y <- L[[sample_name]]
  qqnorm(y, main = paste("QQ plot of", sample_name))
  qqline(y)
}
Question F)
# Question F (logs): normal QQ plot of the natural log of each wage subsample,
# with the reference line added by qqline().
R <- list(
  "log Wh" = log(Wh),
  "log Wl" = log(Wl),
  "log Wm" = log(Wm),
  "log Wnm" = log(Wnm)
)
for (sample_name in names(R)) {
  o <- R[[sample_name]]
  qqnorm(o, main = paste("QQ plot of", sample_name))
  qqline(o)
}
### After observing the series in both forms, the log form seems to be the more normal, since the points of its QQ plots
### closely follow the qqline. This indicates a closer approximation to a normal distribution.
Question G)
# Question G: box plots of wage grouped by the `married` column,
# one box per marital-status group.
boxplot(
  wage ~ married,
  data = data,
  main = "Wage Box Plot and marital status",
  xlab = "married",
  ylab = "wage"
)
# There are noticeable differences between the two marital statuses. There are more individuals who are married,
# and the married group has a higher median wage than the unmarried group. The unmarried group doesn't seem to
# have as wide a range, running from around 200 to 2000, while the married group ranges from about 50 to 3000. Lastly,
# the married workers have more outliers compared to the unmarried workers.
Download