---
title: "Assignment 1 Econ 322"
author: "Curtis Bovell"
date: "`r Sys.Date()`"
output: word_document
---

```{r setup}
library(stargazer)

# NOTE(review): `data` is never created in this document. Load the assignment
# data set into `data` before this point (presumably a wage data set with
# columns wage, married, KWW, hours, IQ and educ -- confirm). Without it,
# `data` resolves to the base function data() and every `data$...` below
# fails with an unhelpful "closure is not subsettable" error.
if (!is.data.frame(data)) {
  stop("`data` must be loaded as a data frame before knitting.", call. = FALSE)
}
```

Question A)

```{r question-a}
# Wage sub-samples used throughout the assignment.
Wm <- data$wage[data$married == 1] # married == 1
Wnm <- data$wage[data$married == 0] # married == 0
Wl <- data$wage[data$KWW < 30] # KWW below 30
Wh <- data$wage[data$KWW > 40] # KWW above 40

# Compute the sample mean for the data
mean(Wm) # 977.0479
mean(Wnm) # 798.44
mean(Wl) # 809.355
mean(Wh) # 1148.517

# There are more individuals with a higher KWW than with a lower one, and
# more workers are married as well.
# The sample Wh has the largest mean.
```

Question B)

```{r question-b}
sd(Wm) # 407.0803
sd(Wnm) # 343.2095
sd(Wl) # 337.6642
sd(Wh) # 473.9471

# Therefore the Wh sample is the most volatile, since it has the highest
# deviation from the mean.
```

Question C)

```{r question-c}
# as.data.frame() keeps the summary table working if `data` was read in as a
# tibble; for a plain data frame it is a no-op.
summary_vars <- c("wage", "hours", "IQ", "KWW", "educ")
stargazer(as.data.frame(data[summary_vars]), type = "text")
```

Question D)

```{r question-d}
# Draw one density-scaled histogram per element of `samples` (a named list of
# numeric vectors) and overlay, as a dashed red line, the normal density that
# has the sample's own mean and standard deviation. The element name goes into
# the plot title; `xlab` labels the horizontal axis. Returns `samples`
# invisibly.
plot_hist_with_normal <- function(samples, xlab) {
  for (label in names(samples)) {
    values <- samples[[label]]
    hist(values, main = paste("Sample", label), xlab = xlab, freq = FALSE)
    # curve() evaluates its expression in this frame, so m and s are visible.
    m <- mean(values)
    s <- sd(values)
    curve(dnorm(x, m, s), add = TRUE, col = 2, lty = 2, lwd = 2)
  }
  invisible(samples)
}

wage_samples <- list(Wh = Wh, Wl = Wl, Wm = Wm, Wnm = Wnm)
plot_hist_with_normal(wage_samples, xlab = "Wage")

## Therefore the most normal-looking distribution in the histograms is the
## Wnm sample, since its deviation is widely spread out.
```

Question E)

```{r question-e}
log_wage_samples <- list(
  "Log of Wh" = log(Wh),
  "Log of Wl" = log(Wl),
  "Log of Wm" = log(Wm),
  "Log of Wnm" = log(Wnm)
)
plot_hist_with_normal(log_wage_samples, xlab = "log(Wage)")

## There is a difference between these histograms and those from the previous
## question; however, the sample Wnm still has the most normal-looking
## distribution.
```
Question F)

```{r question-f}
# Draw a normal QQ plot, with its reference line, for every element of
# `samples` (a named list of numeric vectors). The element name goes into the
# plot title. Returns `samples` invisibly.
plot_qq <- function(samples) {
  for (label in names(samples)) {
    values <- samples[[label]]
    qqnorm(values, main = paste("QQ plot of", label))
    qqline(values)
  }
  invisible(samples)
}

# Wages in levels.
plot_qq(list(Wh = Wh, Wl = Wl, Wm = Wm, Wnm = Wnm))
```

Question F) (log wages)

```{r question-f-log}
# Same samples after a log transform.
plot_qq(
  list(
    "log Wh" = log(Wh),
    "log Wl" = log(Wl),
    "log Wm" = log(Wm),
    "log Wnm" = log(Wnm)
  )
)

### After observing the series in both forms, the log form seems to be the
### more normal one, since the points of its QQ plots closely follow the
### qqline. This indicates a closer approximation to a normal distribution.
```

Question G)

```{r question-g}
# Side-by-side box plots of wage for each value of `married`.
boxplot(
  wage ~ married,
  data = data,
  main = "Wage Box Plot and marital status",
  xlab = "married",
  ylab = "wage"
)

# There are noticeable differences between the marital statuses. There are
# more married individuals, and the married group has a higher median than the
# unmarried group. The unmarried group does not seem to have a wide range,
# running from about 200 to 2000, while the married group spans about 50 to
# 3000. Lastly, the married workers have more outliers than the unmarried
# workers.
```