# Introduction to R ----
#
# Walk-through of basic data exploration and linear regression on the
# course grades data. Columns used below: mid1, mid2 (exam scores) and
# sec (section label). The data frame is never attach()ed; columns are
# reached with `$`, with(), or a modelling function's `data =` argument,
# so the search path is left untouched even if a step fails.

# Read data ----------------------------------------------------------------

# read.table() splits fields on whitespace by default, so the file must
# have no spaces in variable names and no missing values.
grades <- read.table("672_grades.txt", header = TRUE)
summary(grades)  # summarize data

# Accessing columns ----------------------------------------------------------

# A bare column name is not visible outside the data frame. The call is
# wrapped in try() so the error is shown but a sourced run continues.
try(sd(mid1))           # error: mid1 cannot be found
sd(grades$mid1)         # specify that mid1 belongs to the data frame grades
with(grades, sd(mid1))  # alternatively, evaluate inside grades to avoid `$`
with(grades, table(sec))

# Plots ----------------------------------------------------------------------

boxplot(mid1 ~ sec, data = grades)  # compare mid1 across sec(tions)
with(grades, boxplot(mid1, mid2))   # compare mid1 and mid2
with(grades, plot(mid1, mid2, type = "p"))  # scatter plot of mid2 against mid1

# Regression line using every observation (dotted).
abline(lm(mid2 ~ mid1, data = grades), lty = 3)

# Regression ------------------------------------------------------------------

# Regression excluding non-positive scores (treated as outliers). Fitted
# once and saved in the.regression1, then reused for the solid line.
the.regression1 <- lm(mid2 ~ mid1, data = grades,
                      subset = (mid1 > 0 & mid2 > 0))
abline(the.regression1, lty = 1)  # add the regression line without outliers
summary(the.regression1)          # show the regression summary

the.coefficient <- coef(the.regression1)  # save the regression coefficients
the.covmatrix <- vcov(the.regression1)    # save the coefficient covariance matrix
the.residual <- resid(the.regression1)    # save the residuals

plot(the.residual)  # plot the residuals
hist(the.residual)  # histogram of the residuals
sort(the.residual)  # sort the residuals

# Look at individual observations. NOTE(review): rows 47 and 42 are
# hard-coded, presumably the extreme residuals in the sorted output above;
# confirm against the current data file.
grades[c(47, 42), ]

# Section-specific regressions ------------------------------------------------

summary(lm(mid2 ~ mid1, data = grades, subset = (sec == "B")))  # section B only
summary(lm(mid2 ~ mid1, data = grades, subset = (sec == "C")))  # section C only

# Dummy variable: TRUE for section B. The base group is every other
# section (section C, assuming B and C are the only sections; verify).
dummy <- grades$sec == "B"
# Interaction term: the logical dummy is coerced to 0/1 by `*`, so this
# equals mid1 for section B and 0 otherwise.
interaction.term <- dummy * grades$mid1
# dummy and interaction.term are not columns of grades; lm() finds them in
# the environment of the formula (here, the workspace).
summary(lm(mid2 ~ dummy + mid1 + interaction.term, data = grades))