Lab4 Jialin Huang 2025-02-05 Part 1: ANOVA on Dog Data Load Dataset dog_data <- read.csv("dog_data_post.csv") head(dog_data) ## RID GroupAssignment Age_Yrs Year_of_Study Live_Pets Consumer_BARK Stage ## 1 1 Control 21 3 2 1 post ## 2 2 Direct 19 1 2 1 post ## 3 3 Indirect 18 1 2 1 post ## 4 4 Control 18 1 2 1 post ## 5 5 Direct 19 1 2 1 post ## 6 6 Indirect 20 2 2 1 post ## PANAS_PA SHS SCS Engagement FS Stress Homesick Lonely PANAS_NA ## 1 3.8 3.333333 3.800000 4 6.000 2 3 1.70 1.2 ## 2 3.2 4.000000 5.263158 2 6.000 1 2 1.60 1.0 ## 3 3.0 3.333333 4.150000 3 5.375 3 2 2.25 1.6 ## 4 3.8 3.000000 5.100000 3 6.375 2 1 2.05 1.6 ## 5 4.0 2.666667 3.600000 4 5.875 4 2 2.70 1.6 ## 6 4.4 3.333333 4.650000 2 5.625 4 3 2.40 1.4 ## Diff Group ## 1 0 Control ## 2 -1 Direct ## 3 -1 Indirect ## 4 0 Control ## 5 1 Direct ## 6 0 Indirect Compute ANOVA Manually # Compute group means and overall mean group_means <- dog_data %>% group_by(GroupAssignment) %>% summarize(MeanDiff = mean(Diff)) overall_mean <- mean(dog_data$Diff) # Sum of Squares ssb <- sum(table(dog_data$GroupAssignment) * (group_means$MeanDiff - overall_mean)^2) 1 ssw <- sum((dog_data$Diff - dog_data$GroupAssignment %>% map_dbl(~group_means$MeanDiff[match(., group_means$GroupAssignment)]))^2) df_between <- n_distinct(dog_data$GroupAssignment) - 1 df_within <- nrow(dog_data) - n_distinct(dog_data$GroupAssignment) msb <- ssb / df_between msw <- ssw / df_within f_statistic <- msb / msw p_value <- pf(f_statistic, df_between, df_within, lower.tail = FALSE) anova_manual <- data.frame(Source = c("Between Groups", "Within Groups"), SS = c(ssb, ssw), DF = c(df_between, df_within), MS = c(msb, msw), F = c(f_statistic, NA)) kable(anova_manual) Source SS DF MS F Between Groups 33.05879 2 16.5293925 20.72045 Within Groups 224.16305 281 0.7977333 NA Compute ANOVA using aov() model <- aov(Diff ~ GroupAssignment, data=dog_data) summary(model) ## Df Sum Sq Mean Sq F value Pr(>F) ## GroupAssignment 2 33.06 16.529 20.72 4.04e-09 *** ## 
Residuals 281 224.16 0.798 ## --- ## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 Part 2: Exercises 3.9 & 3.18 Exercise 3.9: Portland Cement Tensile Strength cement_data <- data.frame( Mixing_Technique = rep(1:4, each=4), Tensile_Strength = c(3129, 3000, 2865, 2890, 3200, 3300, 2975, 3150, 2800, 2900, 2985, 3050, 2600, 2700, 2600, 2765) ) cement_model <- aov(Tensile_Strength ~ factor(Mixing_Technique), data=cement_data) summary(cement_model) ## Df Sum Sq Mean Sq F value Pr(>F) ## factor(Mixing_Technique) 3 489740 163247 12.73 0.000489 *** ## Residuals 12 153908 12826 ## --- ## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 2 Graphical Display for Cement Strength ggplot(cement_data, aes(x=factor(Mixing_Technique), y=Tensile_Strength)) + geom_boxplot() + ggtitle("Boxplot of Tensile Strength by Mixing Technique") Boxplot of Tensile Strength by Mixing Technique Tensile_Strength 3200 3000 2800 2600 1 2 3 4 factor(Mixing_Technique) Exercise 3.18: Brick Density vs. Firing Temperature brick_data <- data.frame( Temperature = rep(c(100, 125, 150, 175), each=5), Density = c(21.8, 21.9, 21.7, 21.6, 21.7, 21.7, 21.4, 21.5, 21.4, 21.4, 21.9, 21.8, 21.8, 21.6, 21.5, 21.9, 21.7, 21.8, 21.4, 21.4) ) brick_model <- aov(Density ~ factor(Temperature), data=brick_data) summary(brick_model) ## Df Sum Sq Mean Sq F value Pr(>F) ## factor(Temperature) 3 0.2095 0.06983 2.539 0.0931 . ## Residuals 16 0.4400 0.02750 ## --- ## Signif. 
codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 3 Graphical Display for Brick Density ggplot(brick_data, aes(x=factor(Temperature), y=Density)) + geom_boxplot() + ggtitle("Boxplot of Brick Density by Firing Temperature") Boxplot of Brick Density by Firing Temperature 21.9 21.8 Density 21.7 21.6 21.5 21.4 100 125 150 175 factor(Temperature) Residual Analysis par(mfrow=c(2,2)) plot(cement_model, which=1:2) # Residuals vs Fitted, QQ Plot plot(brick_model, which=1:2) 4 0 7 2700 2900 3100 6 7 −2 −1 1 2 20 19 21.70 Fitted values Q−Q Residuals 1 16 −1 0.1 16 −0.3 Residuals Residuals vs Fitted 21.60 0 Theoretical Quantiles Standardized residuals Fitted values 21.50 1 1 6 Q−Q Residuals −1 Standardized residuals 200 1 −200 Residuals Residuals vs Fitted 19 −2 20 −1 0 1 2 Theoretical Quantiles Normality Test shapiro.test(resid(cement_model)) ## ## Shapiro-Wilk normality test ## ## data: resid(cement_model) ## W = 0.97046, p-value = 0.846 shapiro.test(resid(brick_model)) ## ## Shapiro-Wilk normality test ## ## data: resid(brick_model) ## W = 0.95823, p-value = 0.5091 Conclusion The ANOVA results indicate that mixing technique significantly affects tensile strength (F = 12.73, p = 0.000489), while firing temperature does not significantly affect brick density at α = 0.05 (F = 2.539, p = 0.0931). The residual plots show no strong departures from constant variance or normality, and the Shapiro-Wilk tests do not reject normality of the residuals for either model (p = 0.846 and p = 0.5091), so the ANOVA assumptions appear reasonably met. Because only mixing technique is significant, Fisher’s LSD test could be used for post-hoc comparisons among the four mixing techniques. 5