Uploaded by Jialin Huang

ANOVA Analysis: Dog Data, Cement & Brick Experiments

advertisement
Lab4
Jialin Huang
2025-02-05
Part 1: ANOVA on Dog Data
Load Dataset
# Read dog_data_post.csv from the working directory into a data frame and
# preview its first six rows.
dog_data <- read.csv(file = "dog_data_post.csv")
head(dog_data, n = 6L)
##   RID GroupAssignment Age_Yrs Year_of_Study Live_Pets Consumer_BARK Stage
## 1   1         Control      21             3         2             1  post
## 2   2          Direct      19             1         2             1  post
## 3   3        Indirect      18             1         2             1  post
## 4   4         Control      18             1         2             1  post
## 5   5          Direct      19             1         2             1  post
## 6   6        Indirect      20             2         2             1  post
##   PANAS_PA      SHS      SCS Engagement    FS Stress Homesick Lonely PANAS_NA
## 1      3.8 3.333333 3.800000          4 6.000      2        3   1.70      1.2
## 2      3.2 4.000000 5.263158          2 6.000      1        2   1.60      1.0
## 3      3.0 3.333333 4.150000          3 5.375      3        2   2.25      1.6
## 4      3.8 3.000000 5.100000          3 6.375      2        1   2.05      1.6
## 5      4.0 2.666667 3.600000          4 5.875      4        2   2.70      1.6
## 6      4.4 3.333333 4.650000          2 5.625      4        3   2.40      1.4
##   Diff    Group
## 1    0  Control
## 2   -1   Direct
## 3   -1 Indirect
## 4    0  Control
## 5    1   Direct
## 6    0 Indirect
Compute ANOVA Manually
# Manual one-way ANOVA of Diff across the levels of GroupAssignment.
# Builds the same decomposition that aov() reports: between-group and
# within-group sums of squares, their degrees of freedom, mean squares,
# the F statistic and its upper-tail p-value.

# Compute group means and overall mean
group_means <- dog_data %>%
  group_by(GroupAssignment) %>%
  summarize(MeanDiff = mean(Diff))
overall_mean <- mean(dog_data$Diff)

# Sum of Squares
# Look up each group's size by name so the counts line up with the rows of
# group_means regardless of how table() and summarize() order the groups.
group_sizes <- as.numeric(
  table(dog_data$GroupAssignment)[as.character(group_means$GroupAssignment)]
)
ssb <- sum(group_sizes * (group_means$MeanDiff - overall_mean)^2)

# match() is vectorized: it maps every observation to the row of
# group_means that holds its own group's mean.
obs_group_mean <- group_means$MeanDiff[
  match(dog_data$GroupAssignment, group_means$GroupAssignment)
]
ssw <- sum((dog_data$Diff - obs_group_mean)^2)

# Degrees of freedom: (groups - 1) between, (observations - groups) within.
df_between <- n_distinct(dog_data$GroupAssignment) - 1
df_within <- nrow(dog_data) - n_distinct(dog_data$GroupAssignment)

# Mean squares and the F ratio.
msb <- ssb / df_between
msw <- ssw / df_within
f_statistic <- msb / msw

# Upper-tail probability of the F distribution at the observed statistic.
p_value <- pf(f_statistic, df_between, df_within, lower.tail = FALSE)

# Assemble the ANOVA table; F is only defined for the between-groups row.
anova_manual <- data.frame(
  Source = c("Between Groups", "Within Groups"),
  SS = c(ssb, ssw),
  DF = c(df_between, df_within),
  MS = c(msb, msw),
  F = c(f_statistic, NA)
)
kable(anova_manual)
|Source         |        SS|  DF|         MS|        F|
|:--------------|---------:|---:|----------:|--------:|
|Between Groups |  33.05879|   2| 16.5293925| 20.72045|
|Within Groups  | 224.16305| 281|  0.7977333|       NA|
Compute ANOVA using aov()
# Fit the one-way ANOVA of Diff on GroupAssignment with aov() and print
# its summary table (Df, Sum Sq, Mean Sq, F value, Pr(>F)).
model <- aov(formula = Diff ~ GroupAssignment, data = dog_data)
summary(model)
##                  Df Sum Sq Mean Sq F value   Pr(>F)
## GroupAssignment   2  33.06  16.529   20.72 4.04e-09 ***
## Residuals       281 224.16   0.798
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Part 2: Exercises 3.9 & 3.18
Exercise 3.9: Portland Cement Tensile Strength
# Exercise 3.9 data: tensile strength for four mixing techniques with four
# specimens each (16 observations, listed technique by technique).
# NOTE(review): the source line was cut off after "2600, 270"; the last
# group below was restored so that the model reproduces the reported sums
# of squares (489740 between, 153908 residual) — confirm against the
# exercise's data table.
cement_data <- data.frame(
  Mixing_Technique = rep(1:4, each = 4),
  Tensile_Strength = c(
    3129, 3000, 2865, 2890, # technique 1
    3200, 3300, 2975, 3150, # technique 2
    2800, 2900, 2985, 3050, # technique 3
    2600, 2700, 2600, 2765  # technique 4
  )
)

# One-way ANOVA; factor() makes the numeric technique code categorical so
# it gets 3 degrees of freedom rather than a single linear slope.
cement_model <- aov(
  Tensile_Strength ~ factor(Mixing_Technique),
  data = cement_data
)
summary(cement_model)
##                          Df Sum Sq Mean Sq F value   Pr(>F)
## factor(Mixing_Technique)  3 489740  163247   12.73 0.000489 ***
## Residuals                12 153908   12826
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
2
Graphical Display for Cement Strength
# Boxplots of tensile strength, one box per mixing technique. The x
# aesthetic is kept as factor(Mixing_Technique) because that expression is
# also what labels the x axis.
ggplot(
  data = cement_data,
  mapping = aes(x = factor(Mixing_Technique), y = Tensile_Strength)
) +
  geom_boxplot() +
  ggtitle("Boxplot of Tensile Strength by Mixing Technique")
Boxplot of Tensile Strength by Mixing Technique
Tensile_Strength
3200
3000
2800
2600
1
2
3
4
factor(Mixing_Technique)
Exercise 3.18: Brick Density vs. Firing Temperature
# Exercise 3.18 data: brick density at four firing temperatures with five
# observations each (20 observations, listed temperature by temperature).
# NOTE(review): the source line was cut off after the 15th value; the
# 175-degree group below was restored so that the model reproduces the
# reported sums of squares (0.2095 between, 0.4400 residual) and leaves
# observations 16, 19 and 20 as the largest residuals, as labelled in the
# diagnostic plots — confirm against the exercise's data table.
brick_data <- data.frame(
  Temperature = rep(c(100, 125, 150, 175), each = 5),
  Density = c(
    21.8, 21.9, 21.7, 21.6, 21.7, # 100
    21.7, 21.4, 21.5, 21.4, 21.4, # 125
    21.9, 21.8, 21.8, 21.6, 21.5, # 150
    21.9, 21.7, 21.8, 21.4, 21.4  # 175
  )
)

# One-way ANOVA; factor() treats temperature as a categorical treatment
# (3 degrees of freedom) rather than a numeric predictor.
brick_model <- aov(Density ~ factor(Temperature), data = brick_data)
summary(brick_model)
##                     Df Sum Sq Mean Sq F value Pr(>F)
## factor(Temperature)  3 0.2095 0.06983   2.539 0.0931 .
## Residuals           16 0.4400 0.02750
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
3
Graphical Display for Brick Density
# Boxplots of brick density, one box per firing temperature. The x
# aesthetic is kept as factor(Temperature) because that expression is also
# what labels the x axis.
ggplot(
  data = brick_data,
  mapping = aes(x = factor(Temperature), y = Density)
) +
  geom_boxplot() +
  ggtitle("Boxplot of Brick Density by Firing Temperature")
Boxplot of Brick Density by Firing Temperature
21.9
21.8
Density
21.7
21.6
21.5
21.4
100
125
150
factor(Temperature)
Residual Analysis
# Lay out a 2 x 2 grid and draw two diagnostics per model, cement first and
# brick second: which = 1:2 selects Residuals vs Fitted and the Q-Q plot.
par(mfrow = c(2, 2))
invisible(lapply(list(cement_model, brick_model), plot, which = 1:2))
4
175
0
7
2700
2900
3100
6
7
−2
−1
1
2
20
19
21.70
Fitted values
Q−Q Residuals
1
16
−1
0.1
16
−0.3
Residuals
Residuals vs Fitted
21.60
0
Theoretical Quantiles
Standardized residuals
Fitted values
21.50
1
1
6
Q−Q Residuals
−1
Standardized residuals
200
1
−200
Residuals
Residuals vs Fitted
19
−2
20
−1
0
1
2
Theoretical Quantiles
Normality Test
# Shapiro-Wilk normality test on the residuals of the cement ANOVA model.
# The argument expression is echoed verbatim on the printed "data:" line.
shapiro.test(resid(cement_model))
##
## Shapiro-Wilk normality test
##
## data: resid(cement_model)
## W = 0.97046, p-value = 0.846
# Shapiro-Wilk normality test on the residuals of the brick ANOVA model.
# The argument expression is echoed verbatim on the printed "data:" line.
shapiro.test(resid(brick_model))
##
## Shapiro-Wilk normality test
##
## data: resid(brick_model)
## W = 0.95823, p-value = 0.5091
Conclusion
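The ANOVA results indicate that mixing technique significantly affects tensile strength
(F(3, 12) = 12.73, p = 0.0005), while firing temperature does not significantly affect brick
density at α = 0.05 (F(3, 16) = 2.54, p = 0.093). The residual plots and the Shapiro-Wilk tests
(p = 0.846 for cement, p = 0.509 for brick) give no evidence against the normality assumption,
so the ANOVA assumptions appear to be reasonably met. Because only the cement ANOVA is
significant, Fisher’s LSD test could be used for post-hoc comparisons of the mixing techniques.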
5
Download