Lab Assignment 5
Sanjay D
PROBABILITY AND STATISTICS LAB L3+L4

#LARGE SAMPLE TEST

#Prob1
#H0: mu >= 10000 (the claimed mean lifetime), H1: mu < 10000
xbar=9900
mu0=10000
sigma=120
n=30
z=(xbar-mu0)/(sigma/sqrt(n))
z
## [1] -4.564355
alpha=0.05
z.alpha=qnorm(1-alpha)
z.alpha
## [1] 1.644854
#The test statistic (-4.564) lies below the lower-tail critical value (-1.645), hence H0 is rejected.
#We reject the claim that the mean lifetime of a light bulb is above 10,000 hours.

#Prob2
#H0: mu <= 2 (the label claim), H1: mu > 2
xbar=2.1
mu0=2
sigma=0.25
n=35
z=(xbar-mu0)/(sigma/sqrt(n))
z
## [1] 2.366432
alpha=0.05
z.alpha=qnorm(1-alpha)
z.alpha
## [1] 1.644854
#The test statistic (2.3664) is greater than the critical value (1.6449), hence H0 is rejected.
#We reject the claim that there are at most 2 grams of saturated fat per cookie.

#Prob3
#H0: mu = 15.4, H1: mu not equal to 15.4
xbar=14.6
mu0=15.4
sigma=2.5
n=35
z=(xbar-mu0)/(sigma/sqrt(n))
z
## [1] -1.893146
alpha=0.05
z.alpha=qnorm(1-alpha)   #(for a two-tailed test the critical value is qnorm(1-alpha/2) = 1.96)
z.alpha
## [1] 1.644854
pval=2*pnorm(z)
pval
## [1] 0.05833852
#The two-sided p-value (0.0583) is greater than 0.05, so we do not reject H0.

#SMALL SAMPLE TEST

#Prob1
#H0: mu = 0.3, H1: mu > 0.3
x=c(0.593,0.142,0.329, 0.691, 0.231, 0.793, 0.519, 0.392, 0.418)
t.test(x,alternative = "greater",mu=0.3)
##
##  One Sample t-test
##
## data:  x
## t = 2.2051, df = 8, p-value = 0.02927
## alternative hypothesis: true mean is greater than 0.3
## 95 percent confidence interval:
##  0.3245133       Inf
## sample estimates:
## mean of x
## 0.4564444
#From the output we see that the p-value is less than 0.05, hence we reject H0.
#There is moderately strong evidence that the mean Salmonella level in the ice cream is above 0.3 MPN/g.

#Prob2
#H0: there is no significant difference, H1: there is significant difference
x=c(65, 78, 88, 55, 48, 95, 66, 57, 79, 81)
t.test(x,mu=75)
##
##  One Sample t-test
##
## data:  x
## t = -0.78303, df = 9, p-value = 0.4537
## alternative hypothesis: true mean is not equal to 75
## 95 percent confidence interval:
##  60.22187 82.17813
## sample estimates:
## mean of x
##      71.2
#The p-value (0.4537) is greater than 0.05, so at the 5% significance level we do not reject H0; the sample mean is not significantly different from 75.

#Prob3
#H0: there is no significant difference, H1: there is significant difference
x=c(175,168,168,190,156,181,182,175,174,179)
y=c(185,169,173,173,188,186,175,174,179,180)
t.test(x,y)
##
##  Welch Two Sample t-test
##
## data:  x and y
## t = -0.94737, df = 15.981, p-value = 0.3576
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -11.008795   4.208795
## sample estimates:
## mean of x mean of y
##     174.8     178.2
#Since the p-value > 0.05, we do not reject H0; the means of the two groups are not significantly different.

#Prob4
#H0: there is no significant difference, H1: there is significant difference
x=c(15,12,13,79,8,21,9,14,8)
y=c(15,14,12,8,14,7,16,10,15,2)
t.test(x,y,alternative = "less")
##
##  Welch Two Sample t-test
##
## data:  x and y
## t = 1.1231, df = 8.5739, p-value = 0.8541
## alternative hypothesis: true difference in means is less than 0
## 95 percent confidence interval:
##      -Inf 22.68686
## sample estimates:
## mean of x mean of y
##  19.88889  11.30000
#The p-value (0.8541) > 0.05, so we do not reject H0.

#Prob5
#H0: there is no significant improvement to the team of athletes.
#H1: there is significant improvement
x=c(12.9,13.5,12.8,15.6,17.2,19.2,12.6,15.3,14.4,11.3)
y=c(12.7,13.6,12.0,15.2,16.8,20.0,12.0,15.9,16.0,11.1)
t.test(x,y,paired = TRUE)
##
##  Paired t-test
##
## data:  x and y
## t = -0.21331, df = 9, p-value = 0.8358
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
##  -0.5802549  0.4802549
## sample estimates:
## mean difference
##           -0.05
#The p-value is greater than 0.05, so we do not reject H0.
#The new training has not made any significant improvement to the team of athletes.

#Prob6
#H0: there is no significant improvement to the team of athletes, H1: there is significant improvement
x=c(12.9,13.5,12.8,15.6,17.2,19.2,12.6,15.3,14.4,11.3)
y=c(12.0,12.2,11.2,13.0,15.0,15.8,12.2,13.4,12.9,11.0)
t.test(x,y,paired = TRUE, alternative = "less")
##
##  Paired t-test
##
## data:  x and y
## t = 5.2671, df = 9, p-value = 0.9997
## alternative hypothesis: true mean difference is less than 0
## 95 percent confidence interval:
##      -Inf 2.170325
## sample estimates:
## mean difference
##            1.61
#The p-value (0.9997) is greater than 0.05, hence we do not reject H0 for the alternative tested.
#Note that the mean difference is positive (+1.61); if improvement corresponds to x exceeding y, the alternative should be "greater" rather than "less".

#Prob7
#H0: the drug does not lower cholesterol, H1: the drug lowers cholesterol
x=c(237,289,257,228,303,275,262,304,244,233)
y=c(194,240,230,186,265,222,242,281,240,212)
t.test(x,y,paired=TRUE,alternative = "greater",mu=0)
##
##  Paired t-test
##
## data:  x and y
## t = 6.5594, df = 9, p-value = 5.202e-05
## alternative hypothesis: true mean difference is greater than 0
## 95 percent confidence interval:
##  23.05711      Inf
## sample estimates:
## mean difference
##              32
#We reject H0 and support the claim because the p-value is less than 0.05.

#Prob8
#H0: there is no significant difference between the variances, H1: there is a significant difference
a=c(14.1,10.1,14.7,13.7,14.0)
b=c(14.0,14.5,13.7,12.7,14.1)
var.test(a,b)
##
##  F test to compare two variances
##
## data:  a and b
## F = 7.3304, num df = 4, denom df = 4, p-value = 0.07954
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##   0.7632268 70.4053799
## sample estimates:
## ratio of variances
##           7.330435
#Here the p-value > 0.05, so we do not reject H0; the variances are not significantly different.

#Practice problem 1
#H0: there is no increase in blood pressure (mu = 0), H1: there is an increase (mu > 0)
x=c(5, 2, 8, -1, 3, 0, -2, 1, 5, 0, 4, 6, 8)
t.test(x, mu = 0, alternative = "greater")
##
##  One Sample t-test
##
## data:  x
## t = 3.2613, df = 12, p-value = 0.003406
## alternative hypothesis: true mean is greater than 0
## 95 percent confidence interval:
##  1.360534      Inf
## sample estimates:
## mean of x
##         3
#The p-value is less than 0.05, hence H0 is rejected.
#There is evidence that the stimulus is accompanied by an increase in blood pressure.

#Practice problem 2
#H0: mu = 25, H1: mu not equal to 25
x=c(24, 20, 30, 20, 20, 18)
t.test(x,mu=25)
##
##  One Sample t-test
##
## data:  x
## t = -1.6771, df = 5, p-value = 0.1544
## alternative hypothesis: true mean is not equal to 25
## 95 percent confidence interval:
##  17.4016 26.5984
## sample estimates:
## mean of x
##        22
#The p-value (0.1544) is greater than 0.05 (and 0.01), hence H0 is not rejected.
#The claim mu = 25 is consistent with the data.

#Practice Problem 3
#H0: mu = 4 (i.e., 4,000 hours), H1: mu not equal to 4
x=c(4.2,4.6,3.9,4.1,5.2,3.8,3.9,4.3,4.4,5.6)
t.test(x,mu=4)
##
##  One Sample t-test
##
## data:  x
## t = 2.1483, df = 9, p-value = 0.0602
## alternative hypothesis: true mean is not equal to 4
## 95 percent confidence interval:
##  3.978809 4.821191
## sample estimates:
## mean of x
##       4.4
#The p-value is greater than 0.05, hence we do not reject H0.
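#The one-sample t-tests above all follow the same pattern, and t.test() returns an "htest"
#object whose components (statistic, p.value, conf.int) can be used to state the decision
#programmatically. A minimal sketch, reusing the Practice Problem 3 data:
res=t.test(c(4.2,4.6,3.9,4.1,5.2,3.8,3.9,4.3,4.4,5.6),mu=4)
res$statistic   # the t value
res$p.value     # the p-value used for the decision
res$conf.int    # the 95% confidence interval for the mean
if (res$p.value < 0.05) "reject H0" else "do not reject H0"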
#Practice problem 4
#H0: the two treatments have identical effects, H1: they do not have identical effects
x=c(2.0,2.7,2.9,1.9,2.1,2.6,2.7,2.9,3.0,2.6,2.6,2.7)
y=c(3.2,3.6,3.7,3.5,2.9,2.6,2.5,2.7)
t.test(x,y)
##
##  Welch Two Sample t-test
##
## data:  x and y
## t = -2.6676, df = 12.294, p-value = 0.02014
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.96022459 -0.09810875
## sample estimates:
## mean of x mean of y
##  2.558333  3.087500
#The p-value is less than 0.05, hence we reject H0.
#The treatments do not have identical effects.

#GOODNESS OF FIT

#Prob1
#H0: anxiety level and family type are independent, H1: anxiety level and family type are not independent
data=matrix(c(35,42,61,48,51,68),ncol=3,byrow=T)
data
##      [,1] [,2] [,3]
## [1,]   35   42   61
## [2,]   48   51   68
chisq.test(data)
##
##  Pearson's Chi-squared test
##
## data:  data
## X-squared = 0.53441, df = 2, p-value = 0.7655
#Since the p-value > 0.05, we do not reject H0; anxiety level and family type are independent.

#Prob2
library(MASS)
tbl=table(survey$Smoke, survey$Exer)
tbl
##
##         Freq None Some
##   Heavy    7    1    3
##   Never   87   18   84
##   Occas   12    3    4
##   Regul    9    1    7
ctbl=cbind(tbl[,"Freq"],tbl[,"None"]+tbl[,"Some"])
ctbl
##       [,1] [,2]
## Heavy    7    4
## Never   87  102
## Occas   12    7
## Regul    9    8
chisq.test(ctbl)
##
##  Pearson's Chi-squared test
##
## data:  ctbl
## X-squared = 3.2328, df = 3, p-value = 0.3571
#Since the p-value (0.3571) > 0.05, we do not reject H0; smoking habit and exercise level are independent.

#Prob3
plants=c(20,10,7,4)
chisq.test(plants,p=c(9/16,3/16,3/16,1/16))
## Warning in chisq.test(plants, p = c(9/16, 3/16, 3/16, 1/16)): Chi-squared
## approximation may be incorrect
##
##  Chi-squared test for given probabilities
##
## data:  plants
## X-squared = 1.9702, df = 3, p-value = 0.5786
#Here the p-value is greater than the alpha level (0.05), so we do not reject H0; the data are consistent with the 9:3:3:1 ratio.

#Prob4
#H0: the data fit a binomial distribution, H1: the data do not fit a binomial distribution
x=c(5,4,3,2,1,0)
n=5
N=320
P=0.5
obf=c(14,56,110,88,40,12)
exf=dbinom(x,n,P)*N
sum(obf)
## [1] 320
sum(exf)
## [1] 320
chisq=sum((obf-exf)^2/exf)
chisq
## [1] 7.16
qchisq(0.95,5)
## [1] 11.0705
#The calculated value (7.16) is less than the critical value (11.07), hence we do not reject H0; the binomial distribution is a good fit.

#Prob5
#H0: the data fit a Poisson distribution, H1: the data do not fit a Poisson distribution
x=0:6
f=c(275,72,30,7,5,2,1)
lambda=(sum(f*x)/sum(f))
expf=dpois(x,lambda)*sum(f)
f1=round(expf)
sum(f)
## [1] 392
sum(f1)
## [1] 393
obf=c(275,72,30,15)   # last four classes pooled
exf=c(242,117,28,6)
chisq=sum(((obf-exf)^2)/exf)
qchisq(0.95,2)
## [1] 5.991465
#Since the calculated value is greater than the critical value, we reject H0; the Poisson distribution is not a good fit.

#Prob6
#H0: the data fit a normal distribution, H1: the data do not fit a normal distribution
midy=seq(17.05,86.5,length=10)
f=c(2,10,16,37,43,39,29,13,6,5)
mean=sum(f*midy)/sum(f)
sd=sqrt(sum(f*midy-mean)^2)/sum(f)   # note: the grouped sd should be sqrt(sum(f*(midy-mean)^2)/sum(f))
l=seq(13.2,82.5,length=10)
l=c(1,90.2)                          # note: this overwrites the class boundaries with only two values
cdf=pnorm(l,mean,sd)
cdf=c(0,cdf,1)
pcf=diff(cdf)
f=c(0,f,0)
ex=round(pcf*sum(f),4)
fr=data.frame(f,ex)
obf=c(12,16,37,43,39,29,13,11)
exf=c(sum(ex[c(1,2,3)]),ex[c(4:9)],sum(ex[c(10,11,12)]))
sum(obf)
## [1] 200
sum(exf)
## [1] NA
chisq=sum((obf-exf)^2/exf)
chisq
## [1] NA
qchisq(0.95,5)
## [1] 11.0705
#Because of the errors flagged above, exf contains NA and the chi-squared statistic is NA,
#so no conclusion about the normal fit can be drawn from this run.

Practice Problems

1. A particular brand of tires claims that its deluxe tire averages at least 50,000 miles before it needs to be replaced. From past studies of this tire, the standard deviation is known to be 8,000. A survey of owners of that tire design is conducted. From the 28 tires surveyed, the average lifespan was 46,500 miles with a standard deviation of 9,800 miles. Do the data support the claim at the 5% level?
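The problem states that the population standard deviation (8,000 miles) is known, so a one-sided large-sample z-test can also be carried out directly from the summary values; a minimal sketch follows (the t-test based on the survey standard deviation is worked below).

# Minimal z-test sketch for H0: mu >= 50000 vs H1: mu < 50000, using the stated
# population standard deviation of 8000 miles.
xbar <- 46500; mu0 <- 50000; sigma <- 8000; n <- 28
z <- (xbar - mu0) / (sigma / sqrt(n))  # test statistic
z
qnorm(0.05)   # lower-tail critical value at the 5% level
pnorm(z)      # one-sided p-value; the claim is not supported if this is below 0.05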
# Given data
sample_mean <- 46500      # average lifespan from the survey
population_mean <- 50000  # claimed average lifespan
sample_size <- 28
sample_sd <- 9800         # standard deviation from the survey

# Calculate the standard error
standard_error <- sample_sd / sqrt(sample_size)

# Calculate the t-statistic
t_statistic <- (sample_mean - population_mean) / standard_error

# Degrees of freedom
df <- sample_size - 1

# Calculate the critical t-value for a one-tailed test at alpha = 0.05
critical_t <- qt(0.05, df, lower.tail = FALSE)

# Print the t-statistic and critical t-value
cat("t-statistic:", t_statistic, "\n")
## t-statistic: -1.889822
cat("Critical t-value:", critical_t, "\n")
## Critical t-value: 1.703288

# Perform the t-test and print the result
if (t_statistic < critical_t) {
  cat("Reject the null hypothesis. The data supports the claim.\n")
} else {
  cat("Fail to reject the null hypothesis. The data does not support the claim.\n")
}
## Reject the null hypothesis. The data supports the claim.
# Note: H0 here is the claim mu >= 50000, and the t-statistic (-1.89) lies below the
# lower-tail critical value (-1.70), so rejecting H0 means the data do NOT support the
# claim; the message printed above states this the wrong way around.

2. In the large city A, 20 per cent of a random sample of 900 school children had defective eyesight. In the large city B, 15 per cent of a random sample of 1,600 school children had the same defect. Is the difference between the two proportions significant? Obtain 95% confidence limits of the difference in the population proportions.

# Given data for City A
sample_size_A <- 900
defective_A <- 0.20 * sample_size_A  # Number of children with defective eyesight in City A

# Given data for City B
sample_size_B <- 1600
defective_B <- 0.15 * sample_size_B  # Number of children with defective eyesight in City B

# Proportions
p_A <- defective_A / sample_size_A   # Proportion of defective eyesight in City A
p_B <- defective_B / sample_size_B   # Proportion of defective eyesight in City B

# Standard error of the difference in proportions
SE_diff <- sqrt((p_A * (1 - p_A)) / sample_size_A + (p_B * (1 - p_B)) / sample_size_B)

# Z-score for 95% confidence level
z <- qnorm(0.975)  # Two-tailed test

# Calculate the difference in proportions
diff_proportions <- p_A - p_B

# Confidence interval for the difference in proportions
lower_limit <- diff_proportions - z * SE_diff
upper_limit <- diff_proportions + z * SE_diff

# Print the results
cat("Difference in proportions:", diff_proportions, "\n")
## Difference in proportions: 0.05
cat("95% Confidence Interval for the difference in proportions:", lower_limit, "to", upper_limit, "\n")
## 95% Confidence Interval for the difference in proportions: 0.01855096 to 0.08144904
# Since the 95% confidence interval (0.0186, 0.0814) does not contain zero, the difference
# between the two proportions is significant at the 5% level.

3. A cigarette manufacturing firm claims its brand A of cigarettes outsells its brand B by 8%. If it is found that 42 out of a sample of 200 smokers prefer brand A and 18 out of another random sample of 100 smokers prefer brand B, test whether the 8% difference is a valid claim.
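An equivalent way of judging the 8% claim (the full z-test is worked below) is to check whether 0.08 lies inside a 95% confidence interval for p_A - p_B; a minimal sketch:

# Hedged sketch: 95% confidence limits for p_A - p_B; the 8% claim is plausible
# if 0.08 lies inside this interval.
n_A <- 200; x_A <- 42
n_B <- 100; x_B <- 18
p_A <- x_A / n_A
p_B <- x_B / n_B
se <- sqrt(p_A * (1 - p_A) / n_A + p_B * (1 - p_B) / n_B)
(p_A - p_B) + c(-1, 1) * qnorm(0.975) * se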
# Given data
n_A <- 200  # Sample size for brand A
n_B <- 100  # Sample size for brand B
x_A <- 42   # Number of smokers preferring brand A
x_B <- 18   # Number of smokers preferring brand B

# Proportions
p_A <- x_A / n_A  # Proportion of smokers preferring brand A
p_B <- x_B / n_B  # Proportion of smokers preferring brand B

# Null hypothesis: the difference in proportions is 8% (p_A - p_B = 0.08)
# Alternative hypothesis: the difference in proportions is not 8% (p_A - p_B != 0.08)

# Standard error of the difference in proportions
SE_diff <- sqrt((p_A * (1 - p_A)) / n_A + (p_B * (1 - p_B)) / n_B)

# Test statistic (z-score)
z <- ((p_A - p_B) - 0.08) / SE_diff

# p-value for two-tailed test
p_value <- 2 * pnorm(-abs(z))

# Print the test statistic and p-value
cat("Test Statistic (z):", z, "\n")
## Test Statistic (z): -1.041328
cat("p-value:", p_value, "\n")
## p-value: 0.2977235

# Test the hypothesis at a significance level of 0.05
if (p_value < 0.05) {
  cat("Reject the null hypothesis. There is evidence to suggest that the claim is not valid.\n")
} else {
  cat("Fail to reject the null hypothesis. There is not enough evidence to reject the claim.\n")
}
## Fail to reject the null hypothesis. There is not enough evidence to reject the claim.

4. The average number of sick days an employee takes per year is believed to be about 10. Members of a personnel department do not believe this figure. They randomly survey 8 employees. The numbers of sick days they took for the past year are as follows: 12; 4; 15; 3; 11; 8; 6; 8. Let X = the number of sick days they took for the past year. Should the personnel team believe that the average number is about 10?

# Given data
sick_days <- c(12, 4, 15, 3, 11, 8, 6, 8)  # Number of sick days taken by 8 employees

# Calculate the sample mean
sample_mean <- mean(sick_days)

# Null hypothesis: The average number of sick days is 10
# Alternative hypothesis: The average number of sick days is not 10

# Conduct a one-sample t-test
t_test_result <- t.test(sick_days, mu = 10)

# Print the test result
print(t_test_result)
##
##  One Sample t-test
##
## data:  sick_days
## t = -1.12, df = 7, p-value = 0.2996
## alternative hypothesis: true mean is not equal to 10
## 95 percent confidence interval:
##   4.94433 11.80567
## sample estimates:
## mean of x
##     8.375
# Since the p-value (0.2996) is greater than 0.05, we do not reject H0; the survey does not
# contradict the belief that the average number of sick days is about 10.

5. The mean lifetime of a sample of 400 fluorescent light bulbs produced by a company is found to be 1,570 hours with a standard deviation of 150 hours.
Test the hypothesis that the mean lifetime of bulbs is 1,600 hours against the alternative hypothesis that it is greater than 1,600 hours at the 1% and 5% levels of significance.

# Given data
sample_mean <- 1570      # Sample mean
sample_sd <- 150         # Sample standard deviation
sample_size <- 400       # Sample size
population_mean <- 1600  # Hypothesized population mean

# Generate sample data
# Note: simulating data with rnorm makes the result depend on the random draw; with the
# given summary values the large-sample z statistic is (1570 - 1600) / (150 / sqrt(400)) = -4.
sample_data <- rnorm(sample_size, mean = sample_mean, sd = sample_sd)

# Conduct a one-sample t-test for 1% level of significance
t_test_result_1 <- t.test(sample_data, mu = population_mean, alternative = "greater", conf.level = 0.99)

# Print the test result for 1% level of significance
print(t_test_result_1)
##
##  One Sample t-test
##
## data:  sample_data
## t = -5.3219, df = 399, p-value = 1
## alternative hypothesis: true mean is greater than 1600
## 99 percent confidence interval:
##  1540.07     Inf
## sample estimates:
## mean of x
##   1558.35

# Conduct a one-sample t-test for 5% level of significance
t_test_result_5 <- t.test(sample_data, mu = population_mean, alternative = "greater", conf.level = 0.95)

# Print the test result for 5% level of significance
print(t_test_result_5)
##
##  One Sample t-test
##
## data:  sample_data
## t = -5.3219, df = 399, p-value = 1
## alternative hypothesis: true mean is greater than 1600
## 95 percent confidence interval:
##  1545.447      Inf
## sample estimates:
## mean of x
##   1558.35
# The sample mean is below 1,600, so under the alternative "greater" the p-value is 1 and H0
# is not rejected at either the 1% or the 5% level.

6. A certain stimulus administered to each of 13 patients resulted in the following increases in blood pressure: 5, 2, 8, -1, 3, 0, -2, 1, 5, 0, 4, 6, 8. Can it be concluded that the stimulus will, in general, be accompanied by an increase in blood pressure?

# Given data
increase <- c(5, 2, 8, -1, 3, 0, -2, 1, 5, 0, 4, 6, 8)  # Increase in blood pressure for each patient

# Null hypothesis: The mean increase in blood pressure is zero (mu = 0)
# Alternative hypothesis: The mean increase in blood pressure is greater than zero (mu > 0)

# Conduct a one-sample t-test
t_test_result <- t.test(increase, alternative = "greater")

# Print the test result
print(t_test_result)
##
##  One Sample t-test
##
## data:  increase
## t = 3.2613, df = 12, p-value = 0.003406
## alternative hypothesis: true mean is greater than 0
## 95 percent confidence interval:
##  1.360534      Inf
## sample estimates:
## mean of x
##         3
# Since the p-value (0.0034) is less than 0.05, we reject H0; the stimulus is, in general,
# accompanied by an increase in blood pressure.

7. The manufacturer of a certain make of electric bulbs claims that his bulbs have a mean life of 25 months with a standard deviation of 5 months. A random sample of 6 such bulbs gave the following values. Life of bulbs in months: 24, 20, 30, 20, 20, and 18.
Can you regard the producer's claim to be valid at the 1% level of significance?

# Given data
sample <- c(24, 20, 30, 20, 20, 18)  # Life of bulbs in months
sample_mean <- mean(sample)          # Sample mean
sample_sd <- sd(sample)              # Sample standard deviation
sample_size <- length(sample)        # Sample size
population_mean <- 25                # Claimed population mean

# Null hypothesis: The mean life of bulbs is 25 months (mu = 25)
# Alternative hypothesis: The mean life of bulbs is not 25 months (two-tailed test)

# Conduct a one-sample t-test
t_test_result <- t.test(sample, mu = population_mean, alternative = "two.sided", conf.level = 0.99)

# Print the test result
print(t_test_result)
##
##  One Sample t-test
##
## data:  sample
## t = -1.6771, df = 5, p-value = 0.1544
## alternative hypothesis: true mean is not equal to 25
## 99 percent confidence interval:
##  14.78708 29.21292
## sample estimates:
## mean of x
##        22
# Since the p-value (0.1544) is greater than 0.01, we do not reject H0; the producer's claim
# can be regarded as valid at the 1% level.

8. The lifetimes of electric bulbs for a random sample of 10 from a large consignment gave the following data: 4.2, 4.6, 3.9, 4.1, 5.2, 3.8, 3.9, 4.3, 4.4, 5.6 (in '000 hours). Can we accept the hypothesis that the average lifetime of bulbs is 4,000 hours?

# Given data
sample <- c(4.2, 4.6, 3.9, 4.1, 5.2, 3.8, 3.9, 4.3, 4.4, 5.6)  # Lifetime of bulbs in '000 hours

# Calculate sample statistics
sample_mean <- mean(sample)    # Sample mean
sample_sd <- sd(sample)        # Sample standard deviation
sample_size <- length(sample)  # Sample size
population_mean <- 4           # Hypothesized population mean (in '000 hours)

# Null hypothesis: The average lifetime of bulbs is 4,000 hours (mu = 4)
# Alternative hypothesis: The average lifetime of bulbs is not 4,000 hours (two-tailed test)

# Conduct a one-sample t-test
t_test_result <- t.test(sample, mu = population_mean, alternative = "two.sided")

# Print the test result
print(t_test_result)
##
##  One Sample t-test
##
## data:  sample
## t = 2.1483, df = 9, p-value = 0.0602
## alternative hypothesis: true mean is not equal to 4
## 95 percent confidence interval:
##  3.978809 4.821191
## sample estimates:
## mean of x
##       4.4
# Since the p-value (0.0602) is greater than 0.05, we do not reject H0; the average lifetime
# of 4,000 hours can be accepted.

# F-test comparing the variances of the two treatment groups from Practice Problem 4
a=c(2,2.7,2.9,1.9,2.1,2.6,2.7,2.9,3.0,2.6,2.6,2.7)
b=c(3.2,3.6,3.7,3.5,2.9,2.6,2.5,2.7)
u=var.test(a,b)
u
##
##  F test to compare two variances
##
## data:  a and b
## F = 0.58045, num df = 11, denom df = 7, p-value = 0.4033
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.1232526 2.1817180
## sample estimates:
## ratio of variances
##          0.5804544
# Since the p-value (0.4033) is greater than 0.05, the variances are not significantly different.

9. The following data come from a hypothetical survey of 920 people (men, women) that asked for their preference among three ice cream flavours (chocolate, vanilla, strawberry). Is there any association between gender and preference for ice cream flavour?

# Given data
men <- c(100, 120, 20)     # Men's preference for chocolate, vanilla, strawberry
women <- c(350, 320, 150)  # Women's preference for chocolate, vanilla, strawberry

# Combine the data into a matrix
ice_cream_data <- rbind(men, women)

# Perform chi-square test for independence
chi_square_test <- chisq.test(ice_cream_data)

# Print the test result
print(chi_square_test)
##
##  Pearson's Chi-squared test
##
## data:  ice_cream_data
## X-squared = 16.916, df = 2, p-value = 0.0002122
# Since the p-value (0.0002) is far below 0.05, we reject H0; there is an association between
# gender and ice cream flavour preference.

10. As part of a quality improvement project focused on the delivery of mail at a departmental office within a large company, data were gathered on the number of different addresses that had to be changed so that the mail could be redirected to the correct mail stop. The table shows the frequency distribution.
Fit a binomial distribution and test the goodness of fit.

# Given data
x <- 0:4
f_observed <- c(5, 20, 45, 20, 10)

# Total number of observations (sample size)
n <- sum(f_observed)

# Estimate the probability of success (p)
# Note: this is the mean number of changes per observation (2.1), not a probability;
# dividing by the number of trials per observation (4) would give the binomial p.
p_estimate <- sum(x * f_observed) / n

# Ensure that the probability estimate is within the valid range [0, 1]
p_estimate <- pmin(p_estimate, 0.99)  # Set maximum value to 0.99 to avoid numerical instability
p_estimate <- pmax(p_estimate, 0.01)  # Set minimum value to 0.01 to avoid numerical instability

# Calculate the expected frequencies using the binomial distribution formula
f_expected <- dbinom(x, size = max(x), prob = p_estimate) * n

# Normalize the expected frequencies so that they sum up to 1
f_expected <- f_expected / sum(f_expected)

# Perform goodness-of-fit test
goodness_of_fit_test <- chisq.test(f_observed, p = f_expected)
## Warning in chisq.test(f_observed, p = f_expected): Chi-squared approximation
## may be incorrect

# Print the test result
print(goodness_of_fit_test)
##
##  Chi-squared test for given probabilities
##
## data:  f_observed
## X-squared = 26044540, df = 4, p-value < 2.2e-16
# Because p_estimate is capped at 0.99 rather than estimated correctly, the expected
# frequencies are badly distorted, and the enormous chi-squared value above does not
# reflect the true fit of the binomial model.
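A corrected sketch, assuming each observation is a Binomial(4, p) count (x runs from 0 to 4) and estimating p by the method of moments; one degree of freedom is lost for the estimated parameter:

# Hedged corrected sketch of the binomial goodness-of-fit test.
x <- 0:4
obs <- c(5, 20, 45, 20, 10)
N <- sum(obs)                          # 100 observations
p_hat <- sum(x * obs) / (4 * N)        # mean count / trials per observation = 0.525
exp_f <- dbinom(x, size = 4, prob = p_hat) * N
chisq <- sum((obs - exp_f)^2 / exp_f)  # chi-squared statistic
chisq
qchisq(0.95, df = length(x) - 1 - 1)   # critical value at the 5% level
pchisq(chisq, df = length(x) - 1 - 1, lower.tail = FALSE)  # p-value of the fit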