Question: 1. Make all the obvious visual interpretations and explain each boxplot in detail. 2. Do statistical tests between groups that are not very obviously different.

1. Make all the obvious visual interpretations and explain each boxplot in detail.
2. Do statistical tests between groups that are not very obviously different:
- e.g., a two-sample t-test between Europe and Africa, and an ANOVA for all the groups
- Remember to log transform or remove outliers to make tests more accurate
CODE IN R:-
library(tidyverse)
library(ggplot2)
# Load the country metadata and reshape the education data to long format.
# Change the file directories!!
country_metadata <- read_csv("country_metadata.csv")

# `education` is piped below but is never read in this script, so fail early
# with a clear message instead of an "object not found" error mid-pipeline.
# NOTE(review): the read step for `education` appears to be missing; add it
# above this check (its file path is not shown anywhere in this file).
if (!exists("education")) {
  stop(
    "`education` is not loaded; read the education data before this step.",
    call. = FALSE
  )
}

education <- education %>%
  # Columns 5 to 68 become one row each: the old column name goes into
  # `year`, the cell goes into `value` (the pivot_longer() default name).
  pivot_longer(cols = 5:68, names_to = "year") %>%
  # No `by` is given, so the join uses every column name the two tables
  # share. NOTE(review): presumably the country key; confirm and set `by`.
  full_join(country_metadata[1:3]) %>%
  # Keep only rows that have a value, a region and an income group.
  # Original note: run this again to filter venezuela.
  filter(
    !is.na(value),
    !is.na(Region),
    !is.na(IncomeGroup)
  )

# Log transform to handle outliers.
# Note: log(0) is -Inf and log() of a negative value is NaN, so such rows
# need handling before `log_value` is used in a test or plot.
education$log_value <- log(education$value)
# Subset of `education` restricted to the years 2013 through 2022.
education_decade <- filter(education, year %in% 2013:2022)
# Tag each row by the sex named in its indicator and keep only tagged rows.
# The ' male' pattern starts with a space so it cannot match inside the
# word 'female'; it is tested first, and the first matching rule wins.
education_mf <- education %>%
  mutate(
    male_or_female = case_when(
      str_detect(`Indicator Name`, ' male') ~ 'male',
      str_detect(`Indicator Name`, 'female') ~ 'female'
    )
  ) %>%
  # Rows matching neither pattern get NA from case_when() and are dropped.
  drop_na(male_or_female)
# Perform 2-sample t-test between Europe and Africa.
# The formula uses bare column names so that they are looked up in `data`,
# i.e. in the two-region subset. Writing `education_mf$value ~
# education_mf$Region` bypasses `data`, so the test would see every region
# and fail unless Region happened to have exactly two levels.
# NOTE(review): confirm that the Region labels in the data are literally
# "Europe" and "Africa"; if they differ, this subset is empty.
# NOTE(review): a `log_value` column is computed earlier in the script but
# the tests below run on the raw `value`; confirm which one is intended.
t_test_result <- t.test(
  value ~ Region,
  data = subset(education_mf, Region %in% c("Europe", "Africa"))
)

# Perform ANOVA for all groups (one-way, `value` explained by `Region`).
anova_result <- aov(value ~ Region, data = education_mf)

# Print results
print("2-Sample t-test between Europe and Africa:")
print(t_test_result)
print("ANOVA Test for all groups:")
# summary() gives the ANOVA table with the F statistic and p-value;
# printing the aov object directly shows only the sums of squares.
print(summary(anova_result))
# Draw horizontal boxplots of `value` for one indicator, one box per level
# of a grouping column, split into one facet per sex.
#
# data:              data frame with `Indicator Name`, `value`,
#                    `male_or_female` and the grouping column.
# indicator_pattern: regular expression matched against `Indicator Name`.
# fill_var:          unquoted grouping column used for the fill colour.
# title:             plot title.
# x_label:           x-axis label.
#
# Returns the ggplot object; it is drawn when printed (automatically so
# when the call is evaluated at top level).
plot_indicator_boxplot <- function(data, indicator_pattern, fill_var,
                                   title, x_label) {
  data %>%
    filter(str_detect(`Indicator Name`, indicator_pattern)) %>%
    ggplot() +
    geom_boxplot(aes(x = value, fill = {{ fill_var }})) +
    facet_wrap(~ male_or_female) +
    ggtitle(title) +
    xlab(x_label) +
    # The y axis carries no information for these boxplots, so hide
    # its breaks and labels.
    scale_y_continuous(breaks = NULL, labels = NULL)
}

# Indicator: Adjusted net enrollment by income group and region
plot_indicator_boxplot(
  education_mf, "Adjusted net enrollment rate", IncomeGroup,
  "Adjusted net enrollment rate by income group",
  "Adjusted net enrollment (%)"
)
plot_indicator_boxplot(
  education_mf, "Adjusted net enrollment rate", Region,
  "Adjusted net enrollment rate by region",
  "Adjusted net enrollment (%)"
)

# Indicator: Repeaters by income group and region
plot_indicator_boxplot(
  education_mf, "Repeaters", IncomeGroup,
  "Repeaters by income group",
  "Repeaters (%)"
)
plot_indicator_boxplot(
  education_mf, "Repeaters", Region,
  "Repeaters by region",
  "Repeaters (%)"
)

# Indicator: Trained teachers by income group and region
plot_indicator_boxplot(
  education_mf, "Trained teachers", IncomeGroup,
  "Trained teachers by income group",
  "Trained teachers (%)"
)
plot_indicator_boxplot(
  education_mf, "Trained teachers", Region,
  "Trained teachers by region",
  "Trained teachers (%)"
)

# Indicator: Literacy rate by income group and region
plot_indicator_boxplot(
  education_mf, "Literacy rate", IncomeGroup,
  "Literacy rate by income group",
  "Literacy rate (%)"
)
plot_indicator_boxplot(
  education_mf, "Literacy rate", Region,
  "Literacy rate by region",
  "Literacy rate (%)"
)

# Indicator: Unemployment by income group and region
plot_indicator_boxplot(
  education_mf, "Unemployment", IncomeGroup,
  "Unemployment by income group",
  "Unemployment (%)"
)
plot_indicator_boxplot(
  education_mf, "Unemployment", Region,
  "Unemployment by region",
  "Unemployment (%)"
)

Step by Step Solution

There are 3 Steps involved in it

1 Expert Approved Answer
Step: 1 Unlock blur-text-image
Question Has Been Solved by an Expert!

Get step-by-step solutions from verified subject matter experts

Step: 2 Unlock
Step: 3 Unlock

Students Have Also Explored These Related Accounting Questions!