# NOTE(review): this file has the shape of knitr::purl() output (chunk headers
# of the form `## ----label----`); confirm before editing by hand, since a
# regenerated copy would overwrite manual changes.
# The setup chunk below sets `eval = FALSE` as the chunk default, so every
# later chunk is kept as commented-out code and nothing is downloaded when
# this script is sourced.

## ----include = FALSE----------------------------------------------------------
# Chunk defaults: collapse output into the source block, prefix output lines
# with "#>", do not evaluate chunks, and hide messages and warnings.
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  eval = FALSE,
  message = FALSE,
  warning = FALSE
)
# Attach the font/text-shaping packages without their startup messages.
suppressPackageStartupMessages(library(systemfonts))
suppressPackageStartupMessages(library(textshaping))

## ----setup--------------------------------------------------------------------
# library(educabR)
# library(dplyr)
# library(ggplot2)

## ----saeb-download------------------------------------------------------------
# # Student performance data
# saeb_students <- get_saeb(year = 2023, type = "aluno")
#
# # School questionnaire
# saeb_schools <- get_saeb(year = 2023, type = "escola")
#
# # Use n_max for exploration
# saeb_sample <- get_saeb(year = 2023, type = "aluno", n_max = 5000)

## ----saeb-years---------------------------------------------------------------
# # 2021 data is split by education level
# saeb_fund <- get_saeb(
#   year = 2021,
#   type = "aluno",
#   level = "fundamental_medio"
# )
#
# saeb_infantil <- get_saeb(
#   year = 2021,
#   type = "aluno",
#   level = "educacao_infantil"
# )

## ----saeb-analysis------------------------------------------------------------
# # Explore student scores
# saeb_sample <- get_saeb(2023, type = "aluno", n_max = 10000)
#
# # Mathematics score distribution (rows without a mathematics score dropped)
# saeb_sample |>
#   filter(!is.na(proficiencia_mt)) |>
#   ggplot(aes(x = proficiencia_mt)) +
#   geom_histogram(bins = 50, fill = "steelblue", alpha = 0.7) +
#   labs(
#     title = "SAEB 2023 - Mathematics Proficiency Distribution",
#     x = "Mathematics Score",
#     y = "Count"
#   ) +
#   theme_minimal()

## ----encceja-download---------------------------------------------------------
# # Download ENCCEJA microdata
# encceja_2023 <- get_encceja(year = 2023)
#
# # Sample for exploration
# encceja_sample <- get_encceja(year = 2023, n_max = 5000)

## ----encceja-structure--------------------------------------------------------
# # Explore the data structure
# glimpse(encceja_sample)

## ----encceja-analysis---------------------------------------------------------
# encceja_2023 <- get_encceja(2023, n_max = 50000)
#
# # Count participants by state
# participants_by_state <-
#   encceja_2023 |>
#   count(sg_uf_prova, sort = TRUE) |>
#   head(10)
#
# ggplot(participants_by_state, aes(
#   x = reorder(sg_uf_prova, n),
#   y = n
# )) +
#   geom_col(fill = "darkorange") +
#   coord_flip() +
#   labs(
#     title = "ENCCEJA 2023 - Top 10 States by Participation",
#     x = "State",
#     y = "Number of Participants"
#   ) +
#   theme_minimal() +
#   # `labels` is spelled out in full rather than relying on partial matching
#   scale_y_continuous(
#     labels = scales::number_format(big.mark = ".", decimal.mark = ",")
#   )

## ----enem-escola-download-----------------------------------------------------
# # Download all ENEM by School data (2005-2015)
# enem_escola <- get_enem_escola()
#
# # Sample for exploration
# enem_escola_sample <- get_enem_escola(n_max = 5000)

## ----enem-escola-structure----------------------------------------------------
# glimpse(enem_escola_sample)

## ----enem-escola-analysis-----------------------------------------------------
# enem_escola <- get_enem_escola()
#
# # Average scores over time (public vs private)
# trend <-
#   enem_escola |>
#   mutate(
#     # na.rm = FALSE: a school-year missing any of the five area means gets
#     # NA here and is removed by the filter() that follows
#     media_geral = rowMeans(
#       across(c(nu_media_cn, nu_media_ch, nu_media_lp, nu_media_mt, nu_media_red)),
#       na.rm = FALSE
#     )
#   ) |>
#   filter(!is.na(media_geral)) |>
#   group_by(nu_ano, tp_dependencia_adm_escola) |>
#   summarise(
#     mean_score = mean(media_geral, na.rm = TRUE),
#     .groups = "drop"
#   ) |>
#   mutate(
#     admin_type = case_when(
#       tp_dependencia_adm_escola == 1 ~ "Federal",
#       tp_dependencia_adm_escola == 2 ~ "State",
#       tp_dependencia_adm_escola == 3 ~ "Municipal",
#       tp_dependencia_adm_escola == 4 ~ "Private"
#     )
#   )
#
# ggplot(trend, aes(x = nu_ano, y = mean_score, color = admin_type)) +
#   geom_line(linewidth = 1) +
#   geom_point(size = 2) +
#   labs(
#     title = "ENEM Average Score by School Type (2009-2015)",
#     x = "Year",
#     y = "Average Total Score",
#     color = "School Type"
#   ) +
#   theme_minimal()