## ----include = FALSE---------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>", eval = FALSE, message = FALSE, warning = FALSE ) suppressPackageStartupMessages(library(systemfonts)) suppressPackageStartupMessages(library(textshaping)) ## ----setup-------------------------------------------------------------------- # library(educabR) # library(dplyr) # library(tidyr) # library(ggplot2) ## ----download----------------------------------------------------------------- # # WARNING: each year downloads ~600 MB from INEP. This may take several minutes. # years <- c(2019, 2021, 2022, 2023) # # enem <- # years |> # purrr::map(\(y) get_enem(year = y, n_max = 50000) |> mutate(year = y)) |> # bind_rows() ## ----math-by-state------------------------------------------------------------ # scores_by_state <- # enem |> # filter(!is.na(nu_nota_mt), !is.na(sg_uf_prova)) |> # summarise( # mean_math = mean(nu_nota_mt, na.rm = TRUE), # n = n(), # .by = c(sg_uf_prova, year) # ) # # # Top 5 states in the most recent year # top_states <- # scores_by_state |> # filter(year == max(year)) |> # slice_max(mean_math, n = 5) |> # pull(sg_uf_prova) # # scores_by_state |> # filter(sg_uf_prova %in% top_states) |> # ggplot(aes(x = factor(year), y = mean_math, color = sg_uf_prova, group = sg_uf_prova)) + # geom_line(linewidth = 1) + # geom_point(size = 2) + # labs( # title = "ENEM Math Score Evolution - Top 5 States", # x = "Year", # y = "Average Math Score", # color = "State" # ) + # theme_minimal() ## ----region-gap--------------------------------------------------------------- # region_map <- c( # AC = "North", AP = "North", AM = "North", PA = "North", # RO = "North", RR = "North", TO = "North", # AL = "Northeast", BA = "Northeast", CE = "Northeast", # MA = "Northeast", PB = "Northeast", PE = "Northeast", # PI = "Northeast", RN = "Northeast", SE = "Northeast", # DF = "Midwest", GO = "Midwest", MT = "Midwest", MS = "Midwest", # ES = "Southeast", MG = "Southeast", RJ = "Southeast", SP = "Southeast", # PR = "South", RS = "South", SC = "South" # ) # # enem |> # filter(!is.na(nu_nota_mt), !is.na(sg_uf_prova)) |> # mutate(region = region_map[sg_uf_prova]) |> # summarise( # mean_math = mean(nu_nota_mt, na.rm = TRUE), # .by = c(region, year) # ) |> # ggplot(aes(x = factor(year), y = mean_math, color = region, group = region)) + # geom_line(linewidth = 1) + # geom_point(size = 2) + # labs( # title = "ENEM Math Score by Region", # x = "Year", # y = "Average Math Score", # color = "Region" # ) + # theme_minimal() ## ----all-scores--------------------------------------------------------------- # enem |> # filter(!is.na(sg_uf_prova)) |> # summarise( # math = mean(nu_nota_mt, na.rm = TRUE), # languages = mean(nu_nota_lc, na.rm = TRUE), # humanities = mean(nu_nota_ch, na.rm = TRUE), # sciences = mean(nu_nota_cn, na.rm = TRUE), # essay = mean(nu_nota_redacao, na.rm = TRUE), # .by = year # ) |> # pivot_longer(-year, names_to = "subject", values_to = "mean_score") |> # ggplot(aes(x = factor(year), y = mean_score, color = subject, group = subject)) + # geom_line(linewidth = 1) + # geom_point(size = 2) + # labs( # title = "National Average ENEM Scores by Subject", # x = "Year", # y = "Average Score", # color = "Subject" # ) + # theme_minimal()