## ----include = FALSE---------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>", eval = FALSE, message = FALSE, warning = FALSE ) suppressPackageStartupMessages(library(systemfonts)) suppressPackageStartupMessages(library(textshaping)) ## ----setup-------------------------------------------------------------------- # library(educabR) # library(dplyr) # library(tidyr) # library(ggplot2) ## ----download----------------------------------------------------------------- # # Download all schools for 2023 # escolas <- get_censo_escolar(year = 2023) # # # Or filter by state for faster exploration # escolas_sp <- get_censo_escolar(year = 2023, uf = "SP") ## ----overview----------------------------------------------------------------- # indicators <- c( # "in_internet", # "in_banda_larga", # "in_biblioteca", # "in_laboratorio_informatica", # "in_laboratorio_ciencias", # "in_quadra_esportes", # "in_agua_potavel", # "in_esgoto_rede_publica" # ) # # infra_summary <- # escolas |> # summarise(across(all_of(indicators), ~ mean(. == 1, na.rm = TRUE) * 100)) |> # pivot_longer(everything(), names_to = "indicator", values_to = "pct") |> # mutate( # label = c( # "Internet", "Broadband", "Library", "Computer lab", # "Science lab", "Sports court", "Drinking water", "Public sewage" # ) # ) # # ggplot(infra_summary, aes(x = reorder(label, pct), y = pct)) + # geom_col(fill = "steelblue") + # coord_flip() + # labs( # title = "Percentage of Schools with Key Infrastructure (2023)", # x = NULL, # y = "% of schools" # ) + # theme_minimal() ## ----by-admin----------------------------------------------------------------- # admin_labels <- c( # "1" = "Federal", # "2" = "State", # "3" = "Municipal", # "4" = "Private" # ) # # infra_admin <- # escolas |> # mutate(admin = admin_labels[as.character(tp_dependencia)]) |> # group_by(admin) |> # summarise( # Internet = mean(in_internet == 1, na.rm = TRUE) * 100, # Library = mean(in_biblioteca == 1, na.rm = TRUE) * 100, # `Computer lab` = mean(in_laboratorio_informatica == 1, na.rm = TRUE) * 100, # `Science lab` = mean(in_laboratorio_ciencias == 1, na.rm = TRUE) * 100, # .groups = "drop" # ) |> # pivot_longer(-admin, names_to = "resource", values_to = "pct") # # ggplot(infra_admin, aes(x = resource, y = pct, fill = admin)) + # geom_col(position = "dodge") + # labs( # title = "School Infrastructure by Administrative Type (2023)", # x = NULL, # y = "% of schools", # fill = "Type" # ) + # theme_minimal() + # theme(axis.text.x = element_text(angle = 20, hjust = 1)) ## ----by-region---------------------------------------------------------------- # region_labels <- c( # "Norte" = "North", # "Nordeste" = "Northeast", # "Sudeste" = "Southeast", # "Sul" = "South", # "Centro-Oeste" = "Central-West" # ) # # infra_region <- # escolas |> # mutate(region = region_labels[no_regiao]) |> # group_by(region) |> # summarise( # Internet = mean(in_internet == 1, na.rm = TRUE) * 100, # Library = mean(in_biblioteca == 1, na.rm = TRUE) * 100, # `Science lab` = mean(in_laboratorio_ciencias == 1, na.rm = TRUE) * 100, # `Sports court` = mean(in_quadra_esportes == 1, na.rm = TRUE) * 100, # .groups = "drop" # ) |> # pivot_longer(-region, names_to = "resource", values_to = "pct") # # ggplot(infra_region, aes(x = region, y = pct, fill = resource)) + # geom_col(position = "dodge") + # labs( # title = "School Infrastructure by Region (2023)", # x = NULL, # y = "% of schools", # fill = NULL # ) + # theme_minimal() ## ----urban-rural-------------------------------------------------------------- # infra_location <- # escolas |> # mutate( # location = ifelse(tp_localizacao == 1, "Urban", "Rural") # ) |> # group_by(location) |> # summarise( # Internet = mean(in_internet == 1, na.rm = TRUE) * 100, # Broadband = mean(in_banda_larga == 1, na.rm = TRUE) * 100, # Library = mean(in_biblioteca == 1, na.rm = TRUE) * 100, # `Computer lab` = mean(in_laboratorio_informatica == 1, na.rm = TRUE) * 100, # `Science lab` = mean(in_laboratorio_ciencias == 1, na.rm = TRUE) * 100, # `Sports court` = mean(in_quadra_esportes == 1, na.rm = TRUE) * 100, # .groups = "drop" # ) |> # pivot_longer(-location, names_to = "resource", values_to = "pct") # # ggplot(infra_location, aes(x = resource, y = pct, fill = location)) + # geom_col(position = "dodge") + # scale_fill_manual(values = c("Urban" = "steelblue", "Rural" = "coral")) + # labs( # title = "School Infrastructure: Urban vs Rural (2023)", # x = NULL, # y = "% of schools", # fill = NULL # ) + # theme_minimal() + # theme(axis.text.x = element_text(angle = 20, hjust = 1)) ## ----accessibility------------------------------------------------------------ # access_cols <- c( # "in_acessibilidade_rampas", # "in_acessibilidade_corrimao", # "in_acessibilidade_elevador", # "in_acessibilidade_pisos_tateis", # "in_acessibilidade_sinal_sonoro", # "in_acessibilidade_sinal_tatil", # "in_acessibilidade_sinal_visual", # "in_acessibilidade_inexistente" # ) # # access_labels <- c( # "Ramps", "Handrails", "Elevator", "Tactile floors", # "Sound signals", "Tactile signals", "Visual signals", "None" # ) # # access_summary <- # escolas |> # summarise(across(all_of(access_cols), ~ mean(. == 1, na.rm = TRUE) * 100)) |> # pivot_longer(everything(), names_to = "feature", values_to = "pct") |> # mutate(label = access_labels) # # ggplot(access_summary, aes(x = reorder(label, pct), y = pct)) + # geom_col(fill = "#2a9d8f") + # coord_flip() + # labs( # title = "School Accessibility Features (2023)", # x = NULL, # y = "% of schools" # ) + # theme_minimal() ## ----internet-by-state-------------------------------------------------------- # internet_uf <- # escolas |> # group_by(sg_uf) |> # summarise( # pct_internet = mean(in_internet == 1, na.rm = TRUE) * 100, # .groups = "drop" # ) |> # arrange(pct_internet) # # ggplot(internet_uf, aes(x = reorder(sg_uf, pct_internet), y = pct_internet)) + # geom_col(fill = "steelblue") + # coord_flip() + # labs( # title = "Schools with Internet Access by State (2023)", # x = NULL, # y = "% of schools" # ) + # theme_minimal()