---
title: "How to compare CPC indicators between public and private institutions?"
output: rmarkdown::html_vignette
vignette: >
  %\VignetteIndexEntry{How to compare CPC indicators between public and private institutions?}
  %\VignetteEngine{knitr::rmarkdown}
  %\VignetteEncoding{UTF-8}
---

```{r, include = FALSE}
# Chunk defaults for the whole vignette: code is displayed but not run
# (eval = FALSE), and messages/warnings are kept out of the output.
knitr::opts_chunk$set(
  eval = FALSE,
  collapse = TRUE,
  comment = "#>",
  message = FALSE,
  warning = FALSE
)

# Attach the font/text-shaping packages without their startup messages.
suppressPackageStartupMessages({
  library(systemfonts)
  library(textshaping)
})
```

This vignette shows how to use educabR to compare course quality (CPC) between public and private higher education institutions in Brazil.

```{r setup}
# Packages used throughout the vignette.
library(educabR)
library(dplyr)
library(tidyr)
library(ggplot2)
```

## Downloading CPC data

CPC (Conceito Preliminar de Curso) is a quality indicator for undergraduate courses, ranging from 1 (lowest) to 5 (highest). Courses scoring 1 or 2 are flagged for on-site evaluation.

```{r download}
# Fetch the 2023 CPC table and preview its columns.
cpc <- get_cpc(year = 2023)
glimpse(cpc)
```

```
#> Rows: 9,812
#> Columns: 39
#> $ ano 2023, 2023, 2023, 2023, 2023, …
#> $ codigo_da_ies 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, …
#> $ nome_da_ies "UNIVERSIDADE FEDERAL DE MATO G…
#> $ sigla_da_ies "UFMT", "UFMT", "UFMT", "UFMT"…
#> $ categoria_administrativa "Pública Federal", "Pública Fed…
#> $ codigo_do_curso 3, 9, 10, 12, 16, 17, 20, 37, …
#> $ area_de_avaliacao "ENGENHARIA CIVIL", "AGRONOMIA"…
#> $ sigla_da_uf "MT", "MT", "MT", "MT", "MT", "…
#> $ cpc_continuo 3.429, 3.482, 3.064, 2.792, 4.…
#> $ cpc_faixa 4, 4, 4, 3, 5, 4, 4, 4, 4, 5, …
#> # ℹ 29 more variables
```

## Identifying public vs private institutions

The `categoria_administrativa` column classifies institutions. The exact column name and coding may vary by year -- check `names(cpc)` after downloading.

```{r classify}
# Labels that identify a public institution. Both the unaccented and the
# accented spellings are listed so either coding of the column matches.
public_categories <- c(
  "Publica Federal", "Publica Estadual", "Publica Municipal",
  "P\u00fablica Federal", "P\u00fablica Estadual", "P\u00fablica Municipal"
)

cpc_classified <- cpc |>
  mutate(
    sector = case_when(
      # `%in%` is FALSE for NA, so without this branch a missing category
      # would fall through to `.default` and be counted as private.
      is.na(categoria_administrativa) ~ NA_character_,
      categoria_administrativa %in% public_categories ~ "Public",
      # NOTE(review): every other non-missing category is treated as
      # private -- confirm against `unique(cpc$categoria_administrativa)`.
      .default = "Private"
    )
  )
```

## CPC score distribution by sector

```{r distribution}
# Share of courses at each CPC band, computed within each sector.
cpc_classified |>
  # Rows whose sector could not be determined are left out of the comparison.
  filter(!is.na(sector), !is.na(cpc_faixa)) |>
  count(sector, cpc_faixa) |>
  mutate(pct = n / sum(n) * 100, .by = sector) |>
  ggplot(aes(x = factor(cpc_faixa), y = pct, fill = sector)) +
  geom_col(position = "dodge") +
  labs(
    title = "CPC Score Distribution: Public vs Private (2023)",
    x = "CPC Score (1-5)",
    y = "Percentage of Courses (%)",
    fill = "Sector"
  ) +
  theme_minimal()
```

![](../man/figures/vignette-cpc-distribution.png)

## Average CPC by sector and knowledge area

```{r by-area}
# Mean continuous CPC per sector and area, then the 15 areas with the
# largest absolute public-minus-private gap.
cpc_classified |>
  filter(
    !is.na(sector),
    !is.na(cpc_continuo),
    !is.na(area_de_avaliacao)
  ) |>
  summarise(
    mean_cpc = mean(cpc_continuo, na.rm = TRUE),
    n = n(),
    .by = c(sector, area_de_avaliacao)
  ) |>
  # Keep only sector/area cells backed by at least 10 courses.
  filter(n >= 10) |>
  pivot_wider(
    names_from = sector,
    values_from = c(mean_cpc, n)
  ) |>
  mutate(gap = mean_cpc_Public - mean_cpc_Private) |>
  # An area left with a single sector after the `n >= 10` filter has an NA
  # gap; drop it so it cannot be picked by slice_max() when fewer than 15
  # complete areas exist.
  filter(!is.na(gap)) |>
  slice_max(abs(gap), n = 15) |>
  ggplot(aes(x = reorder(area_de_avaliacao, gap), y = gap)) +
  geom_col(aes(fill = gap > 0)) +
  coord_flip() +
  scale_fill_manual(
    values = c("TRUE" = "#2a9d8f", "FALSE" = "#e76f51"),
    labels = c("TRUE" = "Public higher", "FALSE" = "Private higher")
  ) +
  labs(
    title = "CPC Gap: Public minus Private, by Knowledge Area (2023)",
    x = NULL,
    y = "CPC difference (public - private)",
    fill = NULL
  ) +
  theme_minimal() +
  theme(legend.position = "none")
```

![](../man/figures/vignette-cpc-gap-area.png)

## Combining with IGC for institutional view

IGC (Índice Geral de Cursos) provides an institution-level quality score. Combining CPC and IGC gives a course-level and institution-level perspective.

```{r igc-comparison}
# Fetch the 2023 IGC table and compare its continuous score across sectors.
igc <- get_igc(year = 2023)

igc |>
  mutate(
    sector = case_when(
      # `%in%` is FALSE for NA, so without this branch a missing category
      # would fall through to `.default` and be counted as private.
      is.na(categoria_administrativa) ~ NA_character_,
      categoria_administrativa %in% c(
        "Publica Federal", "Publica Estadual", "Publica Municipal",
        "P\u00fablica Federal", "P\u00fablica Estadual",
        "P\u00fablica Municipal"
      ) ~ "Public",
      .default = "Private"
    )
  ) |>
  # Leave out institutions with no determinable sector or no IGC score.
  filter(!is.na(sector), !is.na(igc_continuo)) |>
  ggplot(aes(x = sector, y = igc_continuo, fill = sector)) +
  geom_boxplot(alpha = 0.7) +
  scale_fill_manual(values = c("Public" = "#2a9d8f", "Private" = "#e76f51")) +
  labs(
    title = "IGC Distribution: Public vs Private (2023)",
    x = NULL,
    y = "IGC (Continuous)"
  ) +
  theme_minimal() +
  theme(legend.position = "none")
```

![](../man/figures/vignette-cpc-igc-boxplot.png)