## ----setup, include = FALSE--------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>", fig.width = 7, fig.height = 5 ) ## ----eval = FALSE------------------------------------------------------------- # # Install from GitHub # devtools::install_github("silviascarpa/inequantiles") ## ----------------------------------------------------------------------------- library(inequantiles) ## ----------------------------------------------------------------------------- data(synthouse) head(synthouse) ## ----------------------------------------------------------------------------- ### Log-Normal distribution plot_inequality_curve(qfunction = qlnorm, qfun_args = list(meanlog = 9, sdlog = 0.3), col = "tomato", lty = 2, label = "LogN(sigma=0.3)") plot_inequality_curve(qfunction = qlnorm, qfun_args = list(meanlog = 9, sdlog = 1.9), col = "blue", lty = 2, add = TRUE, label = "LogN(sigma=1.9)") plot_inequality_curve(qfunction = qlnorm, qfun_args = list(meanlog = 9, sdlog = 3.9), col = "green", lty = 2, add = TRUE, label = "LogN(sigma=3.9)") ## ----------------------------------------------------------------------------- # Log-normal distribution superpop_qri(qfunction = qlnorm, meanlog = 9, sdlog = 0.2) superpop_qri(qfunction = qlnorm, meanlog = 9, sdlog = 1.7) superpop_qri(qfunction = qlnorm, meanlog = 9, sdlog = 3.2) # Weibull distribution superpop_qri(qfunction = qweibull, shape = 2, scale = 30000) superpop_qri(qfunction = qweibull, shape = 1.5, scale = 30000) ## ----------------------------------------------------------------------------- qri(y = synthouse$eq_income, weights = synthouse$weight, M = 100) ## ----echo=FALSE, message=FALSE, warning=FALSE--------------------------------- library(knitr) library(kableExtra) # Creiamo i dati. # Nota: usiamo il doppio backslash \\ per i simboli LaTeX in R df <- data.frame( Estimator = c("$\\widehat{Q}_4(p)$", "$\\widehat{Q}_5(p)$", "$\\widehat{Q}_6(p)$", "$\\widehat{Q}_7(p)$", "$\\widehat{Q}_8(p)$", "$\\widehat{Q}_9(p)$"), rk = c("$\\frac{W_k}{W_n}$", "$\\frac{W_k-\\frac{1}{2}w_k}{W_n}$", "$\\frac{W_k}{W_n+w_n}$", "$\\frac{W_{k-1}}{W_{n-1}}$", "$\\frac{W_k-\\frac{1}{3}w_k}{W_n+\frac{w_n}{3}}$", "$\\frac{W_k-\\frac{3}{8}w_k}{W_n+\frac{1}{4}w_n}$"), mk = c("0", "$\\frac{w_k}{2}$", "$w_np$", "$w_k - w_np$", "$\\frac{w_k}{3} + \\frac{w_n}{3}p$", "$\\frac{3}{8}w_k + \\frac{w_n}{4}p$"), k = c("$W_{k-1} \\le W_n p \\lt W_k$", "$W_{k-1} - \\frac{w_{k-1}}{2} \\le W_n p \\lt W_{k} - \\frac{w_{k}}{2}$", "$W_{k-1} \\le (W_n + w_n)p \\lt W_{k}$", "$W_{k-2} \\le W_{n-1}p \\lt W_{k-1}$", "$W_{k-1} - \\frac{w_{k-1}}{3} \\le (W_{n} - \\frac{w_n}{3})p \\lt W_{k} - \\frac{w_k}{3}$", "$W_{k-1} - \\frac{3w_{k-1}}{8} \\le (W_{n} + \\frac{w_{n}}{4})p \\lt W_{k} - \\frac{3w_{k}}{8}$") ) # Generiamo la tabella per HTML kbl(df, caption = "Table 1: Quantile estimators incorporating sampling weights.", escape = FALSE, # Fondamentale per far leggere il LaTeX a MathJax col.names = c("Estimator", "$\\widehat{r}_k$", "$\\widehat{m}_k$", "$k$"), align = "clll") %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F, position = "center") %>% column_spec(1, bold = T) %>% row_spec(0, bold = T) # Intestazione in grassetto ## ----------------------------------------------------------------------------- # Compute weighted quartiles csquantile(y = synthouse$eq_income, weights = synthouse$weight, probs = c(0.25, 0.5, 0.75), type = 4) csquantile(y = synthouse$eq_income, weights = synthouse$weight, probs = c(0.25, 0.5, 0.75), type = 7) # Compare without considering the sampling weights csquantile(synthouse$eq_income, probs = c(0.25, 0.5, 0.75), type = 4) ## ----------------------------------------------------------------------------- # Harrell-Davis weighted median csquantile(y = synthouse$eq_income, weights = synthouse$weight, probs = 0.5, type = "HD") ## ----------------------------------------------------------------------------- # Compare different quantile types by NUTS3 types <- c(4, 5, 6, 7, 8, 9, "HD") areas <- unique(synthouse$NUTS3) # Function to compute QRI for all types in one area compare_quantiles <- function(region_code, data = synthouse) { idx <- which(data$NUTS3 == region_code) results <- sapply(types, function(t) { csquantile(y = data$eq_income[idx], weights = data$weight[idx], type = t, probs = 0.95) }) return(results) } # Compute for all areas results_quantiles <- sapply(areas, compare_quantiles) rownames(results_quantiles) <- types colnames(results_quantiles) <- areas ## === Quantile estimators for Each NUTS3 Region ===" print(head(t(results_quantiles), n = 10)) ## ----------------------------------------------------------------------------- # Compute weighted QRI qri(y = synthouse$eq_income, weights = synthouse$weight, type = 6) # Type 6 quantile estimator (default) ## ----eval = FALSE------------------------------------------------------------- # # Pseudo-code for rescaled bootstrap for the estimation of the sampling variance of the QRI estimator # var_qri <- rescaled_bootstrap( # data = synthouse, # y = "eq_income", # strata = "NUTS2", # psu = "municipality", # weights = "weight", # estimator = qri, # by_strata = TRUE, # B = 100, # seed = 456) ## ----------------------------------------------------------------------------- # Compute QSR share_ratio(y = synthouse$eq_income, weights = synthouse$weight, type = 4) ## ----------------------------------------------------------------------------- # Compute Palma ratio share_ratio(y = synthouse$eq_income, weights = synthouse$weight, type = 7, prob_numerator = 0.90, prob_denominator = 0.40) ## ----------------------------------------------------------------------------- # P90/P10 ratio ratio_quantiles(y = synthouse$eq_income, weights = synthouse$weight, type = 7) # P75/P25 ratio ratio_quantiles(y = synthouse$eq_income, weights = synthouse$weight, prob_numerator = 0.75, prob_denominator = 0.25, type = 6) ## ----------------------------------------------------------------------------- # Compare all indicators inequantiles(y = synthouse$eq_income, weights = synthouse$weight, indicators = "all") ## ----se-example, eval=FALSE--------------------------------------------------- # # QRI and QSR with their standard errors for one region via rescaled bootstrap # nord <- synthouse[synthouse$NUTS1 == "N", ] # # qri_qsr <- inequantiles( # y = nord$eq_income, # weights = nord$weight, # indicators = c("qri", "qsr"), # se = TRUE, # data = nord, # strata = "NUTS2", # psu = "municipality", # B = 200, # seed = 42 # ) ## ----------------------------------------------------------------------------- # Select a subset for clearer visualization n_obs <- 200 set.seed(123) idx <- sample(nrow(synthouse), n_obs) # Extract data y_subset <- synthouse$eq_income[idx] w_subset <- synthouse$weight[idx] # Compute the IF for some indicators if_gini_vals <- if_gini(y_subset, w_subset) if_qri_vals <- if_qri(y_subset, w_subset, type = 6) if_qsr_vals <- if_share_ratio(y_subset, w_subset, type = 4) if_palma_vals <- if_share_ratio(y_subset, w_subset, type = 4, prob_numerator = 0.90, prob_denominator = 0.40) if_q50_vals <- if_quantile(y_subset, w_subset, probs = 0.5, type = 6) if_ratioquantiles_vals <- if_ratio_quantiles(y_subset, w_subset, prob_numerator = 0.90, prob_denominator = 0.10, type = 6) ## ----------------------------------------------------------------------------- # Create the plot library(ggplot2) library(scales) # plot_df <- data.frame( Income = rep(y_subset, 6), IF_Value = c( if_qri_vals, if_qsr_vals, if_gini_vals, if_palma_vals, if_q50_vals, if_ratioquantiles_vals ), Indicator = rep(c("QRI", "QSR", "Gini", "Palma", "Median", "P90/P10"), each = n_obs) ) ggplot(plot_df, aes(x = Income, y = IF_Value, color = Indicator)) + geom_line(linewidth = 0.8, alpha = 0.7) + geom_hline(yintercept = 0, linetype = "dashed", color = "gray40") + facet_wrap(~ Indicator, scales = "free_y", ncol = 2) + scale_x_continuous(labels = scales::comma) + labs( title = "Influence Functions: How Each Observation Affects the Estimate", subtitle = paste("Based on", n_obs, "randomly sampled households"), x = "Equivalized Income", y = "Influence Function Value", ) + theme_minimal(base_size = 11) + theme( legend.position = "none", strip.text = element_text(face = "bold", size = 11), plot.title = element_text(face = "bold", size = 13), plot.subtitle = element_text(color = "gray40"), panel.grid.minor = element_blank() ) ## ----------------------------------------------------------------------------- # Example: Income distribution in frequency table format income_freq <- c(120, 180, 150, 80, 40, 20, 10) income_tot <- c(18800, 16300, 44700, 33900, 21500, 22100, 98300) income_lower <- c(0, 15000, 30000, 45000, 60000, 80000, 100000) income_upper <- c(15000, 30000, 45000, 60000, 80000, 100000, 150000) ## ----------------------------------------------------------------------------- # Estimate quantiles from grouped data quantile_grouped(freq = income_freq, lower_bounds = income_lower, upper_bounds = income_upper, probs = c(0.25, 0.5, 0.75)) ## ----------------------------------------------------------------------------- # Compute QRI from grouped data qri_grouped(freq = income_freq, lower_bounds = income_lower, upper_bounds = income_upper, M = 100) ## ----------------------------------------------------------------------------- # Estimate quantiles from grouped data gini_grouped(Y = income_tot, freq = income_freq) ## ----------------------------------------------------------------------------- sessionInfo()