## ----setup, include=FALSE----------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>", eval = TRUE ) library(spboost) ## ----------------------------------------------------------------------------- args(spbgam) ## ----------------------------------------------------------------------------- formula_bspa <- Y ~ bbs(X1) + bbs(X2) + bbs(X3) + bols(X4) ## ----------------------------------------------------------------------------- formula_gam <- Y ~ s(X1) + s(X2) + s(X3) ## ----------------------------------------------------------------------------- formula_plain <- Y ~ X1 + X2 + X3 + X4 ## ----------------------------------------------------------------------------- library(spboost) library(mboost) set.seed(1) sim <- dgp( n = 1000, rho = 0.6, betas = c(0, 0.5, 1, -1), sigma2 = 1, model = "SAR", nonlin = TRUE, myseed = 1 ) fit_sar_cfe <- spbgam( formula = Y ~ bbs(X1) + bbs(X2) + bbs(X3), data = sim$data, W = sim$W, DGP = "SAR", method = "BSPA_SAR_CFE", control = list( control_gamboost = boost_control(mstop = 150, nu = 0.1) ) ) fit_sar_cfe$rho fit_sar_cfe$rmse fit_sar_cfe$rho_method summary(fit_sar_cfe) ## ----------------------------------------------------------------------------- fit_sar_ml <- spbgam( formula = Y ~ bbs(X1) + bbs(X2) + bbs(X3), data = sim$data, W = sim$W, DGP = "SAR", method = "BSPA_SAR_ML", control = list( control_gamboost = boost_control(mstop = 150, nu = 0.1) ) ) c(CFE = fit_sar_cfe$rho, ML = fit_sar_ml$rho) summary(fit_sar_ml) ## ----fig.width=8, fig.height=12----------------------------------------------- decomp_cfe_ex1 <- fitted_decomp_spboost(fit_sar_cfe) decomp_ml_ex1 <- fitted_decomp_spboost(fit_sar_ml) center_curve <- function(x) as.numeric(x - mean(x, na.rm = TRUE)) plot_component_ex1 <- function(true_col, fit_col, title) { ord <- order(sim$data[[fit_col]]) x_component <- sim$data[[fit_col]][ord] true_component <- center_curve(sim$data[[true_col]])[ord] cfe_component <- center_curve(decomp_cfe_ex1[[fit_col]])[ord] ml_component <- center_curve(decomp_ml_ex1[[fit_col]])[ord] plot( x_component, ml_component, type = "l", lwd = 1, col = "#D95F02", xlab = fit_col, ylab = "Centered component", main = title, ylim = range(c(true_component, cfe_component, ml_component), na.rm = TRUE) ) lines(x_component, cfe_component, col = "#1B9E77", lwd = 1) lines(x_component, true_component, col = "black", lwd = 3) } old_par_ex1 <- par(mfrow = c(3, 1), mar = c(4, 4, 2, 1)) plot_component_ex1("f1", "X1", "Component f1(X1)") legend( "topleft", legend = c("True component", "BSPA_SAR_CFE", "BSPA_SAR_ML"), col = c("black", "#1B9E77", "#D95F02"), lwd = c(3, 1, 1), bty = "n" ) plot_component_ex1("f2", "X2", "Component f2(X2)") plot_component_ex1("f3", "X3", "Component f3(X3)") par(old_par_ex1) ## ----fig.show='hide'---------------------------------------------------------- fit_sar_cv_random <- spbgam( formula = Y ~ bbs(X1) + bbs(X2) + bbs(X3), data = sim$data, W = sim$W, DGP = "SAR", method = "BSPA_SAR_CFE", control = list( # Maximum mstop considered by the internal CV search. control_gamboost = boost_control(mstop = 500, nu = 0.1), mstop_init = 100, mstop_criterion = "CV", nfold = 5, ncore = 1, cv_mode = "random", cv_seed_spatial = 1001 ) ) fit_sar_cv_spatial <- spbgam( formula = Y ~ bbs(X1) + bbs(X2) + bbs(X3), data = sim$data, W = sim$W, DGP = "SAR", method = "BSPA_SAR_CFE", control = list( # Maximum mstop considered by the internal CV search. control_gamboost = boost_control(mstop = 500, nu = 0.1), mstop_init = 100, mstop_criterion = "CV", nfold = 5, ncore = 1, cv_mode = "spatial_block", cv_coord_vars_spatial = c("x", "y"), cv_hex_size_spatial = 0.15, cv_seed_spatial = 1001, cv_plot = TRUE ) ) fit_sar_cv_random$spbgam_meta$mstop_final fit_sar_cv_spatial$spbgam_meta$mstop_final summary(fit_sar_cv_random) summary(fit_sar_cv_spatial) ## ----fig.width=8, fig.height=6, echo=FALSE, eval=TRUE------------------------- cv_plot_data <- fit_sar_cv_spatial$mstop_selection$cv_plot_data if (is.null(cv_plot_data) && requireNamespace("blockCV", quietly = TRUE) && requireNamespace("sf", quietly = TRUE)) { set.seed(1001) pts_cv <- sf::st_as_sf( sim$data, coords = c("x", "y"), crs = 3857, remove = FALSE ) cv_blocks <- blockCV::cv_cluster(x = pts_cv, k = 5, report = FALSE) cv_plot_data <- data.frame( x = sim$data$x, y = sim$data$y, fold = as.integer(cv_blocks$folds_ids) ) } stopifnot(!is.null(cv_plot_data)) fold_levels <- sort(unique(cv_plot_data$fold)) fold_cols <- grDevices::hcl.colors(length(fold_levels), palette = "Dark 3") fold_index <- match(cv_plot_data$fold, fold_levels) plot( cv_plot_data$x, cv_plot_data$y, col = fold_cols[fold_index], pch = 19, xlab = "x", ylab = "y", main = "Spatial block CV folds" ) legend( "topright", legend = paste("fold", fold_levels), col = fold_cols, pch = 19, bty = "n", cex = 0.8 ) ## ----------------------------------------------------------------------------- library(spatialreg) library(spdep) listw <- spdep::mat2listw(sim$W, style = "W") fit_lagsarlm <- spatialreg::lagsarlm( Y ~ X1 + X2 + X3, data = sim$data, listw = listw, method = "LU", zero.policy = TRUE ) fit_gamboost <- mboost::gamboost( Y ~ bbs(X1) + bbs(X2) + bbs(X3), data = sim$data, control = boost_control(mstop = 150, nu = 0.1) ) fit_gam_sar_cfe <- spbgam( formula = Y ~ s(X1, k = 5) + s(X2, k = 5) + s(X3, k = 5), data = sim$data, W = sim$W, DGP = "SAR", method = "GAM_SAR_CFE", control = list(ncore = 1) ) fit_gam_sar_ml <- spbgam( formula = Y ~ s(X1, k = 5) + s(X2, k = 5) + s(X3, k = 5), data = sim$data, W = sim$W, DGP = "SAR", method = "GAM_SAR_ML", control = list(ncore = 1) ) true_f <- rowSums(sim$data[, c("f0", "f1", "f2", "f3")]) wy <- as.numeric(sim$W %*% sim$data$Y) f_lagsarlm <- as.numeric( { x_lag <- model.matrix(Y ~ X1 + X2 + X3, sim$data) b_lag <- coef(fit_lagsarlm)[colnames(x_lag)] x_lag %*% b_lag } ) f_gamboost <- as.numeric(fitted(fit_gamboost)) f_cfe <- as.numeric(fit_sar_cfe$fittedYS) f_ml <- as.numeric(fit_sar_ml$fittedYS) f_gam_cfe <- as.numeric(fit_gam_sar_cfe$fittedYS) f_gam_ml <- as.numeric(fit_gam_sar_ml$fittedYS) comparison_sar <- data.frame( estimator = c( "SAR(lagsarlm)", "gamboost", "BSPA_SAR_CFE", "BSPA_SAR_ML", "GAM_SAR_CFE", "GAM_SAR_ML" ), rmse_y = c( sqrt(mean((sim$data$Y - suppressMessages(fitted(fit_lagsarlm)))^2)), sqrt(mean((sim$data$Y - fitted(fit_gamboost))^2)), fit_sar_cfe$rmse, fit_sar_ml$rmse, fit_gam_sar_cfe$rmse, fit_gam_sar_ml$rmse ), rmse_rho = c( abs(fit_lagsarlm$rho - 0.6), NA_real_, abs(fit_sar_cfe$rho - 0.6), abs(fit_sar_ml$rho - 0.6), abs(fit_gam_sar_cfe$rho - 0.6), abs(fit_gam_sar_ml$rho - 0.6) ), rmse_f = c( sqrt(mean((true_f - f_lagsarlm)^2)), sqrt(mean((true_f - f_gamboost)^2)), sqrt(mean((true_f - f_cfe)^2)), sqrt(mean((true_f - f_ml)^2)), sqrt(mean((true_f - f_gam_cfe)^2)), sqrt(mean((true_f - f_gam_ml)^2)) ) ) comparison_sar ## ----------------------------------------------------------------------------- sim_sem <- dgp( n = 1000, rho = 0.5, betas = c(0, 0.5, 1, -1), sigma2 = 1, model = "SEM", nonlin = TRUE, myseed = 2 ) fit_sem_cfe <- spbgam( formula = Y ~ bbs(X1) + bbs(X2) + bbs(X3), data = sim_sem$data, W = sim_sem$W, DGP = "SEM", method = "BSPA_SEM_CFE", control = list( control_gamboost = boost_control(mstop = 250, nu = 0.1), mstop_criterion = "CV", cv_mode = "random", nfold = 5, ncore = 1 ) ) fit_sem_cfe$rho fit_sem_cfe$rmse fit_sem_cfe$rho_method summary(fit_sem_cfe) ## ----------------------------------------------------------------------------- fit_sem_ml <- spbgam( formula = Y ~ bbs(X1) + bbs(X2) + bbs(X3), data = sim_sem$data, W = sim_sem$W, DGP = "SEM", method = "BSPA_SEM_ML", control = list( control_gamboost = boost_control(mstop = 250, nu = 0.1) ) ) c(CFE = fit_sem_cfe$rho, ML = fit_sem_ml$rho) summary(fit_sem_ml) ## ----------------------------------------------------------------------------- fit_errorsarlm <- spatialreg::errorsarlm( Y ~ X1 + X2 + X3, data = sim_sem$data, listw = spdep::mat2listw(sim_sem$W, style = "W"), method = "LU", zero.policy = TRUE ) fit_gamboost_sem <- mboost::gamboost( Y ~ bbs(X1) + bbs(X2) + bbs(X3), data = sim_sem$data, control = boost_control(mstop = 250, nu = 0.1) ) true_f_sem <- rowSums(sim_sem$data[, c("f0", "f1", "f2", "f3")]) f_errorsarlm <- as.numeric( { x_err <- model.matrix(Y ~ X1 + X2 + X3, sim_sem$data) b_err <- coef(fit_errorsarlm)[colnames(x_err)] x_err %*% b_err } ) f_gamboost_sem <- as.numeric(fitted(fit_gamboost_sem)) f_sem_cfe <- as.numeric(fit_sem_cfe$fittedYS) f_sem_ml <- as.numeric(fit_sem_ml$fittedYS) lambda_errorsarlm <- if (!is.null(fit_errorsarlm$lambda)) { fit_errorsarlm$lambda } else { fit_errorsarlm$lambda.se } if (length(lambda_errorsarlm) == 0L) lambda_errorsarlm <- NA_real_ scalar_or_na <- function(x) { if (is.null(x) || length(x) == 0L) return(NA_real_) as.numeric(x[1]) } comparison_sem <- data.frame( estimator = c("SEM(errorsarlm)", "gamboost", "BSPA_SEM_CFE", "BSPA_SEM_ML"), rmse_y = c( sqrt(mean((sim_sem$data$Y - suppressMessages(fitted(fit_errorsarlm)))^2)), sqrt(mean((sim_sem$data$Y - fitted(fit_gamboost_sem))^2)), scalar_or_na(fit_sem_cfe$rmse), scalar_or_na(fit_sem_ml$rmse) ), rmse_lambda = c( abs(as.numeric(lambda_errorsarlm[1]) - 0.5), NA_real_, abs(scalar_or_na(fit_sem_cfe$rho) - 0.5), abs(scalar_or_na(fit_sem_ml$rho) - 0.5) ), rmse_f = c( sqrt(mean((true_f_sem - f_errorsarlm)^2)), sqrt(mean((true_f_sem - f_gamboost_sem)^2)), sqrt(mean((true_f_sem - f_sem_cfe)^2)), sqrt(mean((true_f_sem - f_sem_ml)^2)) ) ) comparison_sem ## ----------------------------------------------------------------------------- fit_mars <- spbgam( formula = Y ~ X1 + X2 + X3, data = sim$data, W = sim$W, DGP = "SAR", method = "MARS_SAR_CFE", control = list( control_earth = list( degree = 1, nprune = 12, trace = 0 ) ) ) fit_mars$rho fit_mars$rmse fit_mars$rho_method ## ----------------------------------------------------------------------------- fit_mars_cv <- spbgam( formula = Y ~ X1 + X2 + X3, data = sim$data, W = sim$W, DGP = "SAR", method = "MARS_SAR_CFE", control = list( control_earth = list( degree = 1, use_cv_nprune = TRUE, cv_nfold = 5, cv_ncore = 1, trace = 0 ) ) ) ## ----------------------------------------------------------------------------- fit_xgb <- spbgam( formula = Y ~ X1 + X2 + X3, data = sim$data, W = sim$W, DGP = "SAR", method = "XGBOOST_SAR_CFE", control = list( mstop_init = 100, myparams_xgboost = list( booster = "gbtree", eta = 0.05, max_depth = 2, subsample = 0.8, colsample_bytree = 0.8, nfold = 5, early_stopping_rounds = 5, nthread = 1, verbose = 0 ) ) ) fit_xgb$rho fit_xgb$rmse ## ----------------------------------------------------------------------------- pred_in <- predict(fit_sar_cfe) head(pred_in) decomp <- fitted_decomp_spboost(fit_sar_cfe) head(decomp) ## ----------------------------------------------------------------------------- train_id <- 1:800 test_id <- 801:1000 W_train <- sim$W[train_id, train_id, drop = FALSE] row_sum_train <- Matrix::rowSums(W_train) W_train <- Matrix::Diagonal( x = ifelse(row_sum_train > 0, 1 / row_sum_train, 0) ) %*% W_train fit_sar_train <- spbgam( formula = Y ~ bbs(X1) + bbs(X2) + bbs(X3), data = sim$data[train_id, ], W = W_train, DGP = "SAR", method = "BSPA_SAR_CFE", control = list( control_gamboost = boost_control(mstop = 100, nu = 0.1) ) ) pred_new <- predict( fit_sar_train, newdata = sim$data[test_id, ], data = sim$data[train_id, ], W = sim$W, type = "BPN" ) head(pred_new) # diff RMSE train - test fit_sar_train$rmse rmse_test<-sqrt(mean((pred_new-sim$data[test_id, 'Y'])^2)) rmse_test ## ----------------------------------------------------------------------------- sim_sarar <- dgp( n = 1000, rho = 0.5, lambda = -0.3, betas = c(0, 0.5, 1, -1), sigma2 = 1, model = "SARAR", nonlin = TRUE, myseed = 3 ) fit_sarar <- spbgam( formula = Y ~ bbs(X1) + bbs(X2) + bbs(X3), data = sim_sarar$data, W = sim_sarar$W, W2 = sim_sarar$W2, DGP = "SARAR", method = "BSPA_SARAR_CFE", control = list( control_gamboost = boost_control(mstop = 200, nu = 0.1), iter_max = 3, tol_lambda = 1e-4 ) ) fit_sarar$rho fit_sarar$lambda fit_sarar$rmse ## ----------------------------------------------------------------------------- fit <- fit_sar_cfe summary(fit) fit$rho fit$lambda fit$rho_method fit$rmse fit$spbgam_meta