## ----setup, include = FALSE--------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>", fig.width = 7, fig.height = 5, fig.align = "center", out.width = "80%", warning = FALSE, message = FALSE ) library(rbbnp) library(ggplot2) library(gridExtra) ## ----eval=FALSE--------------------------------------------------------------- # biasBound_condExpectation( # Y, # Response variable # X, # Predictor variable # x = NULL, # Evaluation points # h = NULL, # Bandwidth # h_method = "cv", # "cv" or "silverman" # alpha = 0.05, # Confidence level # kernel.fun = "Schennach2004" # ) ## ----basic-regression--------------------------------------------------------- # Generate data: Y = f(X) + noise set.seed(42) X <- gen_sample_data(size = 800, dgp = "2_fold_uniform") Y <- 2 * X - X^2 + rnorm(800, sd = 0.3) # Estimate conditional expectation E[Y|X] fit <- biasBound_condExpectation(Y, X, h = 0.1, kernel.fun = "Schennach2004") # View results fit ## ----output-details----------------------------------------------------------- # Key parameters coef(fit) # Detailed summary summary(fit) ## ----regression-plot---------------------------------------------------------- plot(fit) ## ----with-true-function------------------------------------------------------- # True function for comparison true_fn <- function(x) 2 * x - x^2 plot(fit) + stat_function(fun = true_fn, aes(color = "True E[Y|X]"), linetype = "dashed", linewidth = 1) + scale_color_manual(values = c("Estimate" = "#08306B", "True E[Y|X]" = "red")) + labs(color = NULL) + theme(legend.position = "top") ## ----fitted-values------------------------------------------------------------ # Get fitted values at evaluation points predictions <- fitted(fit) head(predictions, 10) # Evaluation points x_points <- fit$x head(x_points) ## ----conf-intervals----------------------------------------------------------- # Extract confidence intervals ci <- confint(fit) # Show the interval at interior points, where the marginal density is well away # from zero so the ratio-based interval is finite ok <- which(is.finite(ci[, "lower"]) & is.finite(ci[, "upper"])) data.frame( x = fit$x[ok], estimate = fitted(fit)[ok], lower = ci[ok, "lower"], upper = ci[ok, "upper"] )[1:6, ] ## ----bw-cv-------------------------------------------------------------------- fit_cv <- biasBound_condExpectation(Y, X, h = NULL, h_method = "cv") cat("CV bandwidth:", coef(fit_cv)["h"]) ## ----bw-silv------------------------------------------------------------------ fit_silv <- biasBound_condExpectation(Y, X, h = NULL, h_method = "silverman") cat("Silverman bandwidth:", coef(fit_silv)["h"]) ## ----real-data---------------------------------------------------------------- # Load sample data data(sample_data) head(sample_data) # Estimate regression fit_real <- biasBound_condExpectation( Y = sample_data$Y, X = sample_data$X, h = 0.1, kernel.fun = "Schennach2004" ) # Visualize plot(fit_real) + ggtitle("Regression on Sample Data") ## ----kernel-comparison, fig.width=6, fig.height=10.5, out.width="100%"-------- fit_sch <- biasBound_condExpectation(Y, X, h = 0.1, kernel.fun = "Schennach2004") fit_norm <- biasBound_condExpectation(Y, X, h = 0.1, kernel.fun = "normal") grid.arrange( plot(fit_sch) + ggtitle("Schennach2004 Kernel"), plot(fit_norm) + ggtitle("Normal Kernel"), ncol = 1 ) ## ----heteroscedastic---------------------------------------------------------- # Generate heteroscedastic data set.seed(123) X_het <- gen_sample_data(size = 600, dgp = "2_fold_uniform") Y_het <- X_het^2 + rnorm(600) * X_het # Variance increases with X fit_het <- biasBound_condExpectation(Y_het, X_het, h = 0.1) plot(fit_het) + ggtitle("Heteroscedastic Data") ## ----polynomial--------------------------------------------------------------- # Quadratic relationship set.seed(456) X_poly <- gen_sample_data(size = 500, dgp = "2_fold_uniform") Y_poly <- -X_poly^2 + 3*X_poly + rnorm(500, sd = 0.5) fit_poly <- biasBound_condExpectation(Y_poly, X_poly, h = 0.1) # Compare with true function true_poly <- function(x) -x^2 + 3*x plot(fit_poly) + stat_function(fun = true_poly, aes(color = "True: -x^2 + 3x"), linetype = "dashed", linewidth = 1) + scale_color_manual(values = c("Estimate" = "#08306B", "True: -x^2 + 3x" = "red")) + labs(color = NULL) + theme(legend.position = "top") ## ----methods-summary, eval=FALSE---------------------------------------------- # # All available methods for bbnp_regression objects # print(fit) # Concise summary # summary(fit) # Detailed statistics # plot(fit) # Visualization # coef(fit) # Parameters (A, r, B, h) # confint(fit) # Confidence intervals # fitted(fit) # Fitted values