## ----setup, include = FALSE--------------------------------------------------- knitr::opts_chunk$set( collapse = FALSE, comment = "##", fig.width = 5, fig.height = 3, fig.align = "center" ) # Optional truncation of long printed outputs, used selectively below. hook_output <- knitr::knit_hooks$get("output") knitr::knit_hooks$set(output = function(x, options) { if (!is.null(n <- options$out.lines)) { x <- xfun::split_lines(x) if (length(x) > n) { x <- c(head(x, n), "...") } x <- paste(x, collapse = "\n") } hook_output(x, options) }) set.seed(42) ## ----eval = FALSE------------------------------------------------------------- # install.packages("picreg") ## ----eval = FALSE------------------------------------------------------------- # # install.packages("remotes") # remotes::install_github("VcMaxouuu/picreg") ## ----------------------------------------------------------------------------- library(picreg) ## ----gen-data----------------------------------------------------------------- data(QuickStartExample) X <- QuickStartExample$X y <- QuickStartExample$y ## ----fit-basic---------------------------------------------------------------- fit <- pic(X, y) ## ----print-fit---------------------------------------------------------------- summary(fit) ## ----fit-fields--------------------------------------------------------------- # Number of selected features and their identifiers fit$df fit$selected # Selected regularization parameter fit$lambda # Family and penalty descriptors fit$family fit$penalty ## ----coef, out.lines = 12----------------------------------------------------- coef(fit) # original scale # coef(fit, standardized = TRUE) # standardized scale; same as fit$beta ## ----predict, out.lines = 6--------------------------------------------------- predict(fit, newx = X[1:5,]) # response predict(fit, newx = X[1:5,], type = "link") # linear predictor ## ----plot-fit, fig.height = 3------------------------------------------------- plot(fit) ## ----plot-pdb, fig.height = 3.2----------------------------------------------- plot(fit$lambda_pdb) ## ----pdb-summary-------------------------------------------------------------- pdb_summary(fit) ## ----manual-lambda------------------------------------------------------------ fit_lasso <- pic(X, y, penalty = "lasso", lambda = fit$lambda) fit_scad <- pic(X, y, penalty = "scad", lambda = fit$lambda) fit_mcp <- pic(X, y, penalty = "mcp", lambda = fit$lambda) data.frame( penalty = c("lasso", "scad", "mcp"), df = c(fit_lasso$df, fit_scad$df, fit_mcp$df), lambda = c(fit_lasso$lambda, fit_scad$lambda, fit_mcp$lambda) ) ## ----------------------------------------------------------------------------- data(BinomialExample) X <- BinomialExample$X y <- BinomialExample$y fit_binom <- pic(X, y, family = 'binomial') print(fit_binom) ## ----binomial-ex-------------------------------------------------------------- predict(fit_binom, newx = X[1:5, ], type = 'response') predict(fit_binom, newx = X[1:5, ], type = 'class') ## ----cox-example-------------------------------------------------------------- data(CoxExample) X_cox <- CoxExample$X y_cox <- CoxExample$y print(y_cox[1:7, ]) fit_cox <- pic(X_cox, y_cox, family = "cox") print(fit_cox) ## ----cox-baseline, fig.height = 4.5------------------------------------------- plot_baseline(fit_cox) ## ----cox-survival, fig.height = 3.2------------------------------------------- sf <- predict_survival_function(fit_cox, newx = X_cox[1:3, ]) plot_survival_curves(sf) ## ----cox-feature-effect, fig.height = 3.5------------------------------------- fx <- feature_effects_on_survival(fit_cox, idx = "gene_1") plot_survival_curves(fx) ## ----cox-feature-effect-custom, fig.height = 3.5------------------------------ fx <- feature_effects_on_survival(fit_cox, idx = "gene_3", values = c(-2, -1, 0, 1, 2)) plot_survival_curves(fx) ## ----assess-gaussian---------------------------------------------------------- data(QuickStartExample) X <- QuickStartExample$X y <- QuickStartExample$y fit <- pic(X, y) assess(fit, newx = X, newy = y) ## ----assess-cox--------------------------------------------------------------- data(CoxExample) fit_cox <- pic(CoxExample$X, CoxExample$y, family = "cox") assess(fit_cox, newx = CoxExample$X, newy = CoxExample$y) ## ----assess-relax------------------------------------------------------------- fit_relax <- pic(X, y, relax = TRUE) assess(fit_relax, newx = X, newy = y) ## ----assess-with-truth-------------------------------------------------------- true_active <- paste0("gene_", 1:5) assess(fit, newx = X, newy = y, true_features = true_active) ## ----phase-transition, eval=FALSE--------------------------------------------- # pt <- phase_transition( # n = c(50, 100), # p = c(100, 100), # type = "gaussian", # s_max = 20, # m = 100, # penalty = c("lasso", "scad"), # parallel = TRUE # ) # plot(pt) ## ----phase-transition-img, echo = FALSE, out.width = "85%", fig.align = "center"---- knitr::include_graphics("phase_transition_pic.png") ## ----eval=FALSE--------------------------------------------------------------- # plot(pt, metric = "fdr") ## ----phase-transition-fdr-img, echo = FALSE, out.width = "85%", fig.align = "center"---- knitr::include_graphics("phase_transition_pic_fdr.png") ## ----pdb-asymptotic, fig.height = 3.5----------------------------------------- as_ <- pdb_asymptotic( n_grid = c(20, 50, 100, 200), p = 200, type = "binomial", n_simu = 2000L ) plot(as_) ## ----compare-glmnet-data, eval = requireNamespace("glmnet", quietly = TRUE), include = FALSE---- library(glmnet) ## ----compare-glmnet-sim------------------------------------------------------- set.seed(1) n <- 100; p <- 100; s <- 5 X <- matrix(rnorm(n * p), n, p) beta <- numeric(p) beta[1:s] <- 3 y <- as.numeric(X %*% beta + rnorm(n)) true_support <- seq_len(s) ## ----compare-glmnet-fit, eval = requireNamespace("glmnet", quietly = TRUE), echo = FALSE---- # Hidden: the fits themselves and the small helpers that pull the # active set out of each fitted object. fit_pic <- pic(X, y, family = "gaussian", penalty = "lasso") sel_pic <- fit_pic$selected fit_cv <- cv.glmnet(X, y, family = "gaussian", alpha = 1) get_support <- function(fit, lambda) { sel <- which(as.numeric(stats::coef(fit, s = lambda)) != 0) sel <- sel[sel > 1L] - 1L sel } sel_min <- get_support(fit_cv, "lambda.min") sel_1se <- get_support(fit_cv, "lambda.1se") metrics <- function(sel, true_support) { data.frame( selected = length(sel), exact_recovery = setequal(sel, true_support) ) } ## ----compare-glmnet-table, eval = requireNamespace("glmnet", quietly = TRUE)---- rbind( cbind(method = "picreg (lasso)", metrics(sel_pic, true_support)), cbind(method = "cv.glmnet (lambda.min)", metrics(sel_min, true_support)), cbind(method = "cv.glmnet (lambda.1se)", metrics(sel_1se, true_support)) ) ## ----compare-phase-transition, eval = FALSE, echo = FALSE--------------------- # set.seed(2) # m <- 100L # Monte Carlo replications per s # s_max <- 12L # n <- 100L; p <- 100L # # recovery <- function(method, s) { # hits <- replicate(m, { # X <- matrix(rnorm(n * p), n, p) # b <- numeric(p) # true_idx <- if (s > 0L) sample.int(p, s) else integer(0) # if (s > 0L) b[true_idx] <- 3 # # y <- as.numeric(X %*% b + rnorm(n)) # # sel <- switch( # method, # # picreg = { # pic(X, y, family = "gaussian", penalty = "lasso")$selected # }, # # glmnet_min = { # f <- cv.glmnet( # X, y, # family = "gaussian", # alpha = 1 # ) # idx <- which(as.numeric(stats::coef(f, s = "lambda.min")) != 0) # idx[idx > 1L] - 1L # }, # # glmnet_1se = { # f <- cv.glmnet( # X, y, # family = "gaussian", # alpha = 1 # ) # idx <- which(as.numeric(stats::coef(f, s = "lambda.1se")) != 0) # idx[idx > 1L] - 1L # } # ) # # setequal(sort(sel), sort(true_idx)) # }) # # mean(hits) # } # # s_grid <- 0:s_max # # r_pic <- vapply(s_grid, recovery, numeric(1L), method = "picreg") # r_glm <- vapply(s_grid, recovery, numeric(1L), method = "glmnet_min") # r_1se <- vapply(s_grid, recovery, numeric(1L), method = "glmnet_1se") # # plot( # s_grid, r_pic, # type = "l", # lwd = 1.6, # lty = 1, # ylim = c(0, 1), # xlab = "s", # ylab = "PESR", # main = "picreg vs cv.glmnet - gaussian, lasso", # font.main = 1, # bty = "l" # ) # # lines( # s_grid, r_glm, # type = "l", # lty = 2, # lwd = 1.6 # ) # # lines( # s_grid, r_1se, # type = "l", # lty = 3, # lwd = 1.6 # ) # # legend( # "topright", # legend = c("picreg", "cv.glmnet (lambda.min)", "cv.glmnet (lambda.1se)"), # lty = c(1, 2, 3), # lwd = 1.6, # bty = "n" # ) ## ----phase-transition-comp-img, echo = FALSE, out.width = "85%", fig.align = "center"---- knitr::include_graphics("phase_transition_comparaison.png") ## ----eval = FALSE------------------------------------------------------------- # X <- matrix(rnorm(1e4 * 200), 1e4, 200) # Y <- rnorm(1e4) ## ----eval = FALSE------------------------------------------------------------- # system.time(pic(X, Y)) ## ----echo = FALSE------------------------------------------------------------- structure(c(1.238, 0.104, 1.404, 0, 0), class = "proc_time", .Names = c("user.self", "sys.self", "elapsed", "user.child", "sys.child")) ## ----eval = FALSE------------------------------------------------------------- # system.time(pic(X, Y, lambda_method = "analytical")) ## ----echo = FALSE------------------------------------------------------------- structure(c(0.188, 0.023, 0.212, 0, 0), class = "proc_time", .Names = c("user.self", "sys.self", "elapsed", "user.child", "sys.child")) ## ----eval = FALSE------------------------------------------------------------- # system.time(cv.glmnet(X, Y)) ## ----echo = FALSE------------------------------------------------------------- structure(c(0.998, 0.052, 1.054, 0, 0), class = "proc_time", .Names = c("user.self", "sys.self", "elapsed", "user.child", "sys.child")) ## ----eval = FALSE------------------------------------------------------------- # X <- matrix(rnorm(1e4 * 200), 1e4, 200) # Y <- rbinom(1e4, size = 1, prob = 0.5) ## ----eval = FALSE------------------------------------------------------------- # system.time(pic(X, Y, family = "binomial")) ## ----echo = FALSE------------------------------------------------------------- structure(c(1.805, 0.115, 1.941, 0, 0), class = "proc_time", .Names = c("user.self", "sys.self", "elapsed", "user.child", "sys.child")) ## ----eval = FALSE------------------------------------------------------------- # system.time(pic(X, Y, family = "binomial", lambda_method = "analytical")) ## ----echo = FALSE------------------------------------------------------------- structure(c(0.189, 0.023, 0.213, 0, 0), class = "proc_time", .Names = c("user.self", "sys.self", "elapsed", "user.child", "sys.child")) ## ----eval = FALSE------------------------------------------------------------- # system.time(cv.glmnet(X, Y, family = "binomial")) ## ----echo = FALSE------------------------------------------------------------- structure(c(3.191, 0.084, 3.377, 0, 0), class = "proc_time", .Names = c("user.self", "sys.self", "elapsed", "user.child", "sys.child"))