## ----include = FALSE----------------------------------------------------------
# Chunk options for rendered output: collapse source and results into one
# block and prefix printed results with "#>".
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")

## -----------------------------------------------------------------------------
# Minimal example that uses namespace-qualified calls, so it runs before
# `library(mimar)` is attached further down.
# The data has one numeric and one factor column, each containing NA values.
succinct_dat <- data.frame(
  x = c(1, NA, 3, 4, NA),
  z = factor(c("a", "b", NA, "a", "b"))
)
i <- mimar::impute(succinct_dat, imputer = "knn", m = 2, maxit = 2, seed = 1)
# NOTE(review): presumably `1` selects the first completed data set and
# "all" returns every one of the `m` data sets -- confirm against mimar docs.
mimar::complete(i, 1)
mimar::complete(i, "all")

## -----------------------------------------------------------------------------
library(mimar)

# Simulated data with 120 rows and no missing values: two numeric columns,
# two factors and one logical column. The seed keeps the draws reproducible;
# the order of the random calls below must not change.
set.seed(1)
dat <- data.frame(
  age = rnorm(120, mean = 50, sd = 10),
  bmi = rnorm(120, mean = 25, sd = 4),
  sex = factor(sample(c("F", "M"), 120, replace = TRUE)),
  group = factor(sample(c("A", "B", "C"), 120, replace = TRUE)),
  smoker = sample(c(TRUE, FALSE), 120, replace = TRUE)
)
head(dat)

## -----------------------------------------------------------------------------
# Describe the data, then print and summarise the resulting object.
d <- describe(dat)
d
summary(d)

## ----fig.width=7, fig.height=4------------------------------------------------
plot(d)

## -----------------------------------------------------------------------------
# NOTE(review): presumably lists the imputers known to the package -- confirm.
imputer_registry()

## -----------------------------------------------------------------------------
describe("imputers")

## -----------------------------------------------------------------------------
# Introduce missing values into `bmi` and `group`.
# NOTE(review): presumably `prop` is the proportion made missing and `by`
# names the variables that drive the MAR mechanism -- confirm.
a <- ampute(
  dat,
  prop = 0.25,
  mechanism = "MAR",
  target = c("bmi", "group"),
  by = c("age", "sex"),
  seed = 1
)
a
summary(a)

## ----fig.width=7, fig.height=4, fig.show='hold'-------------------------------
# Impute with "knn" and show two plots of the result.
i_knn <- impute(a, imputer = "knn", m = 3, maxit = 3, seed = 1)
plot(i_knn, type = "density", variable = "bmi")
plot(i_knn, type = "xy", formula = bmi ~ age | sex)

## -----------------------------------------------------------------------------
# Same call as in the previous chunk, repeated so this chunk stands alone.
i_knn <- impute(a, imputer = "knn", m = 3, maxit = 3, seed = 1)
i_knn
summary(i_knn)
complete(i_knn, 1)

## -----------------------------------------------------------------------------
# Two imputers run with identical settings so their summaries can be compared.
i_knn_small <- impute(a, imputer = "knn", m = 1, maxit = 2, seed = 1)
i_hotdeck <- impute(a, imputer = "hotdeck", m = 1, maxit = 2, seed = 1)
summary(i_knn_small)
summary(i_hotdeck)

## -----------------------------------------------------------------------------
# Imputer specifications carrying extra arguments. `rf_spec` and `xgb_spec`
# are only constructed here; nothing later in this script passes them to
# `impute()`.
rf_spec <- imputer("rf", num.trees = 500)
xgb_spec <- imputer("xgboost", nrounds = 100, max_depth = 3)
describe(a)

# Extra arguments can also be given directly to `impute()`; this overwrites
# the earlier `i_knn`, which the remaining chunks then use.
i_knn <- impute(a, imputer = "knn", m = 3, maxit = 3, seed = 1, donors = 10)
summary(i_knn)

## -----------------------------------------------------------------------------
# "superlearner" specification built from three library entries.
sl_spec <- imputer(
  "superlearner",
  library = c("pmm", "knn", "rpart"),
  folds = 3,
  metalearner = "inverse_loss"
)
i_sl <- impute(a, imputer = sl_spec, m = 2, maxit = 2, seed = 1)
summary(i_sl)

## -----------------------------------------------------------------------------
# Evaluate the imputation and inspect the evaluation object.
e <- evaluate(i_knn)
e
describe(e)
head(e$recovery_by_imputation)

## ----fig.width=7, fig.height=4------------------------------------------------
plot(i_knn)
plot(i_knn, type = "missing")
plot(i_knn, type = "density")
plot(e)

## -----------------------------------------------------------------------------
head(i_knn$diagnostics$trace)

## ----fig.width=7, fig.height=4------------------------------------------------
plot(i_knn, type = "trace", statistic = "mean")

## ----fig.width=7, fig.height=4------------------------------------------------
plot(i_knn, type = "density", variable = "bmi")

## ----fig.width=7, fig.height=4------------------------------------------------
plot(i_knn, type = "boxplot", variable = "bmi")

## ----fig.width=7, fig.height=4------------------------------------------------
plot(i_knn, type = "strip", variable = "bmi")

## ----fig.width=7, fig.height=4------------------------------------------------
plot(i_knn, type = "xy", formula = bmi ~ age | sex)

## ----fig.width=7, fig.height=4------------------------------------------------
plot(i_knn, type = "proportion", variable = "group")
plot(i_knn, type = "proportion", formula = group ~ sex)

## -----------------------------------------------------------------------------
# Pool three estimates of one quantity, each with its standard error.
pool(c(0.10, 0.11, 0.09), std.error = c(0.04, 0.05, 0.04), name = "age")

## -----------------------------------------------------------------------------
# Pool coefficient vectors: one named vector and one covariance matrix per
# imputation. The matrices are diagonal, built from squared standard errors.
betas <- list(
  c(age = 0.10, bmi = 0.30),
  c(age = 0.11, bmi = 0.32),
  c(age = 0.09, bmi = 0.29)
)
covariances <- list(
  diag(c(0.04, 0.08)^2),
  diag(c(0.05, 0.09)^2),
  diag(c(0.04, 0.08)^2)
)
pooled_betas <- pool(betas, covariance = covariances)
pooled_betas
pooled_betas$estimate
pooled_betas$variance

## -----------------------------------------------------------------------------
# Pool matrix-valued estimates (one 2 x 2 matrix per imputation); no
# uncertainty argument is supplied.
survival_probabilities <- list(
  matrix(c(0.90, 0.80, 0.70, 0.60), nrow = 2),
  matrix(c(0.91, 0.79, 0.72, 0.61), nrow = 2),
  matrix(c(0.89, 0.81, 0.71, 0.59), nrow = 2)
)
pooled_survival <- pool(survival_probabilities)
pooled_survival
pooled_survival$estimate

## -----------------------------------------------------------------------------
# Pool results held in a long data frame: one row per term and imputation.
external_results <- data.frame(
  term = rep(c("age", "bmi"), each = 3),
  estimate = c(0.10, 0.11, 0.09, 0.30, 0.32, 0.29),
  std.error = c(0.04, 0.05, 0.04, 0.08, 0.09, 0.08),
  imputation = rep(1:3, times = 2)
)
p <- pool(external_results)
p