# R code laid out as a knitr-extracted vignette script: each `## ----name----`
# line marks a chunk, and chunks flagged `eval = FALSE` are kept commented out
# so that sourcing this file skips them.

## ----setup, include = FALSE---------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  fig.width = 7,
  fig.height = 4
)
library(ensembleML)

## ----fit----------------------------------------------------------------------
data(iris)
set.seed(42)
# 120 randomly chosen rows for training; the remaining rows form the test set.
idx <- sample(nrow(iris), 120)
train <- iris[idx, ]
test <- iris[-idx, ]

rf <- em_fit(Species ~ ., data = train, method = "random_forest",
             verbose = TRUE)

## ----xgb, eval = FALSE--------------------------------------------------------
# xgb <- em_fit(Species ~ ., data = train, method = "xgboost")
# ada <- em_fit(Species ~ ., data = train, method = "adaboost")
# bag <- em_fit(Species ~ ., data = train, method = "bagging")

## ----predict------------------------------------------------------------------
preds <- em_predict(rf, newdata = test)
head(preds)

## ----prob---------------------------------------------------------------------
probs <- em_predict(rf, newdata = test, type = "prob")
head(probs, 3)

## ----evaluate-----------------------------------------------------------------
em_evaluate(rf, newdata = test)

## ----metrics------------------------------------------------------------------
em_evaluate(rf, newdata = test, metrics = c("accuracy", "f1", "kappa"))

## ----cv, eval = FALSE---------------------------------------------------------
# cv_res <- em_cv(Species ~ ., data = iris, method = "random_forest",
#                 cv_folds = 5, repeats = 3)
# cv_res$summary
# em_plot_cv(cv_res, metric = "accuracy")

## ----tune, eval = FALSE-------------------------------------------------------
# grid <- list(ntree = c(100, 300, 500), mtry = c(1, 2, 3))
#
# tuned <- em_tune(
#   Species ~ ., data = train, method = "random_forest",
#   param_grid = grid, cv_folds = 5
# )
#
# tuned$best_params
# tuned$best_score
# tuned$results

## ----compare, eval = FALSE----------------------------------------------------
# cmp <- em_compare(Species ~ ., train = train, test = test)
# cmp$table

## ----importance---------------------------------------------------------------
em_importance(rf, top_n = 4)

## ----partial, eval = FALSE----------------------------------------------------
# em_partial(rf, data = train, feature = "Petal.Length")

## ----confusion, eval = FALSE--------------------------------------------------
# em_confusion(rf, newdata = test)
# em_confusion(rf, newdata = test, normalise = TRUE)

## ----regression---------------------------------------------------------------
set.seed(7)
reg_data <- data.frame(
  x1 = rnorm(200),
  x2 = rnorm(200)
)
# The response must be built from a predictor column. Drawing a fresh
# rnorm(200) here would make y independent of x1 and x2, leaving the model
# nothing to learn. x2 stays in the data as a predictor that does not enter y.
# NOTE(review): assumes x1 was the intended signal term -- confirm.
reg_data$y <- 3 + 2 * reg_data$x1 + rnorm(200)

reg_train <- reg_data[1:160, ]
reg_test <- reg_data[161:200, ]

reg_model <- em_fit(y ~ ., data = reg_train, method = "random_forest")
em_evaluate(reg_model, newdata = reg_test)
em_residuals(reg_model, reg_test)

## ----citation, eval = FALSE---------------------------------------------------
# citation("ensembleML")

## ----session------------------------------------------------------------------
sessionInfo()