## ----setup, include = FALSE---------------------------------------------------
# Global knitr chunk options for the rendered vignette.
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  fig.width = 7,
  fig.height = 5
)

## ----binary-classification----------------------------------------------------
library(evoFE)

data(mtcars)
df <- mtcars
df$am <- as.integer(df$am) # target: 0 = automatic, 1 = manual

# Seed before every evolve_features() call so each run is repeatable.
set.seed(42)
res <- evolve_features(
  data = df,
  target_col = "am",
  task = "classification",
  evaluator = "xgboost",
  generations = 5,
  pop_size = 8,
  cv_folds = 3,
  early_stopping_rounds = 3,
  verbose = TRUE
)

## ----binary-inspect-----------------------------------------------------------
# View the winning recipe and its fitness score
cat("Best recipe:", individual_to_recipe_string(res$best_individual), "\n")
cat("Fitness: ", res$best_individual$fitness, "\n")

## ----binary-predict-features--------------------------------------------------
# predict() on the result object; presumably returns the engineered feature
# table for the new rows -- confirm against the evoFE documentation.
engineered <- predict(res, df[1:5, ])
head(engineered)

## ----binary-predict-model-----------------------------------------------------
# predict_model() on the same five rows.
preds <- predict_model(res, df[1:5, ])
preds

## ----regression---------------------------------------------------------------
data(iris)

set.seed(123)
# Only the four numeric iris columns are passed; Petal.Length is the target.
res_reg <- evolve_features(
  data = iris[, c("Sepal.Length", "Sepal.Width", "Petal.Width", "Petal.Length")],
  target_col = "Petal.Length",
  task = "regression",
  evaluator = "xgboost",
  generations = 5,
  pop_size = 8,
  cv_folds = 3,
  early_stopping_rounds = 3,
  verbose = TRUE
)

cat("Best recipe:", individual_to_recipe_string(res_reg$best_individual), "\n")
cat("Fitness (neg RMSE):", res_reg$best_individual$fitness, "\n")

## ----regression-predict-------------------------------------------------------
preds_reg <- predict_model(res_reg, iris[1:10, ])

# Compare predictions to actuals
data.frame(
  actual = iris$Petal.Length[1:10],
  predicted = round(preds_reg, 2)
)

## ----multiclass---------------------------------------------------------------
# Copy of iris with the Species target stored as character instead of factor.
iris_mc <- iris
iris_mc$Species <- as.character(iris_mc$Species)

set.seed(99)
# Train on iris_mc (not iris) so the data frame used for training has the same
# column types as the one passed to predict_model() in the next chunk.
# NOTE(review): confirm evolve_features() accepts a character target column.
res_mc <- evolve_features(
  data = iris_mc,
  target_col = "Species",
  task = "multiclass",
  evaluator = "xgboost",
  generations = 5,
  pop_size = 8,
  cv_folds = 3,
  early_stopping_rounds = 3,
  verbose = TRUE
)

cat("Best recipe:", individual_to_recipe_string(res_mc$best_individual), "\n")

## ----multiclass-predict-------------------------------------------------------
# Rows 1, 51 and 101 of iris are the first row of each species.
probs <- predict_model(res_mc, iris_mc[c(1, 51, 101), ])
round(probs, 3)

## ----inspect-recipe-----------------------------------------------------------
ind <- res$best_individual

# Human-readable recipe string
cat(individual_to_recipe_string(ind), "\n")

# Number of evolved genes
cat("Evolved genes:", length(ind$genes), "\n")

# Original columns retained
cat("Numeric cols: ", paste(ind$numeric_cols, collapse = ", "), "\n")
cat("Categorical cols:", paste(ind$categorical_cols, collapse = ", "), "\n")

# Individual gene details: transformer(input columns) -> output column
for (g in ind$genes) {
  cat(sprintf(
    " %s(%s) → %s\n",
    g$transformer_name,
    paste(g$input_cols, collapse = ", "),
    g$output_col
  ))
}

## ----reproducibility----------------------------------------------------------
# Two runs with the same seed and the same arguments.
set.seed(42)
r1 <- evolve_features(
  iris[, 1:5], "Petal.Length",
  task = "regression",
  generations = 3, pop_size = 5,
  evaluator = "xgboost", verbose = FALSE
)

set.seed(42)
r2 <- evolve_features(
  iris[, 1:5], "Petal.Length",
  task = "regression",
  generations = 3, pop_size = 5,
  evaluator = "xgboost", verbose = FALSE
)

# identical() is deliberate here: the check is for exact equality of the
# fitness value and of the recipe string, not for equality within a tolerance.
identical(r1$best_individual$fitness, r2$best_individual$fitness)
identical(
  individual_to_recipe_string(r1$best_individual),
  individual_to_recipe_string(r2$best_individual)
)

## ----end-to-end---------------------------------------------------------------
data(iris)
set.seed(1)
# 70/30 split by random row index.
idx <- sample(nrow(iris), 0.7 * nrow(iris))
train <- iris[idx, ]
test <- iris[-idx, ]

# Evolve on training data only
set.seed(7)
recipe <- evolve_features(
  data = train[, 1:4], # exclude Species
  target_col = "Petal.Length",
  task = "regression",
  evaluator = "xgboost",
  generations = 5,
  pop_size = 8,
  verbose = FALSE
)

# Predict on held-out test data
test_preds <- predict_model(recipe, test[, 1:4])

# Evaluate
rmse <- sqrt(mean((test$Petal.Length - test_preds)^2))
cat(sprintf("Test RMSE: %.4f\n", rmse))
cat(sprintf("Recipe: %s\n", individual_to_recipe_string(recipe$best_individual)))

## ----session-info-------------------------------------------------------------
sessionInfo()