## ----include = FALSE---------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>", fig.width = 7, fig.height = 5 ) ## ----setup-------------------------------------------------------------------- library(nullcat) ## ----categorical-basic-------------------------------------------------------- # Create a categorical matrix set.seed(123) x <- matrix(sample(1:4, 100, replace = TRUE), nrow = 10) # Randomize using curvecat (preserves row & column category multisets) x_rand <- curvecat(x, n_iter = 1000) # Verify margins are preserved all.equal(sort(x[1, ]), sort(x_rand[1, ])) # Row preserved all.equal(sort(x[, 1]), sort(x_rand[, 1])) # Column preserved ## ----categorical-margins------------------------------------------------------ set.seed(456) x <- matrix(sample(1:3, 60, replace = TRUE), nrow = 10) # Fixed-fixed: both row and column margins preserved x_fixed <- curvecat(x, n_iter = 1000) # Row-constrained: only row margins preserved x_row <- r0cat(x) # Column-constrained: only column margins preserved x_col <- c0cat(x) # Check row sums by category table(x[1, ]) # Original row 1 table(x_fixed[1, ]) # Preserved in curvecat table(x_row[1, ]) # Preserved in r0cat table(x_col[1, ]) # Not preserved in c0cat ## ----quantitative-basic------------------------------------------------------- # Create a quantitative community matrix set.seed(789) comm <- matrix(runif(200), nrow = 20) # Default: curvecat-backed stratified randomization with 5 strata rand1 <- quantize(comm, n_strata = 5, n_iter = 2000, fixed = "row") # Values are similar but rearranged cor(as.vector(comm), as.vector(rand1)) plot(rowSums(comm), rowSums(rand1)) plot(colSums(comm), colSums(rand1)) ## ----quantitative-strata------------------------------------------------------ set.seed(200) x <- rexp(100, .1) # More strata = less mixing, higher fidelity to original distribution s3 <- stratify(x, n_strata = 3) s10 <- stratify(x, n_strata = 10) table(s3) # Coarser bins table(s10) # Finer bins # Transform before stratifying (e.g., log-transform for skewed data) s_log <- stratify(x, n_strata = 5, transform = log1p) table(s_log) # Rank transform creates equal-occupancy strata s_rank <- stratify(x, n_strata = 5, transform = rank) table(s_rank) # Nearly equal counts per stratum # Separate zeros into their own stratum x_with_zeros <- c(0, 0, 0, x) s_zero <- stratify(x_with_zeros, n_strata = 4, zero_stratum = TRUE) table(s_zero) ## ----quantitative-fixed------------------------------------------------------- set.seed(100) comm <- matrix(rexp(100), nrow = 10) # Preserve row value multisets (quantitative row sums maintained) rand_row <- quantize(comm, n_strata = 5, fixed = "row", n_iter = 2000) all.equal(rowSums(comm), rowSums(rand_row)) # Preserve column value multisets (quantitative column sums maintained) rand_col <- quantize(comm, n_strata = 5, fixed = "col", n_iter = 2000) all.equal(colSums(comm), colSums(rand_col)) # Cell-level preservation: each value moves with its original cell location # The categorical randomization determines WHERE cells go, but each cell # carries its original value with it rand_cell <- quantize(comm, n_strata = 5, fixed = "cell", n_iter = 2000) # Values shuffled globally within strata, holding none of the above fixed rand <- quantize(comm, n_strata = 5, fixed = "stratum", n_iter = 2000) # For non-fixed-fixed methods like r0cat or c0cat, only some fixed options make sense: # r0cat with fixed="col" or c0cat with fixed="row" would be incompatible ## ----efficient-batch---------------------------------------------------------- set.seed(400) comm <- matrix(rexp(200), nrow = 20) # Prepare once prep <- quantize_prep(comm, n_strata = 5, fixed = "row", n_iter = 2000) # Generate many randomizations efficiently rand1 <- quantize(prep = prep) rand2 <- quantize(prep = prep) rand3 <- quantize(prep = prep) # Or use batch functions nulls <- quantize_batch(comm, n_reps = 99, n_strata = 5, fixed = "row", n_iter = 2000) dim(nulls) # 20 rows × 10 cols × 99 replicates ## ----trace-diagnostics-------------------------------------------------------- set.seed(300) x <- matrix(sample(1:5, 400, replace = TRUE), nrow = 20) # Generate trace showing mixing over iterations trace <- trace_cat(x, fun = "nullcat", method = "curvecat", n_iter = 1000, n_chains = 3, thin = 10) # Visual inspection plot(trace) # Automatic burn-in suggestion suggested <- suggest_n_iter(trace, tail_frac = 0.3) print(suggested) ## ----weighted-spatial--------------------------------------------------------- set.seed(42) # 20 sites x 15 species categorical matrix x <- matrix(sample(1:4, 300, replace = TRUE), nrow = 20) # Site coordinates coords <- cbind(runif(20), runif(20)) # Distance-decay weight matrix: nearby sites exchange more often d <- as.matrix(dist(coords)) W <- exp(-d / 0.3) # Gaussian kernel with scale = 0.3 # Spatially constrained randomization x_spatial <- curvecat(x, n_iter = 3000, wt_row = W) # Margins are still preserved all.equal(sort(x[1, ]), sort(x_spatial[1, ])) all.equal(sort(x[, 1]), sort(x_spatial[, 1])) ## ----weighted-strata---------------------------------------------------------- set.seed(42) x <- matrix(sample(1:3, 80, replace = TRUE), nrow = 8) # Two island groups: rows 1-4 and 5-8 W <- matrix(0, 8, 8) W[1:4, 1:4] <- 1 W[5:8, 5:8] <- 1 diag(W) <- 0 # diagonal is ignored, but zero is cleaner # Tokens only move within islands x_islands <- curvecat(x, n_iter = 3000, wt_row = W, output = "index") # Verify: tokens from rows 1-4 stayed in rows 1-4 idx <- matrix(1:length(x), nrow(x)) all(x_islands[1:4, ] %in% idx[1:4, ]) ## ----weighted-columns--------------------------------------------------------- set.seed(42) x <- matrix(sample(1:4, 200, replace = TRUE), nrow = 20) # Trait-based column weights (e.g., species with similar body size exchange more) traits <- runif(10) W_col <- exp(-as.matrix(dist(traits)) / 0.3) x_trait <- curvecat(x, n_iter = 3000, wt_col = W_col) ## ----weighted-dual------------------------------------------------------------ set.seed(42) x <- matrix(sample(1:4, 200, replace = TRUE), nrow = 20) # Spatial weights for rows, trait weights for columns coords <- cbind(runif(20), runif(20)) W_row <- exp(-as.matrix(dist(coords)) / 0.3) traits <- runif(10) W_col <- exp(-as.matrix(dist(traits)) / 0.3) x_dual <- curvecat(x, n_iter = 3000, wt_row = W_row, wt_col = W_col) ## ----vegan-integration, eval = FALSE------------------------------------------ # library(vegan) # # # Categorical data # x_cat <- matrix(sample(1:4, 100, replace = TRUE), nrow = 10) # cs <- nullcat_commsim_seq(method = "curvecat") # nm <- nullmodel(x_cat, cs) # sims <- simulate(nm, nsim = 99, burnin = 1000, thin = 100) # # # Quantitative data # x_quant <- matrix(rexp(100), nrow = 10) # cs_quant <- quantize_commsim(n_strata = 5, method = "curvecat", n_iter = 2000) # nm_quant <- nullmodel(x_quant, cs_quant) # sims_quant <- simulate(nm_quant, nsim = 99)