## ----setup, include=FALSE-----------------------------------------------------
# Global knitr chunk options for the vignette, plus a fixed RNG seed so the
# rendered output is reproducible.
knitr::opts_chunk$set(
  collapse   = TRUE,
  comment    = "#>",
  fig.width  = 6,
  fig.height = 4,
  fig.align  = "center"
)
set.seed(42)

## ----load-package-------------------------------------------------------------
library(sparsecommunity)

## ----sim-sbm------------------------------------------------------------------
# Two balanced communities, n = 300 nodes
# Within-community edge probability: 0.25; between: 0.04
B_sbm <- matrix(c(0.25, 0.04,
                  0.04, 0.25), nrow = 2)
sim_sbm <- simulate_sbm(n = 300, K = 2, B = B_sbm, seed = 1)
print(sim_sbm)

## ----sbm-structure------------------------------------------------------------
# Mean degree (note: sparse regime ~ log(n)/n * n = log(n))
mean(Matrix::rowSums(sim_sbm$A))

## ----fit-sbm------------------------------------------------------------------
fit_sbm <- community_detect(sim_sbm$A, K = 2, model = "sbm", seed = 1)
print(fit_sbm)

## ----sbm-components-----------------------------------------------------------
# Top-K eigenvalues of the regularized Laplacian
fit_sbm$eigenvalues

# Community sizes
table(fit_sbm$labels)

## ----sbm-accuracy-------------------------------------------------------------
misclustering_rate(sim_sbm$labels, fit_sbm$labels)

## ----sbm-embedding, fig.cap="Spectral embedding for SBM. Points are colored by true community."----
U <- fit_sbm$embedding
# Labels are shifted by one so the point colours line up with the legend's
# col = 2:3 (assumes labels are coded 1..K -- TODO confirm).
plot(U[, 1], U[, 2],
     col  = sim_sbm$labels + 1,
     pch  = 19,
     cex  = 0.6,
     xlab = "Eigenvector 1",
     ylab = "Eigenvector 2",
     main = "SBM: spectral embedding")
legend("topright",
       legend = c("Community 1", "Community 2"),
       col    = 2:3,
       pch    = 19,
       bty    = "n")

## ----sim-dcsbm----------------------------------------------------------------
# Three communities with strong degree heterogeneity
B_dcsbm <- matrix(c(0.5,  0.04, 0.04,
                    0.04, 0.5,  0.04,
                    0.04, 0.04, 0.5), nrow = 3)

# Degree parameters: Uniform(0.3, 1.7), creating substantial heterogeneity
# (one value per node, so the length must match n below)
set.seed(2)
theta <- runif(400, min = 0.3, max = 1.7)

sim_dcsbm <- simulate_dcsbm(n = 400, K = 3, B = B_dcsbm, theta = theta,
                            seed = 2)
print(sim_dcsbm)

## ----dcsbm-sbm-fail-----------------------------------------------------------
# Apply the SBM method to the DCSBM data, for comparison with the DCSBM fit
# in the next chunk.
fit_wrong <- community_detect(sim_dcsbm$A, K = 3, model = "sbm", seed = 2)
cat("Misclustering rate (SBM method on DCSBM data):",
    misclustering_rate(sim_dcsbm$labels, fit_wrong$labels), "\n")

## ----fit-dcsbm----------------------------------------------------------------
fit_dcsbm <- community_detect(sim_dcsbm$A, K = 3, model = "dcsbm", seed = 2)
print(fit_dcsbm)
cat("Misclustering rate (DCSBM method):",
    misclustering_rate(sim_dcsbm$labels, fit_dcsbm$labels), "\n")

## ----dcsbm-embedding, fig.cap="Row-normalized spectral embedding for DCSBM. Colors indicate true communities."----
U_dc <- fit_dcsbm$embedding
plot(U_dc[, 1], U_dc[, 2],
     col  = sim_dcsbm$labels + 1,
     pch  = 19,
     cex  = 0.5,
     xlab = "Eigenvector 1 (normalized)",
     ylab = "Eigenvector 2 (normalized)",
     main = "DCSBM: row-normalized spectral embedding")
legend("topright",
       legend = paste("Community", 1:3),
       col    = 2:4,
       pch    = 19,
       bty    = "n")

## ----karate-data, message=FALSE-----------------------------------------------
# The real-data example needs both optional packages: igraphdata supplies the
# karate graph and igraph is attached right below. Check both so that a
# missing igraph leads to a clean skip instead of an error from library().
karate_pkgs <- c("igraph", "igraphdata")
karate_pkgs_ok <- vapply(karate_pkgs, requireNamespace, logical(1),
                         quietly = TRUE)
if (!all(karate_pkgs_ok)) {
  message(paste(karate_pkgs[!karate_pkgs_ok], collapse = " and "),
          " not installed; skipping real-data example.")
  knitr::knit_exit()
}
library(igraph)
data("karate", package = "igraphdata")

# Extract adjacency matrix and true community labels
A_karate  <- igraph::as_adjacency_matrix(karate, sparse = TRUE)
true_comm <- igraph::V(karate)$Faction

cat("Nodes:", igraph::vcount(karate),
    "| Edges:", igraph::ecount(karate),
    "| Communities:", length(unique(true_comm)), "\n")
cat("Community sizes:", table(true_comm), "\n")
cat("Mean degree:", round(mean(igraph::degree(karate)), 2), "\n")

## ----karate-fit---------------------------------------------------------------
fit_karate <- community_detect(A_karate, K = 2, model = "sbm",
                               n_init = 30, seed = 42)
summary(fit_karate)
cat("Misclustering rate:",
    misclustering_rate(true_comm, fit_karate$labels), "\n")

## ----karate-plot, fig.cap="Karate club network. Node color = detected community; node shape = true faction."----
# Plot network colored by detected community
# (vertex shape encodes the true faction: circle = 1, square = otherwise)
shape_map <- ifelse(true_comm == 1, "circle", "square")
igraph::plot.igraph(
  karate,
  vertex.color = fit_karate$labels + 1,
  vertex.shape = shape_map,
  vertex.size  = 8,
  vertex.label = NA,
  main         = "Karate club: detected vs. true communities"
)
legend("bottomleft",
       legend = c("Detected: 1", "Detected: 2"),
       fill   = 2:3,
       bty    = "n",
       cex    = 0.9)
legend("bottomright",
       legend = c("True: faction 1", "True: faction 2"),
       pch    = c(19, 15),
       bty    = "n",
       cex    = 0.9)

## ----football-data------------------------------------------------------------
data("football")
# Halve the total of the adjacency entries to get the edge count (presumably
# football_A is a symmetric 0/1 matrix -- verify against the dataset docs).
cat("Nodes:", nrow(football_A), "| Edges:", sum(football_A) / 2, "\n")
cat("Mean degree:", round(mean(Matrix::rowSums(football_A)), 2),
    " log(n):", round(log(nrow(football_A)), 2), "\n")
table(football_labels)  # 12 conferences

## ----football-estimate-K------------------------------------------------------
estimate_K(football_A, K_max = 15)  # true K = 12

## ----football-fit-------------------------------------------------------------
fit_football <- community_detect(football_A, K = 12, model = "sbm",
                                 n_init = 30, seed = 1)
misclustering_rate(football_labels, fit_football$labels)

## ----football-plot, fig.cap="Spectral embedding of the football network. Colors indicate detected community; the 12 athletic conferences are largely separated."----
plot(fit_football)