## ----setup, include=FALSE----------------------------------------------------- knitr::opts_chunk$set(collapse = TRUE, comment = "#>", fig.width = 7, fig.height = 5.5, dpi = 150, warning = FALSE, message = FALSE) ## ----load-data---------------------------------------------------------------- library(palimpsestr) data(villa_romana) cat("Finds:", nrow(villa_romana), "\n") cat("Stratigraphic units:", length(unique(villa_romana$context)), "\n") cat("Material classes:", length(unique(villa_romana$class)), "\n") cat("Date range:", min(villa_romana$date_min), "to", max(villa_romana$date_max), "\n") ## ----data-summary------------------------------------------------------------- cat("Material classes:\n") sort(table(villa_romana$class), decreasing = TRUE) ## ----data-structure----------------------------------------------------------- str(villa_romana) ## ----compare-k---------------------------------------------------------------- ck <- compare_k( villa_romana, k_values = 2:7, tafonomy = "taf_score", context = "context", seed = 42 ) print(ck) ## ----gg-compare-k, fig.cap="Model selection diagnostics: BIC, PDI, and mean entropy across candidate phase counts. The dashed line marks the optimal K."---- if (requireNamespace("ggplot2", quietly = TRUE)) { gg_compare_k(ck) } ## ----fit-model---------------------------------------------------------------- fit <- fit_sef( villa_romana, k = 4, tafonomy = "taf_score", context = "context", n_init = 10, seed = 42 ) print(fit) ## ----model-summary------------------------------------------------------------ summary(fit) ## ----model-stats-------------------------------------------------------------- cat("PDI:", pdi(fit), "\n") sef_summary(fit) ## ----gg-convergence, fig.cap="EM convergence trace showing log-likelihood stabilisation across iterations."---- if (requireNamespace("ggplot2", quietly = TRUE)) { gg_convergence(fit) } ## ----gg-phasefield, fig.cap="Dominant phase assignment. Point size reflects assignment confidence (inverse entropy). Larger points indicate higher confidence."---- if (requireNamespace("ggplot2", quietly = TRUE)) { gg_phasefield(fit) } ## ----gg-entropy, fig.cap="Spatial distribution of Shannon entropy. High-entropy zones (bright) mark areas of uncertain phase assignment, typically at phase boundaries or in heavily mixed deposits."---- if (requireNamespace("ggplot2", quietly = TRUE)) { gg_entropy(fit) } ## ----gg-energy, fig.cap="Excavation Stratigraphic Energy field. Elevated ESE values (bright) highlight zones of depositional disruption where neighbouring finds belong to different phases."---- if (requireNamespace("ggplot2", quietly = TRUE)) { gg_energy(fit) } ## ----gg-intrusions, fig.cap="Candidate intrusions overlaid on the phase-field map. Circled finds exhibit high entropy, high stratigraphic energy, and low entanglement with their neighbours. Top 10 suspects are labelled."---- if (requireNamespace("ggplot2", quietly = TRUE)) { gg_intrusions(fit, top_n = 10) } ## ----gg-phase-profile, fig.cap="Vertical phase profile: depth vs horizontal position. Phase 1 (deepest) corresponds to the earliest occupation; Phase 4 (shallowest) to the most recent."---- if (requireNamespace("ggplot2", quietly = TRUE)) { gg_phase_profile(fit) } ## ----intrusions--------------------------------------------------------------- di <- detect_intrusions(fit) suspects <- di[di$intrusion_prob > 0.5, ] cat("Suspected intrusions:", nrow(suspects), "out of", nrow(villa_romana), "finds", sprintf("(%.1f%%)\n", 100 * nrow(suspects) / nrow(villa_romana))) ## ----intrusion-table---------------------------------------------------------- if (nrow(suspects) > 0) { # Merge with source data and phase assignments phase_tab <- as_phase_table(fit) susp_data <- merge(suspects, villa_romana[, c("id", "context", "class")], by = "id") susp_data <- merge(susp_data, phase_tab[, c("id", "dominant_phase")], by = "id") susp_data <- susp_data[order(susp_data$intrusion_prob, decreasing = TRUE), ] susp_show <- susp_data[, c("id", "context", "class", "dominant_phase", "intrusion_prob")] names(susp_show) <- c("Find", "US", "Class", "Phase", "Intrusion Prob.") knitr::kable(head(susp_show, 15), row.names = FALSE, digits = 3, caption = "Top suspected intrusions ranked by intrusion probability.") } ## ----us-purity---------------------------------------------------------------- us_tab <- us_summary_table(fit) knitr::kable(us_tab, row.names = FALSE, digits = 3, caption = "Per-US diagnostics: dominant phase, purity, mean entropy, mean ESE, and number of intrusions.") ## ----purity-stats------------------------------------------------------------- n_pure <- sum(us_tab$purity >= 0.9) cat(n_pure, "out of", nrow(us_tab), "units have purity >= 90%\n") ## ----transition-matrix-------------------------------------------------------- tm <- phase_transition_matrix(fit) print(tm) ## ----validation--------------------------------------------------------------- ari <- adjusted_rand_index(fit, villa_romana$true_phase) cat("Adjusted Rand Index:", round(ari, 3), "\n") ## ----confusion---------------------------------------------------------------- confusion_matrix(fit, villa_romana$true_phase) ## ----gg-confusion, fig.cap="Confusion matrix heatmap. Strong diagonal = correct phase recovery. Off-diagonal cells indicate misattributed finds, often corresponding to bioturbated or redeposited material."---- if (requireNamespace("ggplot2", quietly = TRUE)) { gg_confusion(fit, villa_romana$true_phase) } ## ----sensitivity-------------------------------------------------------------- configs <- list( equal = c(ws = 1, wz = 1, wt = 1, wc = 1), spatial = c(ws = 2, wz = 1, wt = 0.5, wc = 0.5), temporal = c(ws = 0.5, wz = 0.5, wt = 2, wc = 1) ) sens <- do.call(rbind, lapply(names(configs), function(nm) { w <- configs[[nm]] f <- fit_sef(villa_romana, k = 4, weights = w, seed = 42, tafonomy = "taf_score", context = "context") data.frame( config = nm, pdi = pdi(f), ari = adjusted_rand_index(f, villa_romana$true_phase), mean_entropy = mean(f$entropy, na.rm = TRUE), stringsAsFactors = FALSE ) })) knitr::kable(sens, digits = 4, caption = "Sensitivity of PDI, ARI, and mean entropy to weight configuration.") ## ----bootstrap---------------------------------------------------------------- bs <- bootstrap_sef(fit, n_boot = 50, true_labels = villa_romana$true_phase, verbose = FALSE) knitr::kable(bs, digits = 4, caption = "Bootstrap confidence intervals (50 replicates) for key model statistics.") ## ----gg-bootstrap, fig.cap="Bootstrap confidence intervals for PDI, mean entropy, mean energy, log-likelihood, and ARI."---- if (requireNamespace("ggplot2", quietly = TRUE)) { gg_bootstrap(bs) } ## ----harris------------------------------------------------------------------- H <- harris_from_contexts(villa_romana, z_col = "z", context_col = "context") cat("Harris penalty matrix:", nrow(H), "x", ncol(H), "\n") ## ----harris-fit--------------------------------------------------------------- fit_h <- fit_sef(villa_romana, k = 4, harris = H, tafonomy = "taf_score", context = "context", n_init = 5, seed = 42) cat("PDI without Harris:", round(pdi(fit), 4), "\n") cat("PDI with Harris: ", round(pdi(fit_h), 4), "\n") cat("ARI without Harris:", round(adjusted_rand_index(fit, villa_romana$true_phase), 4), "\n") cat("ARI with Harris: ", round(adjusted_rand_index(fit_h, villa_romana$true_phase), 4), "\n") ## ----harris-validate---------------------------------------------------------- validate_phases_harris(fit) ## ----report-en---------------------------------------------------------------- report_sef(fit, lang = "en") ## ----report-it---------------------------------------------------------------- report_sef(fit, lang = "it") ## ----export, eval=FALSE------------------------------------------------------- # export_results(fit, dir = "results/", format = "csv", prefix = "villa_romana") ## ----gis-export, eval=FALSE--------------------------------------------------- # if (requireNamespace("sf", quietly = TRUE)) { # sf_pts <- as_sf_phase(fit, crs = 32632L, dims = "XYZ") # sf_links <- as_sf_links(fit, quantile_threshold = 0.90, crs = 32632L) # sf::st_write(sf_pts, "villa_romana.gpkg", layer = "phases") # sf::st_write(sf_links, "villa_romana.gpkg", layer = "sei_links") # } ## ----interactive, eval=FALSE-------------------------------------------------- # as_plotly(gg_phasefield(fit)) # as_plotly(gg_intrusions(fit)) ## ----eval=FALSE--------------------------------------------------------------- # # install.packages("remotes") # remotes::install_github("enzococca/palimpsestr")