## ----setup, include = FALSE--------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>", fig.width = 7, fig.height = 4.5, fig.align = "center" ) set.seed(42L) ## ----install, eval = FALSE---------------------------------------------------- # install.packages("kronxNBC") ## ----synthetic-data----------------------------------------------------------- library(kronxNBC) set.seed(42L) n <- 300L mk <- n / 3L make_regime <- function(ret_fn, vol_mu, vol_sd, dd_mu, dd_sd, ts_mu, ts_sd, rp_a, rp_b, rp_lambda) { data.frame( log_return = ret_fn(mk), rolling_volatility = abs(rnorm(mk, vol_mu, vol_sd)), drawdown = rnorm(mk, dd_mu, dd_sd), transition_stress = abs(rnorm(mk, ts_mu, ts_sd)), residence_pressure = rpois(mk, rp_lambda), ruin_proxy = rbeta(mk, rp_a, rp_b) ) } X_syn <- rbind( # Calm: near-Gaussian, low volatility, shallow drawdowns make_regime( ret_fn = function(n) rnorm(n, 0.0002, 0.003), vol_mu = 0.004, vol_sd = 0.001, dd_mu = -0.002, dd_sd = 0.002, ts_mu = 0.001, ts_sd = 0.0005, rp_a = 1, rp_b = 20, rp_lambda = 1 ), # Steady: positive drift, moderate drawdowns make_regime( ret_fn = function(n) rnorm(n, 0.0005, 0.005), vol_mu = 0.008, vol_sd = 0.002, dd_mu = -0.008, dd_sd = 0.004, ts_mu = 0.003, ts_sd = 0.001, rp_a = 2, rp_b = 10, rp_lambda = 3 ), # Stress: fat-tailed (t_3), high volatility, deep drawdowns make_regime( ret_fn = function(n) rt(n, df = 3L) * 0.012, vol_mu = 0.022, vol_sd = 0.005, dd_mu = -0.030, dd_sd = 0.010, ts_mu = 0.015, ts_sd = 0.005, rp_a = 5, rp_b = 3, rp_lambda = 12 ) ) X_syn <- as.matrix(X_syn) y_syn <- factor( rep(c("Calm", "Steady", "Stress"), each = mk), levels = c("Calm", "Steady", "Stress") ) # 80 / 20 random split (see Section 7 for the rationale) set.seed(7L) tr_idx <- sample(n, size = floor(0.8 * n)) x_train <- X_syn[ tr_idx, ] y_train <- y_syn[ tr_idx ] x_test <- X_syn[-tr_idx, ] y_test <- y_syn[-tr_idx ] cat("Training set:", nrow(x_train), "observations\n") cat("Test set: ", nrow(x_test), "observations\n") cat("Class balance (train):\n") print(table(y_train)) ## ----fit-model---------------------------------------------------------------- model <- student_t_naive_bayes(x_train, y_train) summary(model) ## ----print-model-------------------------------------------------------------- print(model) ## ----tables-all--------------------------------------------------------------- tabs <- tables(model) print(tabs) ## ----tables-subset------------------------------------------------------------ # By name tables(model, which = "log_return") # By index (second feature: rolling_volatility) tables(model, which = 2L) ## ----coef--------------------------------------------------------------------- cf <- coef(model) cf ## ----nu-matrix---------------------------------------------------------------- nu_mat <- model$params$nu nu_mat ## ----nu-interpretation-------------------------------------------------------- cat(sprintf( "log_return: nu(Calm) = %g | nu(Steady) = %g | nu(Stress) = %g\n", nu_mat["Calm", "log_return"], nu_mat["Steady", "log_return"], nu_mat["Stress", "log_return"] )) cat(sprintf( "drawdown: nu(Calm) = %g | nu(Steady) = %g | nu(Stress) = %g\n", nu_mat["Calm", "drawdown"], nu_mat["Steady", "drawdown"], nu_mat["Stress", "drawdown"] )) ## ----plot-conditional, fig.cap = "Conditional Student-t densities by regime for all six features. Note the heavier tails on the Stress curve for log_return."---- plot(model, prob = "conditional", arg.num = list(col = c("steelblue", "forestgreen", "firebrick"), lty = c(1L, 2L, 4L)), lwd = 2) ## ----plot-marginal, fig.cap = "Marginal (prior-weighted) Student-t densities."---- plot(model, prob = "marginal", arg.num = list(col = c("steelblue", "forestgreen", "firebrick"), lty = c(1L, 2L, 4L)), lwd = 2) ## ----plot-subset, fig.cap = "Conditional densities for log_return and drawdown only."---- plot(model, which = c("log_return", "drawdown"), prob = "conditional", arg.num = list(col = c("steelblue", "forestgreen", "firebrick")), lwd = 2) ## ----predict-class------------------------------------------------------------ pred_class <- predict(model, newdata = x_test, type = "class") accuracy <- mean(pred_class == y_test) cat(sprintf("Out-of-sample accuracy: %.1f%%\n", 100 * accuracy)) ## ----confusion---------------------------------------------------------------- cm <- table(Actual = y_test, Predicted = pred_class) cm ## ----predict-prob------------------------------------------------------------- pred_prob <- predict(model, newdata = x_test, type = "prob") cat("First 6 test observations — posterior probabilities:\n") round(head(pred_prob), 4) ## ----stress-alert------------------------------------------------------------- stress_prob <- pred_prob[, "Stress"] alert_flag <- ifelse(stress_prob > 0.60, "Stress Alert", "No Alert") cat("Alert summary (test period):\n") print(table(alert_flag)) cat("\nHighest Stress probabilities in test set:\n") top_stress <- sort(stress_prob, decreasing = TRUE) round(head(top_stress, 8L), 4) ## ----gnb-setup---------------------------------------------------------------- set.seed(99L) n_stress <- 40L n_calm <- 20L X_stress_test <- cbind( log_return = rt(n_stress, df = 3L) * 0.012, rolling_volatility = abs(rnorm(n_stress, 0.022, 0.005)), drawdown = rnorm(n_stress, -0.030, 0.010), transition_stress = abs(rnorm(n_stress, 0.015, 0.005)), residence_pressure = as.numeric(rpois(n_stress, 12L)), ruin_proxy = rbeta(n_stress, 5, 3) ) X_calm_test <- cbind( log_return = rnorm(n_calm, 0.0002, 0.003), rolling_volatility = abs(rnorm(n_calm, 0.004, 0.001)), drawdown = rnorm(n_calm, -0.002, 0.002), transition_stress = abs(rnorm(n_calm, 0.001, 0.0005)), residence_pressure = as.numeric(rpois(n_calm, 1L)), ruin_proxy = rbeta(n_calm, 1, 20) ) X_mixed_test <- rbind(X_stress_test, X_calm_test) y_mixed_test <- factor( c(rep("Stress", n_stress), rep("Calm", n_calm)), levels = c("Calm", "Steady", "Stress") ) ## ----gnb-fit------------------------------------------------------------------ library(naivebayes) gnb <- naive_bayes(x_train, y_train, usekernel = FALSE) ## ----gnb-compare-------------------------------------------------------------- # Student-t NBC predictions st_pred <- predict(model, newdata = X_mixed_test, type = "class") st_acc <- mean(st_pred == y_mixed_test) # Gaussian NBC predictions gnb_pred <- predict(gnb, newdata = X_mixed_test, type = "class") gnb_acc <- mean(gnb_pred == y_mixed_test) cat(sprintf("Student-t NBC overall accuracy: %.1f%%\n", 100 * st_acc)) cat(sprintf("Gaussian NBC overall accuracy: %.1f%%\n", 100 * gnb_acc)) ## ----gnb-confusion------------------------------------------------------------ cat("Student-t NBC:\n") table(Actual = y_mixed_test, Predicted = st_pred) cat("\nGaussian NBC:\n") table(Actual = y_mixed_test, Predicted = gnb_pred) ## ----gnb-recall--------------------------------------------------------------- st_stress_recall <- mean(st_pred[y_mixed_test == "Stress"] == "Stress") gnb_stress_recall <- mean(gnb_pred[y_mixed_test == "Stress"] == "Stress") cat(sprintf("Student-t NBC Stress recall: %.1f%%\n", 100 * st_stress_recall)) cat(sprintf("Gaussian NBC Stress recall: %.1f%%\n", 100 * gnb_stress_recall)) ## ----tail-density------------------------------------------------------------- x_extreme <- quantile( abs(x_train[y_train == "Stress", "log_return"]), probs = 0.95 ) cat("Extreme log_return value:", round(x_extreme, 5), "\n") # Student-t log-density (Stress class parameters) mu_st <- model$params$mu["Stress", "log_return"] sd_st <- model$params$sd["Stress", "log_return"] nu_st <- model$params$nu["Stress", "log_return"] ld_st <- stats::dt((x_extreme - mu_st) / sd_st, df = nu_st, log = TRUE) - log(sd_st) # Gaussian log-density (same location/scale, normal tails) ld_gn <- stats::dnorm(x_extreme, mean = mu_st, sd = sd_st, log = TRUE) cat(sprintf("Student-t log-density at extreme point: %+.2f (nu = %g)\n", ld_st, nu_st)) cat(sprintf("Gaussian log-density at extreme point: %+.2f\n", ld_gn)) cat(sprintf("Difference (Student-t advantage): %+.2f log-units\n", ld_st - ld_gn)) ## ----custom-prior------------------------------------------------------------- # Stress is rare in real time: 10% prior weight model_informed <- student_t_naive_bayes( x = x_train, y = y_train, prior = c(Calm = 0.50, Steady = 0.40, Stress = 0.10) ) cat("Priors (custom):\n") print(model_informed$prior) # The classifier becomes more conservative about Stress alerts prob_informed <- predict(model_informed, newdata = x_test, type = "prob") cat("\nMean posterior P(Stress) — uniform prior: ", round(mean(pred_prob[, "Stress"]), 4), "\n") cat("Mean posterior P(Stress) — informed prior: ", round(mean(prob_informed[, "Stress"]), 4), "\n") ## ----custom-grid-heavy-------------------------------------------------------- model_heavy <- student_t_naive_bayes( x = x_train, y = y_train, nu_grid = c(3, 4, 5, 6, 7, 8, 10) ) model_heavy$params$nu ## ----custom-grid-gaussian----------------------------------------------------- model_wide <- student_t_naive_bayes( x = x_train, y = y_train, nu_grid = c(3:30, 40, 60, 100, 200, 500) ) ## ----raw-params--------------------------------------------------------------- # Regime-mean volatility (mu of rolling_volatility by class) model$params$mu[, "rolling_volatility"] # Tail-thickness ranking across regimes for each feature apply(model$params$nu, 2, which.min) # index of class with lowest nu ## ----subset-predict----------------------------------------------------------- # Predict using only the two most important risk features x_reduced <- x_test[, c("log_return", "drawdown")] pred_reduced <- predict(model, newdata = x_reduced, type = "class") cat("Accuracy on full feature set: ", round(mean(pred_class == y_test), 4), "\n") cat("Accuracy on 2-feature subset: ", round(mean(pred_reduced == y_test), 4), "\n") ## ----cor-features, eval = FALSE----------------------------------------------- # library(zoo) # # es_data <- read.csv("data.csv", stringsAsFactors = FALSE) # decoded <- read.csv("decoded_states.csv", stringsAsFactors = FALSE) # es_data <- es_data[!is.na(es_data$ret), ] # n_roll <- 24L # # # 1. Rolling volatility (SD of log-returns) # rolling_vol <- rollapply(es_data$ret, n_roll, sd, fill = NA, align = "right") # rolling_vol <- pmax(rolling_vol, 0.0001) # # # 2. Drawdown from rolling peak # rolling_max <- rollapply(es_data$close, n_roll, max, fill = NA, align = "right") # drawdown <- (es_data$close - rolling_max) / rolling_max # # # 3. Downside semi-deviation (left-tail sensitivity) # downside_dev <- function(x) { # neg_x <- x[x < 0] # if (length(neg_x) == 0L) return(0) # sqrt(mean(neg_x^2)) # } # trans_stress <- rollapply(es_data$ret, n_roll, downside_dev, fill = NA, align = "right") # # # 4. Residence pressure (consecutive hours in drawdown) # is_dd <- as.integer(drawdown < -0.005) # is_dd[is.na(is_dd)] <- 0L # res_pr <- ave(is_dd, cumsum(is_dd == 0L), FUN = cumsum) # # # 5. Ruin proxy: P(return <= -2%) under rolling distribution # rolling_mean <- rollapply(es_data$ret, n_roll, mean, fill = NA, align = "right") # ruin_proxy <- pnorm(-0.02, mean = rolling_mean, sd = rolling_vol) # # # Assemble and attach HMM labels # state_labels <- c("1" = "Stress", "2" = "Calm", "3" = "Steady") # # cor_data <- data.frame( # timestamp = es_data$timestamp, # log_return = es_data$ret, # rolling_volatility = rolling_vol, # drawdown = drawdown, # transition_stress = trans_stress, # residence_pressure = pmax(res_pr, 0.0001), # ruin_proxy = pmax(ruin_proxy, 0.0001), # regime = factor( # state_labels[as.character(decoded$state)], # levels = c("Calm", "Steady", "Stress") # ) # ) # cor_data <- cor_data[complete.cases(cor_data), ] ## ----cor-split, eval = FALSE-------------------------------------------------- # features <- c("log_return", "rolling_volatility", "drawdown", # "transition_stress", "residence_pressure", "ruin_proxy") # # set.seed(123L) # tr_idx <- sample(nrow(cor_data), floor(0.80 * nrow(cor_data))) # x_train <- as.matrix(cor_data[ tr_idx, features]) # y_train <- cor_data$regime[ tr_idx] # x_test <- as.matrix(cor_data[-tr_idx, features]) # y_test <- cor_data$regime[-tr_idx] # # model <- student_t_naive_bayes(x_train, y_train) ## ----session-info------------------------------------------------------------- sessionInfo()