## ----setup, include = FALSE--------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>", fig.width = 7, fig.height = 4.5 ) ## ----feature-engineering, eval = FALSE---------------------------------------- # library(zoo) # # es_data <- read.csv("data.csv", stringsAsFactors = FALSE) # decoded <- read.csv("decoded_states.csv", stringsAsFactors = FALSE) # # es_data <- es_data[!is.na(es_data$ret), ] # drop leading NA # stopifnot(nrow(es_data) == nrow(decoded)) # # n_roll <- 24L # 24-hour window # # cor_data <- data.frame( # timestamp = es_data$timestamp, # log_return = es_data$ret # ) ## ----feat-vol, eval = FALSE--------------------------------------------------- # cor_data$rolling_volatility <- rollapply( # es_data$ret, width = n_roll, FUN = sd, fill = NA, align = "right" # ) # cor_data$rolling_volatility <- pmax(cor_data$rolling_volatility, 0.0001) ## ----feat-dd, eval = FALSE---------------------------------------------------- # rolling_max <- rollapply(es_data$close, width = n_roll, # FUN = max, fill = NA, align = "right") # cor_data$drawdown <- (es_data$close - rolling_max) / rolling_max ## ----feat-semidev, eval = FALSE----------------------------------------------- # downside_dev <- function(x) { # neg_x <- x[x < 0] # if (length(neg_x) == 0L) return(0) # sqrt(mean(neg_x^2)) # } # cor_data$transition_stress <- rollapply( # es_data$ret, width = n_roll, FUN = downside_dev, fill = NA, align = "right" # ) # cor_data$transition_stress[is.na(cor_data$transition_stress)] <- 0.0001 ## ----feat-res, eval = FALSE--------------------------------------------------- # is_dd <- ifelse(cor_data$drawdown < -0.005, 1L, 0L) # is_dd[is.na(is_dd)] <- 0L # cor_data$residence_pressure <- ave( # is_dd, cumsum(is_dd == 0L), FUN = cumsum # ) # cor_data$residence_pressure <- pmax(cor_data$residence_pressure, 0.0001) ## ----feat-ruin, eval = FALSE-------------------------------------------------- # rolling_mean <- rollapply(es_data$ret, width = n_roll, # FUN = mean, fill = NA, align = "right") # cor_data$ruin_proxy <- pnorm(-0.02, # mean = rolling_mean, # sd = cor_data$rolling_volatility) # cor_data$ruin_proxy <- pmax(cor_data$ruin_proxy, 0.0001) ## ----feat-labels, eval = FALSE------------------------------------------------ # # KRONX empirical label mapping (derived from HMM state ordering) # state_labels <- c("1" = "Stress", "2" = "Calm", "3" = "Steady") # cor_data$regime <- factor( # state_labels[as.character(decoded$state)], # levels = c("Calm", "Steady", "Stress") # ) # # cor_data <- cor_data[complete.cases(cor_data), ] # drop rolling-window NAs # write.csv(cor_data, file = "nbc_analysis_report.txt", row.names = FALSE) ## ----train-test-split, eval = FALSE------------------------------------------- # cor_data <- read.csv("nbc_analysis_report.txt", stringsAsFactors = FALSE) # cor_data$regime <- factor(cor_data$regime, levels = c("Calm", "Steady", "Stress")) # cor_data <- cor_data[!is.na(cor_data$regime), ] # # features <- c("log_return", "rolling_volatility", "drawdown", # "transition_stress", "residence_pressure", "ruin_proxy") # # set.seed(123) # train_idx <- sample(seq_len(nrow(cor_data)), size = floor(0.80 * nrow(cor_data))) # train <- cor_data[ train_idx, ] # test <- cor_data[-train_idx, ] # # x_train <- as.matrix(train[, features]); y_train <- train$regime # x_test <- as.matrix(test[, features]); y_test <- test$regime ## ----fit-model, eval = FALSE-------------------------------------------------- # library(kronxNBC) # # model <- student_t_naive_bayes(x = x_train, y = y_train) # print(model) ## ----fit-synthetic------------------------------------------------------------ library(kronxNBC) set.seed(42L) n <- 300L mk <- n / 3L # Mimic the distributional shape of each regime X_syn <- rbind( data.frame( # Calm log_return = rnorm(mk, 0.0002, 0.003), rolling_volatility = rnorm(mk, 0.004, 0.001), drawdown = rnorm(mk, -0.002, 0.002), transition_stress = abs(rnorm(mk, 0.001, 0.0005)), residence_pressure = rpois(mk, 1), ruin_proxy = rbeta(mk, 1, 20) ), data.frame( # Steady log_return = rnorm(mk, 0.0005, 0.005), rolling_volatility = rnorm(mk, 0.008, 0.002), drawdown = rnorm(mk, -0.008, 0.004), transition_stress = abs(rnorm(mk, 0.003, 0.001)), residence_pressure = rpois(mk, 3), ruin_proxy = rbeta(mk, 2, 10) ), data.frame( # Stress: fat-tailed log_return = rt(mk, df = 3) * 0.012, rolling_volatility = rnorm(mk, 0.022, 0.005), drawdown = rnorm(mk, -0.030, 0.010), transition_stress = abs(rnorm(mk, 0.015, 0.005)), residence_pressure = rpois(mk, 12), ruin_proxy = rbeta(mk, 5, 3) ) ) X_syn <- as.matrix(X_syn) y_syn <- factor( rep(c("Calm", "Steady", "Stress"), each = mk), levels = c("Calm", "Steady", "Stress") ) set.seed(7L) tr_idx <- sample(n, size = floor(0.8 * n)) x_train <- X_syn[ tr_idx, ]; y_train <- y_syn[ tr_idx] x_test <- X_syn[-tr_idx, ]; y_test <- y_syn[-tr_idx] model <- student_t_naive_bayes(x_train, y_train) summary(model) ## ----tables------------------------------------------------------------------- tabs <- tables(model) print(tabs) ## ----coef--------------------------------------------------------------------- coef(model) ## ----plot-model, fig.cap = "Per-feature Student-t densities by regime. Heavier tails in the Stress curves are visible where nu is low."---- plot(model, prob = "conditional") ## ----predict------------------------------------------------------------------ pred_class <- predict(model, newdata = x_test, type = "class") pred_prob <- predict(model, newdata = x_test, type = "prob") accuracy <- mean(pred_class == y_test) cat("Out-of-sample accuracy:", round(accuracy, 4), "\n") ## ----confusion---------------------------------------------------------------- table(Actual = y_test, Predicted = pred_class) ## ----alert-------------------------------------------------------------------- stress_prob <- pred_prob[, "Stress"] alert_flag <- ifelse(stress_prob > 0.60, "COR Stress Alert", "No Alert") cat("\nCOR Stress Alert Summary (test period):\n") print(table(alert_flag)) cat("\nPosterior Stress probability — first 10 test observations:\n") print(round(head(stress_prob, 10L), 4)) ## ----nu-table----------------------------------------------------------------- nu_df <- as.data.frame(t(model$params$nu)) colnames(nu_df) <- paste0("nu.", c("Calm", "Steady", "Stress")) nu_df ## ----nu-commentary------------------------------------------------------------ nu_stress_ret <- model$params$nu["Stress", "log_return"] nu_calm_ret <- model$params$nu["Calm", "log_return"] cat(sprintf( "log_return: nu(Stress) = %.0f | nu(Calm) = %.0f\n", nu_stress_ret, nu_calm_ret )) if (nu_stress_ret < nu_calm_ret) { cat("=> Stress regime shows heavier tails on log_return, as hypothesised.\n") } else { cat("=> Note: with this synthetic data nu ordering may differ from empirical results.\n") } ## ----session-info------------------------------------------------------------- sessionInfo()