## ----setup, include=FALSE-----------------------------------------------------
# Gate evaluation of every chunk on the NOT_CRAN environment variable:
# chunks run only when it is exactly "true", and are skipped otherwise
# (e.g. on CRAN, where the CPU fallback would multi-thread and trip the
# "CPU time > elapsed" NOTE).
local({
  # Sys.getenv() yields "" when the variable is unset, so this is FALSE then.
  run_chunks <- identical(Sys.getenv("NOT_CRAN"), "true")
  knitr::opts_chunk$set(eval = run_chunks)
})

## -----------------------------------------------------------------------------
# library(ggmlR)

## -----------------------------------------------------------------------------
# if (ggml_vulkan_available()) {
#   cat("Vulkan is available\n")
#   ggml_vulkan_status()              # print device list and properties
# } else {
#   cat("No Vulkan GPU — running on CPU\n")
# }
# 
# n <- ggml_vulkan_device_count()
# cat("Vulkan device count:", n, "\n")

## -----------------------------------------------------------------------------
# # Low-level device registry (all backends including CPU)
# ggml_backend_load_all()
# 
# n_dev <- ggml_backend_dev_count()
# for (i in seq_len(n_dev)) {
#   dev  <- ggml_backend_dev_get(i - 1L)   # 0-based
#   name <- ggml_backend_dev_name(dev)
#   desc <- ggml_backend_dev_description(dev)
#   mem  <- ggml_backend_dev_memory(dev)
#   cat(sprintf("[%d] %s — %s\n", i, name, desc))
#   cat(sprintf("    %.1f GB free / %.1f GB total\n",
#               mem["free"] / 1e9, mem["total"] / 1e9))
# }

## -----------------------------------------------------------------------------
# # Select GPU (falls back to CPU if unavailable)
# device <- tryCatch({
#   ag_device("gpu")
#   "gpu"
# }, error = function(e) {
#   message("GPU not available, using CPU")
#   "cpu"
# })
# 
# cat("Active device:", device, "\n")

## -----------------------------------------------------------------------------
# if (device == "gpu") {
#   ag_dtype("f16")     # half-precision on Vulkan GPU
#   # ag_dtype("bf16") # bfloat16 — falls back to f16 on Vulkan automatically
# } else {
#   ag_dtype("f32")     # full precision on CPU
# }
# 
# cat("Active dtype:", ag_default_dtype(), "\n")

## -----------------------------------------------------------------------------
# if (ggml_vulkan_available()) {
#   mem <- ggml_vulkan_device_memory(0L)
#   cat(sprintf("GPU memory: %.1f MB free / %.1f MB total\n",
#               mem$free / 1e6, mem$total / 1e6))
# }

## -----------------------------------------------------------------------------
# n_gpu <- ggml_vulkan_device_count()
# cat(sprintf("Using %d GPU(s)\n", n_gpu))
# 
# # dp_train handles multi-GPU internally — see vignette("data-parallel-training")

## -----------------------------------------------------------------------------
# data(iris)
# x_train <- scale(as.matrix(iris[, 1:4]))
# y_train <- model.matrix(~ Species - 1, iris)
# 
# model <- ggml_model_sequential() |>
#   ggml_layer_dense(64L, activation = "relu", input_shape = 4L) |>
#   ggml_layer_dense(3L,  activation = "softmax") |>
#   ggml_compile(optimizer = "adam", loss = "categorical_crossentropy")
# 
# # Training runs on GPU if Vulkan is available
# model <- ggml_fit(model, x_train, y_train, epochs = 50L,
#                   batch_size = 32L, verbose = 0L)

## ----eval = FALSE-------------------------------------------------------------
# # Weights loaded to GPU once at load time
# model_onnx <- onnx_load("model.onnx", device = "vulkan")
# 
# # Repeated inference — no weight re-transfer
# for (i in seq_len(100L)) {
#   out <- onnx_run(model_onnx, list(input = batch[[i]]))
# }

## -----------------------------------------------------------------------------
# cat(ggml_version(), "\n")
# ggml_vulkan_status()   # shows "Vulkan not available" if not compiled in