## ----setup, include = FALSE--------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>", eval = FALSE ) ## ----grs-check-local---------------------------------------------------------- # library(ukbflow) # # # Local: weights file on your machine # w <- grs_check("weights.csv", dest = "weights_clean.txt") # #> Read 'weights.csv': 312 rows, 5 columns. # #> ✔ No NA values. # #> ✔ No duplicate SNPs. # #> ✔ All SNP IDs match rs[0-9]+ format. # #> ✔ All effect alleles are A/T/C/G. # #> Beta summary: # #> Range : -0.412 to 0.589 # #> Mean |beta|: 0.183 # #> Positive : 187 (59.9%) # #> Negative : 125 (40.1%) # #> Zero : 0 # #> ✔ Weights file passed checks: 312 SNPs ready for UKB RAP. # #> ✔ Saved: 'weights_clean.txt' ## ----grs-check-rap------------------------------------------------------------ # # On RAP (RStudio) -- use /mnt/project/ paths directly # w <- grs_check( # file = "/mnt/project/weights/weights.csv", # dest = "/mnt/project/weights/weights_clean.txt" # ) ## ----grs-bgen2pgen-test------------------------------------------------------- # # Test on the smallest chromosome first # ids <- grs_bgen2pgen(chr = 22, dest = "/pgen/", priority = "high") # #> Uploading 'grs_bgen2pgen_std.R' to RAP ... # #> ✔ Uploaded: '/grs_bgen2pgen_std.R' # #> Submitting 1 job(s) -- mem2_ssd1_v2_x4 / priority: high # #> ✔ chr22 -> 'job-Gxxxxxxxxxxxxxxxxxxxxxxxx' # #> ✔ 1/1 job(s) submitted. Monitor with job_ls(). ## ----grs-bgen2pgen-all-------------------------------------------------------- # # Full run: small chromosomes on standard, large on upgraded instance # ids_small <- grs_bgen2pgen(chr = 17:22, dest = "/pgen/") # ids_large <- grs_bgen2pgen(chr = 1:16, dest = "/pgen/", instance = "large") # # # Monitor progress (job_wait() takes one job ID at a time) # job_ls() # for (id in c(ids_small, ids_large)) job_wait(id) ## ----grs-score---------------------------------------------------------------- # ids <- grs_score( # file = c( # grs_a = "weights/grs_a_weights.txt", # grs_b = "weights/grs_b_weights.txt" # ), # pgen_dir = "/mnt/project/pgen", # dest = "/grs/", # priority = "high" # ) # #> ── Uploading 2 weight file(s) to RAP ──────────────────────────────────────── # #> Uploading 'grs_a_weights.txt' ... # #> ✔ Uploaded: '/grs_a_weights.txt' # #> Uploading 'grs_b_weights.txt' ... # #> ✔ Uploaded: '/grs_b_weights.txt' # #> Submitting 2 job(s) -- mem2_ssd1_v2_x4 / priority: high # #> ✔ grs_a -> 'job-Gxxxxxxxxxxxxxxxxxxxxxxxx' # #> ✔ grs_b -> 'job-Gyyyyyyyyyyyyyyyyyyyyyyyy' # #> ✔ 2/2 job(s) submitted. Monitor with job_ls(). # # job_wait(ids) ## ----grs-score-rap------------------------------------------------------------ # # On RAP: weights already at /mnt/project/grs_a_weights.txt # ids <- grs_score( # file = c(grs_a = "/mnt/project/grs_a_weights.txt"), # pgen_dir = "/mnt/project/pgen", # dest = "/grs/" # ) # #> ℹ grs_a_weights.txt already at RAP root, skipping upload. ## ----grs-standardize---------------------------------------------------------- # # Auto-detect all columns containing "grs" (case-insensitive) # cohort <- grs_standardize(cohort) # #> Auto-detected 2 GRS column(s): 'GRS_a', 'GRS_b' # #> ✔ GRS_a -> GRS_a_z [mean=0.0000, sd=1.0000] # #> ✔ GRS_b -> GRS_b_z [mean=0.0000, sd=1.0000] ## ----grs-standardize-explicit------------------------------------------------- # # Or specify columns explicitly # cohort <- grs_standardize(cohort, grs_cols = c("GRS_a", "GRS_b")) ## ----grs-validate-logistic---------------------------------------------------- # res <- grs_validate( # data = cohort, # grs_cols = c("GRS_a_z", "GRS_b_z"), # outcome_col = "outcome" # ) # #> ── Creating GRS groups ─────────────────────────────────────────────────────── # #> ── Effect per SD (OR) ─────────────────────────────────────────────────────── # #> ── High vs Low ────────────────────────────────────────────────────────────── # #> ── Trend test ─────────────────────────────────────────────────────────────── # #> ── AUC ────────────────────────────────────────────────────────────────────── # #> ✔ Validation complete. # # res$per_sd # res$discrimination ## ----grs-validate-cox--------------------------------------------------------- # res <- grs_validate( # data = cohort, # grs_cols = c("GRS_a_z", "GRS_b_z"), # outcome_col = "outcome", # time_col = "followup_years", # covariates = c("age", "sex", paste0("pc", 1:10)) # ) # # res$per_sd # HR per SD x model # res$high_vs_low # HR: top 20% vs bottom 20% # res$trend # p-trend across Q1–Q4 # res$discrimination # C-index with 95% CI ## ----grs-full-pipeline-------------------------------------------------------- # library(ukbflow) # # # 1. Validate weights (local) # grs_check("weights.csv", dest = "weights_clean.txt") # # # 2. Convert BGEN -> PGEN on RAP (submit jobs) # ids_std <- grs_bgen2pgen(chr = 17:22, dest = "/pgen/", maf = 0.01) # ids_lrg <- grs_bgen2pgen(chr = 1:16, dest = "/pgen/", maf = 0.01, instance = "large") # for (id in c(ids_std, ids_lrg)) job_wait(id) # # # 3. Score GRS on RAP (submit jobs) # score_ids <- grs_score( # file = c(grs_a = "weights_clean.txt"), # pgen_dir = "/mnt/project/pgen", # maf = 0.01, # must match grs_bgen2pgen() # dest = "/grs/" # ) # job_wait(score_ids) # # # 4. Download score CSV from RAP # fetch_file("/grs/GRS_a_scores.csv", dest = "GRS_a_scores.csv") # # # 5. Merge into analysis dataset and standardise # # cohort: your analysis data.table with IID and phenotype columns # scores <- data.table::fread("GRS_a_scores.csv") # columns: IID, GRS_a # cohort <- scores[cohort, on = "IID"] # right-join: keep all cohort rows # cohort <- grs_standardize(cohort, grs_cols = "GRS_a") # # # 6. Validate # res <- grs_validate( # data = cohort, # grs_cols = "GRS_a_z", # outcome_col = "outcome", # time_col = "followup_years", # covariates = c("age", "sex", paste0("pc", 1:10)) # ) # # res$per_sd # res$discrimination