## ----include = FALSE----------------------------------------------------------
# Chunk options applied to the rendered vignette output.
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

## ----setup--------------------------------------------------------------------
library(RiskyCNV)

## ----file_paths---------------------------------------------------------------
# Locate the example files shipped in the package's "extdata" directory.
sample_file <- system.file("extdata", "sample_data.csv", package = "RiskyCNV")
cnv_file <- system.file("extdata", "cnv_data.txt", package = "RiskyCNV")
gene_file <- system.file("extdata", "gene_annotation.csv", package = "RiskyCNV")
annotated_file <- system.file("extdata", "annotated_cnv.csv", package = "RiskyCNV")
cnv_matrix_file <- system.file("extdata", "cnv_matrix.csv", package = "RiskyCNV")
rna_file <- system.file("extdata", "rna_data.csv", package = "RiskyCNV")

# Preview the clinical sample data
head(read.csv(sample_file))

## ----grade_preset-------------------------------------------------------------
# Grade grouping using the "prostate" preset.
grade_groups <- extract_metadata(
  file_path = sample_file,
  column_name = "gleason_score",
  disease_type = "prostate",
  output_dir = tempdir()
)

print(names(grade_groups))
# lengths() always returns an integer vector, even for an empty list.
print(lengths(grade_groups))

## ----grade_auto---------------------------------------------------------------
# Grade grouping with disease_type = "auto" and an explicit group count.
grade_groups_auto <- extract_metadata(
  file_path = sample_file,
  column_name = "gleason_score",
  disease_type = "auto",
  n_groups = 5,
  group_type = "grade",
  output_dir = tempdir()
)

print(names(grade_groups_auto))

## ----risk_preset--------------------------------------------------------------
# Risk classification using the "prostate" preset.
risk_groups <- classify_risk(
  file_path = sample_file,
  column_name = "gleason_score",
  disease_type = "prostate",
  output_dir = tempdir()
)

print(names(risk_groups))
print(lengths(risk_groups))

## ----risk_auto----------------------------------------------------------------
# Two risk groups
risk_2 <- classify_risk(
  file_path = sample_file,
  column_name = "gleason_score",
  disease_type = "auto",
  n_groups = 2,
  output_dir = tempdir()
)
print(names(risk_2))

# Four risk groups
risk_4 <- classify_risk(
  file_path = sample_file,
  column_name = "gleason_score",
  disease_type = "auto",
  n_groups = 4,
  output_dir = tempdir()
)
print(names(risk_4))

## ----aberration---------------------------------------------------------------
aberrations <- aberration(
  cnv_data_file = cnv_file,
  effect_size = 0.3
)

# Aberrant regions per chromosome
# NOTE(review): assumes every element of `aberrations` is a table with rows;
# vapply() fails loudly if one is not, instead of silently returning a list.
print(vapply(aberrations, nrow, integer(1)))

## ----recurrent, eval = FALSE--------------------------------------------------
# recurrent_file <- recurrent(
#   x = risk_groups,
#   risk_level = "high_risk",
#   cnv_data_file = cnv_file,
#   threshold = 2
# )
#
# recurrent_data <- read.csv(recurrent_file)
# head(recurrent_data)

## ----annotate, eval = FALSE---------------------------------------------------
# annotated <- annotate(
#   genes_file = gene_file,
#   risk_file = recurrent_file,
#   output_dir = tempdir()
# )
#
# head(annotated)

## ----cnv_matrix---------------------------------------------------------------
# Run from the session temp directory (presumably because the function writes
# relative to the working directory -- confirm), and restore the previous
# directory even if the call fails.
old_wd <- setwd(tempdir())
cnv_matrix <- tryCatch(
  create_CNVMatrix(input_file = annotated_file),
  finally = setwd(old_wd)
)

print(dim(cnv_matrix))
# Show at most the first five columns; seq_len() is safe for zero columns,
# whereas 1:0 would expand to c(1, 0).
print(cnv_matrix[, seq_len(min(5, ncol(cnv_matrix)))])

## ----correlations-------------------------------------------------------------
# Same temp-directory handling as the previous chunk.
old_wd <- setwd(tempdir())
corr_results <- tryCatch(
  correlate_with_expr(
    cnv_file = cnv_matrix_file,
    rna_file = rna_file
  ),
  finally = setwd(old_wd)
)

cat("All correlations:\n")
print(corr_results$all_correlations)

cat("\nSignificant correlations (p < 0.05):\n")
print(corr_results$significant)

cat("\nHigh-confidence CNV-driven genes (p < 0.05, r > 0.8):\n")
print(corr_results$high_correlation)

## ----generalised, eval = FALSE------------------------------------------------
# # Breast cancer with Nottingham scores
# breast_grades <- extract_metadata(
#   file_path = "breast_samples.csv",
#   column_name = "nottingham_score",
#   disease_type = "auto",
#   n_groups = 3,
#   group_type = "grade",
#   output_dir = tempdir()
# )
#
# # Lymphoma with two risk groups (limited vs. advanced)
# lymphoma_risk <- classify_risk(
#   file_path = "lymphoma_samples.csv",
#   column_name = "ann_arbor_stage",
#   disease_type = "auto",
#   n_groups = 2,
#   output_dir = tempdir()
# )