# Example code for the ukbflow `derive_*` helpers, organised as knitr-style
# chunks (`## ----label----`).
#
# Only the setup chunk contains live code. It sets `eval = FALSE` as the
# default chunk option, and the code of every other chunk is kept commented
# out below, so sourcing this file configures knitr and does nothing else.
# NOTE(review): the layout looks like knitr::purl() output of a vignette --
# confirm before editing by hand, since a regenerated file would overwrite it.

## ----setup, include = FALSE---------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  eval = FALSE
)

## ----load-data----------------------------------------------------------------
# library(ukbflow)
#
# df <- ops_toy(n = 500)

## ----derive-missing-----------------------------------------------------------
# df <- derive_missing(df)

## ----derive-missing-unknown---------------------------------------------------
# df <- derive_missing(df, action = "unknown")

## ----derive-missing-extra-----------------------------------------------------
# df <- derive_missing(df, extra_labels = "Not applicable")

## ----derive-covariate---------------------------------------------------------
# df <- derive_covariate(
#   df,
#   as_factor = c(
#     "p31", # sex
#     "p20116_i0", # smoking_status_i0
#     "p1558_i0" # alcohol_intake_frequency_i0
#   ),
#   factor_levels = list(
#     p20116_i0 = c("Never", "Previous", "Current")
#   )
# )

## ----derive-cut---------------------------------------------------------------
# df <- derive_cut(
#   df,
#   col = "p21001_i0", # body_mass_index_bmi_i0
#   n = 4,
#   breaks = c(18.5, 25, 30),
#   labels = c("Underweight", "Normal", "Overweight", "Obese"),
#   name = "bmi_cat"
# )
#
# df <- derive_cut(
#   df,
#   col = "p22189", # townsend_deprivation_index_at_recruitment
#   n = 4,
#   labels = c("Q1 (least deprived)", "Q2", "Q3", "Q4 (most deprived)"),
#   name = "tdi_cat"
# )

## ----derive-selfreport--------------------------------------------------------
# # Non-cancer: type 2 diabetes (field 20002)
# df <- derive_selfreport(df,
#   name = "dm",
#   regex = "type 2 diabetes"
# )

## ----derive-selfreport-cancer-------------------------------------------------
# # Cancer: lung cancer (field 20001)
# df <- derive_selfreport(df,
#   name = "lung_cancer",
#   regex = "lung cancer",
#   field = "cancer"
# )

## ----derive-hes---------------------------------------------------------------
# # Prefix match: codes starting with "I10" (hypertension)
# df <- derive_hes(df, name = "htn", icd10 = "I10")
#
# # Exact match
# df <- derive_hes(df, name = "dm_hes", icd10 = "E11", match = "exact")
#
# # Regex: E10 and E11 simultaneously
# df <- derive_hes(df, name = "dm_broad", icd10 = "^E1[01]", match = "regex")

## ----derive-fo----------------------------------------------------------------
# # ops_toy includes p131742 as a representative First Occurrence column
# df <- derive_first_occurrence(df,
#   name = "htn",
#   field = 131742L,
#   col = "p131742"
# )

## ----derive-cancer------------------------------------------------------------
# # ICD-10 only
# df <- derive_cancer_registry(df,
#   name = "skin_cancer",
#   icd10 = "^C44"
# )
#
# # With histology and behaviour filters
# df <- derive_cancer_registry(df,
#   name = "scc",
#   icd10 = "^C44",
#   histology = c(8070L, 8071L, 8072L),
#   behaviour = 3L # 3 = malignant
# )

## ----derive-death-------------------------------------------------------------
# df <- derive_death_registry(df, name = "mi", icd10 = "I21")
# df <- derive_death_registry(df, name = "dm", icd10 = "E11")
# df <- derive_death_registry(df, name = "lung", icd10 = "C34")

## ----derive-icd10-------------------------------------------------------------
# # Non-cancer disease: HES + death + First Occurrence
# df <- derive_icd10(df,
#   name = "dm",
#   icd10 = "E11",
#   source = c("hes", "death", "first_occurrence"),
#   fo_col = "p131742"
# )
#
# # Cancer outcome: cancer registry
# df <- derive_icd10(df,
#   name = "lung",
#   icd10 = "^C3[34]",
#   match = "regex",
#   source = "cancer_registry",
#   behaviour = 3L
# )

## ----derive-case--------------------------------------------------------------
# df <- derive_case(df, name = "dm")