## ----setup, include=FALSE-----------------------------------------------------
# Code extracted from a vignette: each `## ----label----` header below marks
# one chunk. Chunks are evaluated only when surveytidy is installed (see the
# `eval` option), because many of them call surveytidy verbs.
library(surveycore)
knitr::opts_chunk$set(
  comment = "#>",
  eval = requireNamespace("surveytidy", quietly = TRUE)
)

## ----as_survey----------------------------------------------------------------
gss_svy <- as_survey(
  gss_2024,
  # the cluster ids
  ids = vpsu,
  # the strata
  strata = vstrat,
  # the weights
  weights = wtssps,
  nest = TRUE
)
gss_svy

## ----srs----------------------------------------------------------------------
ca_api_2000_svy <- as_survey(
  ca_api_2000,
  weights = pw,
  fpc = fpc # reduces SEs
)
ca_api_2000_svy

## ----replicate----------------------------------------------------------------
pew_jewish_svy <- as_survey_replicate(
  pew_jewish_2020,
  weights = extweight,
  repweights = extweight1:extweight100,
  type = "JK2"
)
pew_jewish_svy

## ----calibrated---------------------------------------------------------------
ns_wave1_svy <- as_survey_nonprob(ns_wave1, weights = weight)
ns_wave1_svy

## ----nwtco, eval=requireNamespace("survival", quietly=TRUE)-------------------
nwtco <- survival::nwtco
# in.subcohort is stored as 0/1 — must be logical for as_survey_twophase()
nwtco$in.subcohort <- as.logical(nwtco$in.subcohort)

# Phase 1: all 4,028 enrolled patients (each patient is their own unit)
phase1 <- as_survey(nwtco, ids = seqno)

# Phase 2: subcohort, with Phase 2 sampling stratified by relapse status
nwtco_svy <- as_survey_twophase(
  phase1,
  strata2 = rel, # Phase 2 strata: cases (rel=1) vs. non-cases (rel=0)
  subset = in.subcohort, # Logical column: TRUE = selected into Phase 2
  method = "full"
)
nwtco_svy

## ----freqs-basic--------------------------------------------------------------
get_freqs(ns_wave1_svy, consider_trump)

## ----freqs-multi--------------------------------------------------------------
get_freqs(ns_wave1_svy, c(news_sources_facebook:news_sources_other))

## ----freqs-rename-------------------------------------------------------------
ns_wave1_svy |>
  get_freqs(
    c(news_sources_facebook:news_sources_other),
    names_to = "news_source",
    values_to = "choice"
  )

## ----means-basic--------------------------------------------------------------
# Average favorability towards Biden
ns_wave1_svy |>
  # remove those who said "Not sure" (coded as 999)
  surveytidy::filter_out(cand_favorability_biden == 999) |>
  get_means(cand_favorability_biden)

## ----totals-pew---------------------------------------------------------------
pew_jewish_svy |>
  # only include jews by religion and jews of no religion to match Pew's report
  surveytidy::filter(jewishcat %in% 1:2) |>
  get_totals()

## ----totals-ns----------------------------------------------------------------
get_totals(gss_svy)

## ----totals-x-----------------------------------------------------------------
get_totals(ca_api_2000_svy, x = enroll)

## ----totals-group-------------------------------------------------------------
pew_jewish_svy |>
  # only include jews by religion and jews of no religion to match Pew's report
  surveytidy::filter(jewishcat %in% 1:2) |>
  get_totals(group = age4cat)

## ----corr-basic---------------------------------------------------------------
ns_wave1_clean_svy <- ns_wave1_svy |>
  surveytidy::drop_na(
    cand_favorability_trump,
    cand_favorability_biden
  ) |>
  # Remove respondents who answered 999 to EITHER item. A single `|` condition
  # is used so the result does not depend on how filter_out() combines
  # multiple conditions; this mirrors the if_any() filter in the corr-multi
  # chunk. NAs were dropped above, so `|` cannot yield NA here.
  surveytidy::filter_out(
    cand_favorability_trump == 999 | cand_favorability_biden == 999
  )

get_corr(
  ns_wave1_clean_svy,
  c(cand_favorability_trump, cand_favorability_biden)
)

## ----corr-multi---------------------------------------------------------------
fav_vars <- c(
  "cand_favorability_trump",
  "cand_favorability_biden",
  "cand_favorability_harris",
  "cand_favorability_sanders",
  "cand_favorability_warren",
  "cand_favorability_buttigieg",
  "cand_favorability_pence"
)

ns_wave1_multi <- ns_wave1_clean_svy |>
  # remove NAs from all variables of interest
  surveytidy::drop_na(tidyselect::all_of(fav_vars)) |>
  # remove those who said "not sure" to any variable of interest
  surveytidy::filter_out(
    dplyr::if_any(
      tidyselect::all_of(fav_vars),
      \(x) x == 999
    )
  )

get_corr(
  ns_wave1_multi,
  c(cand_favorability_trump:cand_favorability_pence)
)

## ----corr-wide----------------------------------------------------------------
get_corr(
  ns_wave1_multi,
  c(cand_favorability_trump:cand_favorability_pence),
  format = "wide"
)

## ----ratios-basic-------------------------------------------------------------
get_ratios(
  ns_wave1_multi,
  numerator = cand_favorability_trump,
  denominator = cand_favorability_biden
)

## ----quantiles-basic----------------------------------------------------------
# Quartiles and median of age (default probs = c(0.25, 0.5, 0.75))
get_quantiles(ns_wave1_svy, age)

## ----diffs-basic--------------------------------------------------------------
ns_wave1_svy |>
  surveytidy::filter_out(cand_favorability_biden == 999) |>
  get_diffs(cand_favorability_biden, treats = pid3)

## ----diffs-pct----------------------------------------------------------------
ns_wave1_svy |>
  surveytidy::filter_out(cand_favorability_biden == 999) |>
  get_diffs(
    cand_favorability_biden,
    treats = pid3,
    show_pct_change = TRUE
  )

## ----t-test-basic-------------------------------------------------------------
get_t_test(gss_svy, hrs1, by = sex)

## ----pairwise-basic-----------------------------------------------------------
get_pairwise(ns_wave1_svy, age, by = pid3)

## ----variance-basic-----------------------------------------------------------
get_variance(ns_wave1_svy, age)

## ----group-means--------------------------------------------------------------
get_freqs(ns_wave1_svy, consider_trump, group = pid3)

## ----variance-options---------------------------------------------------------
get_means(
  ns_wave1_svy,
  age,
  variance = c("se", "ci", "moe"),
  conf_level = 0.9
)

## ----n-weighted---------------------------------------------------------------
get_freqs(pew_jewish_svy, age4cat, n_weighted = TRUE)

## ----glm-fit------------------------------------------------------------------
fit <- gss_svy |>
  # convert race to a factor so one variable is a factor
  surveytidy::mutate(
    race_f = surveytidy::make_factor(race)
  ) |>
  survey_glm(hrs1 ~ sex + degree + age + race_f)

fit

## ----glm-clean----------------------------------------------------------------
clean(fit)