## ----setup, include = FALSE---------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = '#>',
  fig.align = 'center',
  out.width = '90%',
  fig.width = 7,
  fig.height = 4.5
)

make_table <- function(x, caption, digits = 3) {
  knitr::kable(x, caption = caption, digits = digits)
}

## ----data---------------------------------------------------------------------
# Pull playoff, regular-season, and biometric records.
playoff_stats <- nhlscraper::skater_playoff_statistics()
career_stats <- nhlscraper::skater_statistics()[, c(
  'playerId',
  'rsGamesPlayed',
  'rsPoints',
  'positionCode'
)]
player_bios <- nhlscraper::players()[, c(
  'playerId',
  'playerFullName',
  'height',
  'weight'
)]

# Join player-level tables.
analysis_tbl <- merge(
  playoff_stats,
  career_stats,
  by = c('playerId', 'positionCode'),
  all.x = TRUE
)
analysis_tbl <- merge(
  analysis_tbl,
  player_bios,
  by = 'playerId',
  all.x = TRUE
)

# Keep salary-cap skaters with meaningful samples.
analysis_tbl <- analysis_tbl[
  !is.na(analysis_tbl[['height']]) &
    !is.na(analysis_tbl[['weight']]) &
    analysis_tbl[['firstSeasonForGameType']] >= 20052006 &
    analysis_tbl[['gamesPlayed']] >= 20 &
    analysis_tbl[['rsGamesPlayed']] >= 200,
]

# Fill missing names and compute scoring rates.
analysis_tbl[['playerFullName']] <- ifelse(
  is.na(analysis_tbl[['playerFullName']]) |
    analysis_tbl[['playerFullName']] == '',
  paste(
    analysis_tbl[['skaterFirstName']],
    analysis_tbl[['skaterLastName']]
  ),
  analysis_tbl[['playerFullName']]
)
analysis_tbl[['regularPPG']] <-
  analysis_tbl[['rsPoints']] / analysis_tbl[['rsGamesPlayed']]
analysis_tbl[['playoffPPG']] <-
  analysis_tbl[['points']] / analysis_tbl[['gamesPlayed']]
analysis_tbl[['playoffLift']] <-
  analysis_tbl[['playoffPPG']] - analysis_tbl[['regularPPG']]
analysis_tbl[['positionBucket']] <- ifelse(
  analysis_tbl[['positionCode']] == 'D',
  'Defense',
  'Forward'
)

# Assign equal-count weight quartiles.
weight_share <- rank(
  analysis_tbl[['weight']],
  ties.method = 'first'
) / nrow(analysis_tbl)
analysis_tbl[['weightQuartile']] <- cut(
  weight_share,
  breaks = c(0, 0.25, 0.50, 0.75, 1),
  include.lowest = TRUE,
  labels = c('Lightest', 'Second', 'Third', 'Heaviest')
)
nrow(analysis_tbl)

## ----sample-table-------------------------------------------------------------
# Show sample of strongest playoff scorers.
sample_tbl <- analysis_tbl[
  order(-analysis_tbl[['playoffPPG']], -analysis_tbl[['gamesPlayed']]),
  c(
    'playerFullName',
    'positionCode',
    'weight',
    'rsGamesPlayed',
    'gamesPlayed',
    'regularPPG',
    'playoffPPG',
    'playoffLift'
  )
]
sample_tbl <- utils::head(sample_tbl, 8)
make_table(
  sample_tbl,
  caption = 'Top playoff scoring rates among skaters in the working sample.'
)

## ----quartiles----------------------------------------------------------------
# Summarize scoring levels and playoff lift by quartile.
quartile_summary <- aggregate(
  cbind(regularPPG, playoffPPG, playoffLift) ~ weightQuartile,
  data = analysis_tbl,
  FUN = mean
)
quartile_counts <- as.data.frame(table(analysis_tbl[['weightQuartile']]))
names(quartile_counts) <- c('weightQuartile', 'n')
quartile_summary <- merge(
  quartile_summary,
  quartile_counts,
  by = 'weightQuartile'
)
quartile_summary <- quartile_summary[
  match(levels(analysis_tbl[['weightQuartile']]), quartile_summary[['weightQuartile']]),
  c('weightQuartile', 'n', 'regularPPG', 'playoffPPG', 'playoffLift')
]
make_table(
  quartile_summary,
  caption = 'Regular-season scoring, playoff scoring, and playoff lift by weight quartile.'
)

## ----quartile-plot, fig.cap = 'Playoff scoring level and playoff lift by weight quartile.'----
# Plot playoff scoring distribution and average lift.
old_par <- graphics::par(no.readonly = TRUE)
graphics::par(mfrow = c(1, 2), mar = c(8, 4, 3, 1))
graphics::boxplot(
  playoffPPG ~ weightQuartile,
  data = analysis_tbl,
  col = c('#d9ed92', '#b5e48c', '#76c893', '#34a0a4'),
  border = '#3a5a40',
  las = 2,
  ylab = 'Playoff Points Per Game',
  xlab = ''
)
graphics::barplot(
  quartile_summary[['playoffLift']],
  names.arg = quartile_summary[['weightQuartile']],
  col = c('#f4d35e', '#ee964b', '#f95738', '#7b2cbf'),
  border = NA,
  las = 2,
  ylab = 'Playoff Lift',
  xlab = ''
)
graphics::abline(h = 0, lty = 2, col = '#4d4d4d')
graphics::par(old_par)

## ----risers-------------------------------------------------------------------
# Show largest positive playoff lifts among larger-sample skaters.
risers_tbl <- analysis_tbl[
  analysis_tbl[['gamesPlayed']] >= 40,
  c(
    'playerFullName',
    'positionBucket',
    'weight',
    'regularPPG',
    'playoffPPG',
    'playoffLift'
  )
]
risers_tbl <- risers_tbl[order(-risers_tbl[['playoffLift']]), ]
risers_tbl <- utils::head(risers_tbl, 10)
make_table(
  risers_tbl,
  caption = 'Largest playoff scoring lifts among skaters with at least 40 playoff games.'
)

## ----model--------------------------------------------------------------------
# Fit simple playoff-lift model.
lift_fit <- stats::lm(
  playoffLift ~ height + weight + I(positionCode == 'D'),
  data = analysis_tbl
)
lift_fit_tbl <- as.data.frame(summary(lift_fit)$coefficients)
lift_fit_tbl[['term']] <- rownames(lift_fit_tbl)
rownames(lift_fit_tbl) <- NULL
lift_fit_tbl[['term']] <- c(
  'Intercept',
  'Height',
  'Weight',
  'Defense indicator'
)
lift_fit_tbl <- lift_fit_tbl[, c(
  'term',
  'Estimate',
  'Std. Error',
  't value',
  'Pr(>|t|)'
)]
make_table(
  lift_fit_tbl,
  caption = 'Linear model of playoff scoring lift on height, weight, and position.',
  digits = 4
)