## ----setup, echo=FALSE, results='hide'---------------------------------------- library(pls) knitr::opts_chunk$set(fig.align = "center") # Corresponds to pdf.options(pointsize=10) and options(digits = 4) in original options(digits = 4) ## ----load-lib----------------------------------------------------------------- library(pls) ## ----load-data---------------------------------------------------------------- data(yarn) data(oliveoil) data(gasoline) data(mayonnaise) ## ----fig-NIR, echo=FALSE, fig.height=1.5, fig.cap="Gasoline NIR spectra"------ par(mar = c(2, 4, 0, 1) + 0.1) matplot(t(gasoline$NIR), type = "l", lty = 1, ylab = "log(1/R)", xaxt = "n") ind <- pretty(seq(from = 900, to = 1700, by = 2)) ind <- ind[ind >= 900 & ind <= 1700] ind <- (ind - 898) / 2 axis(1, ind, colnames(gasoline$NIR)[ind]) ## ----split-data--------------------------------------------------------------- gasTrain <- gasoline[1:50,] gasTest <- gasoline[51:60,] ## ----fit-gas1----------------------------------------------------------------- gas1 <- plsr(octane ~ NIR, ncomp = 10, data = gasTrain, validation = "LOO") ## ----summary-gas1------------------------------------------------------------- summary(gas1) ## ----plot-RMSEP-code, eval=FALSE---------------------------------------------- # plot(RMSEP(gas1), legendpos = "topright") ## ----fig-RMSEP, echo=FALSE, fig.height=2.5, fig.cap="Cross-validated RMSEP curves for the gasoline data"---- par(mar = c(4, 4, 2.5, 1) + 0.1) plot(RMSEP(gas1), legendpos = "topright") ## ----plot-pred-code, eval=FALSE----------------------------------------------- # plot(gas1, ncomp = 2, asp = 1, line = TRUE) ## ----fig-cvpreds, echo=FALSE, fig.width=3.5, fig.height=3.7, fig.cap="Cross-validated predictions for the gasoline data"---- par(mar = c(4, 4, 2.5, 1) + 0.1) plot(gas1, ncomp = 2, asp = 1, line = TRUE) ## ----plot-scores-code, eval=FALSE--------------------------------------------- # plot(gas1, plottype = "scores", comps = 1:3) ## ----fig-scores, echo=FALSE, fig.cap="Score plot for the gasoline data"------- plot(gas1, plottype = "scores", comps = 1:3) ## ----explvar------------------------------------------------------------------ explvar(gas1) ## ----fig-loadings, fig.cap="Loading plot for the gasoline data", fig.height=2.5---- par(mar = c(4, 4, 0.3, 1) + 0.1) plot(gas1, "loadings", comps = 1:2, legendpos = "topleft", labels = "numbers", xlab = "nm") abline(h = 0) ## ----fig-scores-cv, fig.cap="Calibrated and cross-validated score plots for the gasoline data", fig.height=2.5, fig.width=5---- par.old <- par(mfrow=c(1,2)) scoreplot(gas1) scoreplot(gas1, estimate="CV") par(par.old) ## ----predict------------------------------------------------------------------ predict(gas1, ncomp = 2, newdata = gasTest) ## ----rmsep-test--------------------------------------------------------------- RMSEP(gas1, newdata = gasTest) ## ----dens1-------------------------------------------------------------------- dens1 <- plsr(density ~ NIR, ncomp = 5, data = yarn) ## ----olive1------------------------------------------------------------------- dim(oliveoil$sensory) plsr(sensory ~ chemical, data = oliveoil) ## ----update------------------------------------------------------------------- trainind <- which(yarn$train == TRUE) dens2 <- update(dens1, subset = trainind) ## ----update2------------------------------------------------------------------ dens3 <- update(dens1, ncomp = 10) ## ----olive2------------------------------------------------------------------- olive1 <- plsr(sensory ~ chemical, scale = TRUE, data = oliveoil) ## ----gas2--------------------------------------------------------------------- gas2 <- plsr(octane ~ msc(NIR), ncomp = 10, data = gasTrain) ## ----predict-gas2, eval=FALSE------------------------------------------------- # predict(gas2, ncomp = 3, newdata = gasTest) ## ----selectNcomp-code, eval=FALSE--------------------------------------------- # ncomp.onesigma <- selectNcomp(gas2, method = "onesigma", plot = TRUE, # ylim = c(.18, .6)) # ncomp.permut <- selectNcomp(gas2, method = "randomization", plot = TRUE, # ylim = c(.18, .6)) ## ----fig-NComp, echo=FALSE, fig.height=4.5, fig.width=10, fig.cap="The two strategies for suggesting optimal model dimensions: the left plot shows the one-sigma strategy, the right plot the permutation strategy."---- par(mfrow = c(1,2)) ncomp.onesigma <- selectNcomp(gas1, "onesigma", plot = TRUE, ylim = c(.18, .6)) ncomp.permut <- selectNcomp(gas1, "randomization", plot = TRUE, ylim = c(.18, .6)) ## ----crossval----------------------------------------------------------------- gas2.cv <- crossval(gas2, segments = 10) plot(MSEP(gas2.cv), legendpos="topright") summary(gas2.cv, what = "validation") ## ----plot-coef-code, eval=FALSE----------------------------------------------- # plot(gas1, plottype = "coef", ncomp=1:3, legendpos = "bottomleft", # labels = "numbers", xlab = "nm") ## ----fig-gascoefs, echo=FALSE, fig.height=3, fig.cap="Regression coefficients for the gasoline data"---- par(mar = c(4, 4, 2.5, 1) + 0.1) plot(gas1, plottype = "coef", ncomp=1:3, legendpos = "bottomleft", labels = "numbers", xlab = "nm") ## ----fig-corrplot, echo=FALSE, fig.width=3.5, fig.height=3.4, fig.cap="Correlation loadings plot for the gasoline data"---- par(mar = c(4, 4, 0, 1) + 0.1) plot(gas1, plottype = "correlation") ## ----predict2----------------------------------------------------------------- predict(gas1, ncomp = 2:3, newdata = gasTest[1:5,]) ## ----predict3----------------------------------------------------------------- predict(gas1, comps = 2, newdata = gasTest[1:5,]) ## ----predict4----------------------------------------------------------------- drop(predict(gas1, ncomp = 2:3, newdata = gasTest[1:5,])) ## ----predplot-code, eval=FALSE------------------------------------------------ # predplot(gas1, ncomp = 2, newdata = gasTest, asp = 1, line = TRUE) ## ----fig-testPreds, echo=FALSE, fig.width=3.5, fig.height=3.7, fig.cap="Test set predictions"---- par(mar = c(4, 4, 2.5, 1)) predplot(gas1, ncomp = 2, newdata = gasTest, asp = 1, line = TRUE) ## ----pls.options-------------------------------------------------------------- pls.options() ## ----pls.options-set---------------------------------------------------------- pls.options(plsralg = "oscorespls") ## ----manual-cv---------------------------------------------------------------- X <- gasTrain$NIR Y <- gasTrain$octane ncomp <- 5 cvPreds <- matrix(nrow = nrow(X), ncol = ncomp) for (i in 1:nrow(X)) { fit <- simpls.fit(X[-i,], Y[-i], ncomp = ncomp, stripped = TRUE) cvPreds[i,] <- (X[i,] - fit$Xmeans) %*% drop(fit$coefficients) + fit$Ymeans } ## ----manual-cv-rmsep---------------------------------------------------------- sqrt(colMeans((cvPreds - Y)^2))