## ----SetUp, echo = FALSE, eval = TRUE, results = "hide"----------------------- # R options & configuration: set.seed(9) suppressPackageStartupMessages(library("knitr")) suppressPackageStartupMessages(library("chemometrics")) suppressPackageStartupMessages(library("LearnPCA")) suppressPackageStartupMessages(library("ade4")) suppressPackageStartupMessages(library("latex2exp")) # Stuff specifically for knitr: opts_chunk$set(eval = TRUE, echo = FALSE, results = "hide") ## ----echo = FALSE------------------------------------------------------------- desc <- packageDescription("LearnPCA") ## ----echo = FALSE, results = "asis"------------------------------------------- res <- knitr::knit_child("top_matter.md", quiet = TRUE) cat(res, sep = '\n') ## ----prep-data, echo = TRUE, eval = TRUE, results = "show"-------------------- data(tintoodiel) # activate the data set from package ade4 TO <- tintoodiel$tab # to save typing, rename the element with the data # select just a few samples (in rows) & variables (in columns) FeCu <- TO[28:43,c("Fe2O3", "Cu")] summary(FeCu) ## ----metals-raw-table, results = "asis"--------------------------------------- if (!is_latex_output()) { FeCu2 <- FeCu # create a copy to format the column names names(FeCu2) <- c("Fe~2~O~3~", "Cu") kable(FeCu2, row.names = FALSE, caption = "The FeCu data set. Values for Fe~2~O~3~ are percentages, those for Cu are ppm.") } if (is_latex_output()) { kable(FeCu, row.names = FALSE, caption = "The FeCu data set. Values for $\\mathrm{Fe_{2}O_{3}}$ are percentages, those for Cu are ppm.") } ## ----plot-raw-dotplot, eval = TRUE, fig.cap = "The range of the raw data values in `FeCu`."---- stripchart(FeCu, vertical = TRUE, pch = 20, xlim = c(0.5, 2.5), ylab = TeX(r"(Values ($Fe_{2}O_{3}$ in percent, Cu in ppm))"), xlab = "") ## ----plot-raw-data-2D, eval = TRUE, fig.cap = "The relationship between the raw data values in `FeCu`.", fig.asp = 1---- plot(x = FeCu$Fe2O3, y = FeCu$Cu, pch = 20, xlab = TeX(r"($Fe_{2}O_{3}$ (percent))"), ylab = "Cu (ppm)") ## ----center-raw-data, echo = TRUE--------------------------------------------- FeCu_centered <- scale(FeCu, scale = FALSE, center = TRUE) # see ?scale for defaults ## ----plot-centered-data, eval = TRUE, fig.cap = "Centered data values in `FeCu`."---- stripchart(as.data.frame(FeCu_centered), vertical = TRUE, pch = 20, xlim = c(0.5, 2.5), ylab = "centered values", xlab = "") ## ----scale-centered-data, echo = TRUE----------------------------------------- FeCu_centered_scaled <- scale(FeCu_centered, center = FALSE, scale = TRUE) # see ?scale for defaults ## ----show-std-dev, echo = TRUE, results = "show"------------------------------ apply(FeCu_centered_scaled, 2, sd) ## ----plot-centered-scaled-data, fig.cap = "Centered and scaled data."--------- stripchart(as.data.frame(FeCu_centered_scaled), vertical = TRUE, pch = 20, xlim = c(0.5, 2.5), ylab = "centered & scaled values", xlab = "") ## ----prcomp, echo = TRUE, results = "show"------------------------------------ pca_FeCu <- prcomp(FeCu_centered_scaled) str(pca_FeCu) ## ----plot-scores, fig.cap = "Scores.", fig.asp = 1---------------------------- plot(pca_FeCu$x, pch = 20, xlab = "PC1", ylab = "PC2") ## ----process-all-data, echo = TRUE-------------------------------------------- pca_TO <- prcomp(TO, scale. = TRUE) ## ----plot-all-data, fig.cap = "Score plot using all the data.", fig.asp = 1---- plot(pca_TO$x, pch = 20, xlab = "PC1", ylab = "PC2") ## ----scree, echo = TRUE, fig.cap = "Scree plot."------------------------------ plot(pca_FeCu, main = "") ## ----loading1, echo = TRUE, fig.cap = "Plot of the loadings on PC1."---------- barplot(pca_FeCu$rotation[,1], main = "") ## ----recon, eval = FALSE, echo = TRUE----------------------------------------- # Xhat <- pca_FeCu$x[, 1:z] %*% t(pca_FeCu$rotation[, 1:z]) ## ----unscale, eval = FALSE, echo = TRUE--------------------------------------- # Xhat <- scale(Xhat, center = FALSE, scale = 1/pca_FeCu$scale) ## ----uncenter, eval = FALSE, echo = TRUE-------------------------------------- # Xhat <- scale(Xhat, center = -pca_FeCu$center, scale = FALSE) ## ----full-recon-2, echo = TRUE, results = "TRUE"------------------------------ TOhat <- pca_TO$x %*% t(pca_TO$rotation) TOhat <- scale(TOhat, center = FALSE, scale = 1/pca_TO$scale) TOhat <- scale(TOhat, center = -pca_TO$center, scale = FALSE) ## ----mean-diff, echo = TRUE, results = "show"--------------------------------- mean(TOhat - as.matrix(TO)) ## ----reconstruction, results = "show"----------------------------------------- ntests <- ncol(TO) rmsd <- rep(NA_real_, ntests) for (i in 1:ntests) { ans <- XtoPCAtoXhat(TO, i, sd) error <- ans - TO rmsd[i] <- sqrt(sum(error^2)/length(error)) # RMSD } ## ----rmsd, fig.cap = "Reduction of error as the number of components included in the reconstruction increases."---- plot(rmsd, type = "b", ylim = c(0.0, max(rmsd)), xlab = "No. of Components Retained", ylab = "Error") abline(h = 0.0, col = "pink") ## ----echo = FALSE, results = "asis"------------------------------------------- res <- knitr::knit_child("refer_to_works_consulted.md", quiet = TRUE) cat(res, sep = '\n')