## ----setup, include = FALSE--------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) knitr::opts_chunk$set(fig.width=5, fig.height=4) ## ----LoadPackage, include = FALSE--------------------------------------------- library(MetabolSSMF) set.seed(12345) ## ----eval=FALSE--------------------------------------------------------------- # # Install the package # devtools::install_github("WenxuanLiu1996/MetabolSSMF") # # # Load the package # library(MetabolSSMF) ## ----Data--------------------------------------------------------------------- # Simulated data set X X <- as.matrix(SimulatedDataset) # Simulated W matrix (prototype matrix) W <- as.matrix(SimulatedPrototypes) # Simulated H matrix (membership matrix) H <- as.matrix(SimulatedMemberships) ## ----Visulisation1, warning=FALSE--------------------------------------------- # Define 4 different colors color <- c(ggsci::pal_jco("default", alpha=0.8)(7)[2], ggsci::pal_jama("default", alpha=0.8)(7)[c(3:5)]) op1 <- par(mar=c(5.1, 4.1, 4.1, 4.1), xpd=TRUE) matplot(t(W), type='l', lty = 1, ylab='Prototypes', col = color, lwd=2, ylim = c(0,1)) legend('topright', inset=c(-0.15,0), legend = 1:4, fill=color[1:4], cex = 0.8) par(op1) ## ----Visulisation2, warning=FALSE--------------------------------------------- pca_X <- prcomp(X, center = F, scale. = F) op1 <- par(mar=c(5.1, 4.1, 4.1, 4.1), xpd=TRUE) caroline::pies(lapply(apply(H, 1, list), unlist), color.table = caroline::nv(color[1:4], paste0('Pty', 1:4)), x0=pca_X$x[,1], y0=pca_X$x[,2], radii = 1.5, ylab='PC2', xlab='PC1') legend('topright', inset=c(-0.15,0), legend = 1:4, fill=color[1:4], cex = 0.8) par(op1) ## ----SSMF, eval=FALSE--------------------------------------------------------- # # Set the maximum k that is used in loop. # K <- 10 # # # Run the SSMF algorithm with various k and save the results as a list # fit_SSMF <- list() # for(k in 1:K){ # fit <- ssmf(X, k = k, lr = 0.001, meth='kmeans') # fit_SSMF[[k]] <- fit # } ## ----include=FALSE------------------------------------------------------------ K <- 10 ## ----RSS---------------------------------------------------------------------- # Extract the RSS and save as a vector rss_SSMF <- unlist(lapply(fit_SSMF, function(x) x$SSE)) # Plot RSS plot(1:K, rss_SSMF, type="b", xlab = "K", ylab = "RSS", main='Elbow Criterion') ## ----Gap, eval=FALSE---------------------------------------------------------- # # Apply the gap statistic to the simulated data # fit_gap <- gap(X, rss = rss_SSMF) ## ----Visulisation3------------------------------------------------------------ # Visualise the results op2 <- par(mar=c(5.1, 4.1, 4.1, 8.1), xpd=TRUE) plot(x = c(1:K), y = fit_gap$gap, type='b', ylim=range((fit_gap$gap), (fit_gap$gap - fit_gap$standard.error)[-1]), xlab='K', ylab='GAP', main='Gap statistic') lines(x = c(1:(K-1)), y = (fit_gap$gap - fit_gap$standard.error)[-1], col='blue', lty=2, type='b') lines(x = rep(fit_gap$optimal.k, 2), y = range(fit_gap$gap), lty = 2, col='red') legend('topright', inset=c(-0.45,0), col=c('black', 'blue', 'red'), lty=c(1, 2, 2), legend = c('Gap', '-1 SE', 'Optimal K'), cex=0.8) par(op2) ## ----Bootstrap, eval=FALSE---------------------------------------------------- # # Set the number of times to bootstrap # M <- 1000 # # # Bootstrap # # Initial H (membership) matrix to start the algorithm # # Based on the results above, k=4 here # initialisation <- init(data = X, k = 4, method = 'kmeans') # fit_boot <- bootstrap(data = X, k = 4, H = initialisation$H, mtimes=M) ## ----Visulisation4------------------------------------------------------------ op1 <- par(mar=c(5.1, 4.1, 4.1, 4.1), xpd=TRUE) matplot(t(fit_boot$W.est[c(2,4,1,3),]), type='n', xaxt='n', ylab='Features', ylim = range(fit_boot$lower,fit_boot$upper)) for(i in 1:4){ alpha <- 0.2 plat <- c(ggsci::pal_jco("default",alpha=alpha)(7)[2], ggsci::pal_jama("default", alpha=alpha)(7)[c(3:5)]) col <- switch(i, plat[1], plat[2], plat[3], plat[4]) polygon(c(1:138, 138:1), c(fit_boot$upper[c(2,4,1,3),][i,], rev(fit_boot$lower[c(2,4,1,3),][i,])), col = col, border= col, lty = 1) } matplot(t(fit_boot$W.est[c(2,4,1,3),]), type='l', lty = 1, ylab='Features', add=T, col=color, lwd=2) legend('topright', inset=c(-0.2,0), legend = 1:4, fill=color[1:4]) axis(1, at=c(1, seq(10, 130, 10), 138), labels = c(1, seq(10, 130, 10), 138), cex=0.8) par(op1) ## ----Visulisation5, warning=FALSE--------------------------------------------- op1 <- par(mar=c(5.1, 4.1, 4.1, 4.1), xpd=TRUE) caroline::pies(lapply(apply(fit_SSMF[[4]]$H, 1, list), unlist), color.table = caroline::nv(color[1:4], c(4,3,2,1)), x0=pca_X$x[,1], y0=pca_X$x[,2], radii = 1.5, ylab='PC2', xlab='PC1') legend('topright', inset=c(-0.15,0), legend = 1:4, fill=color[1:4], cex=0.8) par(op1) ## ----sARI--------------------------------------------------------------------- # Compare the true and estimated soft membership matrix sARI(fit_SSMF[[4]]$H, H) # Self comparison of the true soft membership matrix sARI(H, H) ## ----Shannon------------------------------------------------------------------ E <- rep(NA, nrow(fit_SSMF[[4]]$H)) for(i in 1:nrow(fit_SSMF[[4]]$H)){ E[i] <- diversity(fit_SSMF[[4]]$H[i,], two.power=T) } round(mean(E), 1)