## ----echo=F, results="asis"---------------------------------------------------
cat(" ")
cat(" ")

## ----setup, include = FALSE---------------------------------------------------
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")

## ----echo=T, eval=T-----------------------------------------------------------
library(OTclust)
data(sim1)

## ----echo=F, eval=F, results="hide", cache=F----------------------------------
# C=4
# load('ens.data.rda')
# load('OTA.rda')

## ----echo=T, eval=T, results="hide", cache=T----------------------------------
# Number of clusters used for every clustering call below.
C <- 4

# Generate an ensemble of perturbed partitions of sim1$X.
# perturb_method = 1 perturbs by bootstrap resampling; perturb_method = 0
# perturbs by adding Gaussian noise.
ens.data <- ensemble(
  sim1$X,
  nbs = 100,
  clust_param = C,
  clustering = "kmeans",
  perturb_method = 1
)

## ----echo=T, eval=T, results="hide", cache=T----------------------------------
# Find the mean partition and the uncertainty statistics of the ensemble.
ota <- otclust(ens.data)

## ----echo=T, cache=T----------------------------------------------------------
# Plain k-means fit, kept as the baseline method for comparison.
kcl <- kmeans(sim1$X, C)

# Align the clustering results for convenience of comparison. The columns are
# stacked as: ground truth, baseline labels, mean-partition labels.
compar <- align(
  cbind(
    sim1$z,
    kcl$cluster,
    ota$meanpart
  )
)

# For each weight matrix, record per column the row index of the largest
# weight; these index vectors are then used to relabel both partitions.
lab.match <- lapply(
  compar$weight,
  function(w) apply(w, MARGIN = 2, FUN = which.max)
)
kcl.algnd <- match(kcl$cluster, lab.match[[1]])
ota.algnd <- match(ota$meanpart, lab.match[[2]])

## ----echo=T, cache=T, fig.show='hold'-----------------------------------------
# plot the result on two dimensional space.
otplot(sim1$X, sim1$z, con = FALSE, title = "Truth")  # ground truth
otplot(sim1$X, kcl.algnd, con = FALSE, title = "Kmeans")  # baseline method
# mean partition by OTclust
otplot(sim1$X, ota.algnd, con = FALSE, title = "Mean partition")

## ----echo=T, cache=T----------------------------------------------------------
# Distance between the ground truth and each partition.
# The baseline reuses the existing k-means fit `kcl` (the one whose aligned
# labels are plotted above). Refitting k-means here would draw new random
# starts and could score a different partition than the one shown.
wassDist(sim1$z, kcl$cluster)   # baseline method
wassDist(sim1$z, ota$meanpart)  # mean partition by OTclust

# Topological relationships between mean partition and ensemble clusters
t(ota$match)

# Cluster Alignment and Points based (CAP) separability
ota$cap

## ----echo=T, cache=T, fig.show='hold'-----------------------------------------
# Covering Point Set (CPS), one plot per cluster. Row k of ota$cps is selected
# through lab.match[[2]][k], the index vector computed during alignment.
otplot(
  sim1$X, ota$cps[lab.match[[2]][1], ],
  legend.labels = c("", "CPS"), add.text = FALSE, title = "CPS for C1"
)
otplot(
  sim1$X, ota$cps[lab.match[[2]][2], ],
  legend.labels = c("", "CPS"), add.text = FALSE, title = "CPS for C2"
)
otplot(
  sim1$X, ota$cps[lab.match[[2]][3], ],
  legend.labels = c("", "CPS"), add.text = FALSE, title = "CPS for C3"
)
otplot(
  sim1$X, ota$cps[lab.match[[2]][4], ],
  legend.labels = c("", "CPS"), add.text = FALSE, title = "CPS for C4"
)

## ----fig.show='hold', cache=T-------------------------------------------------
# CPS analysis on selection of visualization methods.
# The result is stored as `vis_cps` rather than `c` so that base::c is not
# masked by a data object.
data(vis_pollen)
vis_cps <- visCPS(vis_pollen$vis, vis_pollen$ref)

## ----fig.show='hold', cache=T-------------------------------------------------
# visualization of the result
mplot(vis_cps, 2)
cplot(vis_cps, 2)

## ----fig.show='hold', cache=T-------------------------------------------------
# overall tightness
vis_cps$tight_all

# cluster-wise tightness
vis_cps$tight

## ----fig.show='hold', cache=T-------------------------------------------------
# CPS Analysis on validation of clustering result
data(YAN)
y <- clustCPS(
  YAN,
  k = 7,
  l = FALSE,
  pre = FALSE,
  noi = "after",
  cmethod = "kmeans",
  dimr = "PCA",
  vis = "tsne"
)

# visualization of the results
mplot(y, 4)
cplot(y, 4)

# point-wise stability assessment
p <- pplot(y)
p$v