--- title: "Network inference and analysis of CLL data" shorttitle: "Network inference and analysis of CLL data" author: - name: "Frédéric Bertrand and Myriam Maumy-Bertrand" affiliation: - Université de Strasbourg et CNRS, - IRMA, labex IRMIA email: frederic.bertrand@utt.fr date: "`r Sys.Date()`" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Network inference and analysis of CLL data} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r setup, include = FALSE} LOCAL <- identical(Sys.getenv("LOCAL"), "TRUE") #LOCAL=FALSE knitr::opts_chunk$set(purl = LOCAL) knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ``` # Data preparation Retrieve the full CLL dataset. ```{r loadPatterns, echo=FALSE, cache=FALSE, eval = LOCAL} require(Patterns) ``` ```{r retcll, warning=FALSE, cache=TRUE, eval = LOCAL} require(Patterns) CLLfile <- "https://github.com/fbertran/Patterns/raw/master/add_data/CLL.RData" repmis::source_data(CLLfile) CLL[1:10,1:5] ``` Split the `CLL` dataset into healthy and aggressive stimulated and unstimulated dataset. ```{r spltcll, warning=FALSE, cache=TRUE, eval = LOCAL} hea_US<-CLL[,which((1:48)%%8<5&(1:48)%%8>0)+2] hea_S<-CLL[,which(!((1:48)%%8<5&(1:48)%%8>0))+2] agg_US<-CLL[,which((1:40)%%8<5&(1:40)%%8>0)+98] agg_S<-CLL[,which(!((1:40)%%8<5&(1:40)%%8>0))+98] m_hea_US<-as.omics_array(hea_US,c(60,90,210,390),6,name=CLL[,1],gene_ID=CLL[,2]) m_hea_S<- as.omics_array(hea_S,c(60,90,210,390),6,name=CLL[,1],gene_ID=CLL[,2]) m_agg_US<-as.omics_array((agg_US),c(60,90,210,390),5,name=CLL[,1],gene_ID=CLL[,2]) m_agg_S<- as.omics_array((agg_S),c(60,90,210,390),5,name=CLL[,1],gene_ID=CLL[,2]) ``` Focus on EGR1, run the code to get the graph of the expression values (pasted together for all the subjects) for all the probeset tagged as EGR1. ```{r focusEGR1, warning=FALSE, cache=TRUE, fig.keep="none", eval = LOCAL} matplot(t(log(agg_S[which(CLL[,2] %in% "EGR1"),])),type="l",lty=1) ``` # Selection genes according to their profiles. ```{r selection1, message=FALSE, warning=FALSE, cache=TRUE, eval = LOCAL} selection1<-geneSelection(list(m_agg_US,m_agg_S),list("condition&time",c(1,2),c(1,1)),-1,alpha=0.1) ``` ```{r selection2, message=FALSE, warning=FALSE, cache=TRUE, eval = LOCAL} selection2<-geneSelection(list(m_agg_US,m_agg_S),list("condition&time",c(1,2),c(1,1)+1),-1,alpha=0.1) ``` ```{r selection3, message=FALSE, warning=FALSE, cache=TRUE, eval = LOCAL} selection3<-geneSelection(list(m_agg_US,m_agg_S),list("condition&time",c(1,2),c(1,1)+2),50,alpha=0.005) ``` ```{r selection4, message=FALSE, warning=FALSE, cache=TRUE, eval = LOCAL} selection4<-geneSelection(list(m_agg_US,m_agg_S),list("condition&time",c(1,2),c(1,1)+3),50,alpha=0.005) ``` Merge the four selections into a single one. ```{r mergeselection, warning=FALSE, fig.keep='first', eval = LOCAL} selection<-Patterns::unionOmics(list(selection1,selection2,selection3,selection4)) summary(selection) ``` Number of genes in the merged selection. ```{r sizemergeselection, warning=FALSE, cache=TRUE, eval = LOCAL} length(selection@gene_ID) ``` Translate the probesets' names for the selection. ```{r translatecll, warning=FALSE, cache=TRUE, eval = LOCAL} require(biomaRt) affyids=c("202763_at","209310_s_at","207500_at") ensembl = useMart("ensembl",dataset="hsapiens_gene_ensembl") infos<-getBM(attributes=c("affy_hg_u133_plus_2","ensembl_gene_id","hgnc_symbol","chromosome_name","start_position","end_position","band"), filters = "affy_hg_u133_plus_2", values = CLL[CLL[,1] %in% selection@name,1] , mart = ensembl,uniqueRows=TRUE, checkFilters = TRUE) ``` ```{r addgeneid, warning=FALSE, cache=TRUE, eval = LOCAL} selection@gene_ID <- lapply(selection@name,function(x) {unique(infos[infos$affy_hg_u133_plus_2==x,"hgnc_symbol"])}) ``` # Network inference Add groupping information according to the pre-merge selection membership to perform network inference. ```{r addgroupselection, warning=FALSE, fig.keep='last', eval = LOCAL} selection@group <- rep(NA, length(selection@name)) names(selection@group) <- selection@name selection@group[selection@name %in% selection4@name] <- 4 selection@group[selection@name %in% selection3@name] <- 3 selection@group[selection@name %in% selection2@name] <- 2 selection@group[selection@name %in% selection1@name] <- 1 plot(selection) ``` Check the length of the `group` slot of the `selection` object. ```{r checkgroup, warning=FALSE, cache=TRUE, eval = LOCAL} length(selection@group) ``` Performs a lasso based inference of the network. Then prints the `network` pbject. ```{r inference, warning=FALSE, cache=TRUE, fig.keep='none', eval = LOCAL} network<-inference(selection,fitfun="LASSO2",Finit=CascadeFinit(4,4),Fshape=CascadeFshape(4,4)) str(network) ``` Plot the inferred F matrix. ```{r plotF, eval = LOCAL} plotF(network@F, choice='F') ``` Save results. ```{r saveinference, eval=FALSE} save(list=c("selection"),file="selection.RData") save(list=c("infos"),file="infos.RData") ``` # Focus on transcription factors. Retrieve human transcription factors from HumanTFDB, extracted from AnimalTFDB 3.0: a comprehensive resource for annotation and prediction of animal transcription factors. Hui Hu, Ya-Ru Miao, Long-Hao Jia, Qing-Yang Yu, Qiong Zhang and An-Yuan Guo. *Nucl. Acids Res*. (2018). ```{r HumanTFDBfailsafe, warning=FALSE, cache=TRUE, include=FALSE, eval = LOCAL} getTF <- FALSE try({doc <- read.delim("http://bioinfo.life.hust.edu.cn/static/AnimalTFDB3/download/Homo_sapiens_TF",encoding = "UTF-8", header=TRUE); getTF <- TRUE}, silent = TRUE) if(!getTF){data(doc)} TF<-as.character(doc[,"Symbol"]) TF<-TF[order(TF)] ``` ```{r HumanTFDB, warning=FALSE, cache=TRUE, eval = FALSE} doc <- read.delim("http://bioinfo.life.hust.edu.cn/static/AnimalTFDB3/download/Homo_sapiens_TF",encoding = "UTF-8", header=TRUE) TF<-as.character(doc[,"Symbol"]) TF<-TF[order(TF)] ``` The `TF` object holds the list of human transcription factors geneID. We retrieve those that are in the `selection` object. ```{r TFinsel, warning=FALSE, cache=TRUE, eval = LOCAL} infos_selection <- infos[infos$affy_hg_u133_plus_2 %in% selection@name,] tfs<-which(infos_selection[,"hgnc_symbol"] %in% TF) ``` Some plots of the `TF` found in the selection. ```{r plotTFinsel, warning=FALSE, eval = LOCAL} matplot(t(selection@omicsarray[tfs,]),type="l",lty=1) ``` ```{r plotTFinsel2, warning=FALSE, eval = LOCAL} kk<-kmeans((selection@omicsarray[tfs,]),10) matplot(t(kk$centers),type="l",lty=1) ``` ```{r TODO, warning=FALSE, echo=FALSE, eval=FALSE} #TO DO #Focus on TF that were not selected. indice<-which(CLL[,2] %in% TF[tfs<-which(! TF %in% selection@gene_ID)]) a<-1:200 matplot(log(t(agg_S[indice[a],]/agg_US[indice[a],])),lty=1,type="l") kkk<-kmeans(log((agg_S[indice,]/agg_US[indice,])),10) matplot(t(kkk$centers),type="l",lty=1) poi<-indice[which(kkk$cluster==2 )] matmat<-log((agg_S[poi,]/agg_US[poi,])) addna<-function(mat,t,p){ mat2<-mat[,1:t] for(i in 2:p){ print(1:t+(i-1)*t) mat2<-cbind(mat2,rep(NA,nrow(mat2)),mat[,1:t+(i-1)*t]) } return(mat2) } pdf("forgotten_TF.pdf",width=15,height=5) for(i in 1:15){ poi<-indice[which(kkk$cluster==i )] if(length(poi)>2){ matmat<-log((agg_S[poi,]/agg_US[poi,])) #matplot(t(matmat),lty=1,type="l") matplot(t(addna(matmat,4,5)),lty=1,type="l")} } dev.off() abline(v=c(2,6,10,14,18)) poi<-indice[which(kkk$cluster==1 )] matplot(log(t(agg_S[poi,]/agg_US[poi,])),lty=1,type="l") TFi<-function(x) length(which(TF %in% x)) n<-40 kre<-kmeans(selection@omicsarray,n) kre lll<-split(selection@gene_ID,kre$cluster) require(DCGL) require("clusterProfiler") require("AnnotationFuncs") require(org.Hs.eg.db) pp<-list() for(k in 1:2){ print(k) pp[[k]]<-translate(lll[[k]],from=org.Hs.egSYMBOL2EG,simplify=TRUE) # GOs[[k]]<-enrichGO(pp, organism = "human", ont = "MF", pvalueCutoff = 0.05, # pAdjustMethod = "BH", qvalueCutoff = 0.2, minGSSize = 5, # readable = FALSE) } names(pp)<-paste("X",1:2,sep="") test<-compareCluster(pp,fun="enrichGO", organism="human", pvalueCutoff=0.05) plot(test) translate(lll[[k]],from=org.Hs.egSYMBOL2EG,simplify=TRUE) TFu<-(unlist(lapply(pp,TFi))) TFy<-unlist(lapply(pp,length)) plot(TFu/TFy) plot(TFu) sum(TFu) entrez<-translate(selection@gene_ID,from=org.Hs.egSYMBOL2EG,simplify=TRUE) geneName<-translate(entrez[which(TF %in% entrez)],from=org.Hs.egSYMBOL,simplify=TRUE) which(selection@gene_ID %in% "EGR1") ```