## ---- echo=FALSE, cache=FALSE---------------------------------------------------------------------
options(width = 100)

## -------------------------------------------------------------------------------------------------
library("NetRep")
data("NetRep")

## -------------------------------------------------------------------------------------------------
# Read in the data:
data("NetRep")

# Set up the input data structures for NetRep. We will call these datasets 
# "cohort1" and "cohort2" to avoid confusion with the "discovery" and "test"
# arguments in NetRep's functions:
data_list <- list(cohort1=discovery_data, cohort2=test_data)
correlation_list <- list(cohort1=discovery_correlation, cohort2=test_correlation)
network_list <- list(cohort1=discovery_network, cohort2=test_network)

# We do not need to set up a list for the 'moduleAssignments', 'modules', or 
# 'test' arguments because there is only one "discovery" dataset.

## -------------------------------------------------------------------------------------------------
# Assess the preservation of modules in the test dataset.
preservation <- modulePreservation(
 network=network_list, data=data_list, correlation=correlation_list, 
 moduleAssignments=module_labels, discovery="cohort1", test="cohort2", 
 nPerm=10000, nThreads=2
)

## -------------------------------------------------------------------------------------------------
preservation$observed
preservation$p.value

## -------------------------------------------------------------------------------------------------
# Get the maximum permutation test p-value
max_pval <- apply(preservation$p.value, 1, max)
max_pval

## ---- eval=FALSE----------------------------------------------------------------------------------
#  # Handling NA entries in the 'cor.degree' null distribution for sparse networks
#  
#  # Get the entries in the null distribution where there were no edges in the
#  # permuted module
#  na.entries <- which(is.na(preservation$nulls[,'cor.degree',]))
#  no.edges <- which(preservation$nulls[,'avg.weight',][na.entries] == 0)
#  
#  # Set those entries to 0
#  preservation$nulls[,'cor.degree',][no.edges] <- 0
#  
#  # Recalculate the permutation test p-values
#  preservation$p.values <- permutationTest(
#    preservation$nulls, preservation$observed, preservation$nVarsPresent,
#    preservation$totalSize, preservation$alternative
#  )

## ---- eval=FALSE----------------------------------------------------------------------------------
#  not.present <- which(discovery_data == 0)
#  nSamples <- nrow(discovery_data)
#  discovery_data[not.present] <- runif(length(not.present), min=0, max=1/nSamples)

## ----modules_in_discovery, dev="jpeg", dev.args=list(quality=50),  dpi=72, fig.height=7, fig.width=7, fig.align="center", results="hold", fig.keep="last", fig.show="hold"----
plotModule(
  data=data_list, correlation=correlation_list, network=network_list, 
  moduleAssignments=module_labels, modules=c(1,2,3,4),
  discovery="cohort1", test="cohort1"
)

## ----modules_in_test, dev="jpeg", dev.args=list(quality=50),  dpi=72, fig.height=7, fig.width=7, fig.align="center", results="hold", fig.keep="last", fig.show="hold"----
plotModule(
  data=data_list, correlation=correlation_list, network=network_list, 
  moduleAssignments=module_labels, modules=c(1,2,3,4),
  discovery="cohort1", test="cohort2"
)

## ----mean_degree, dev="jpeg", dev.args=list(quality=50),  dpi=72, fig.height=7, fig.width=7, fig.align="center", results="hold", fig.keep="last", fig.show="hold"----
plotModule(
  data=data_list, correlation=correlation_list, network=network_list, 
  moduleAssignments=module_labels, modules=c(1,4), # only the preserved modules
  discovery="cohort1", test="cohort2",
  orderNodesBy=c("cohort1", "cohort2") # this can be any number of datasets
)

## ----dry_run, dev="jpeg", dev.args=list(quality=50),  dpi=72, fig.height=7, fig.width=7, fig.align="center", results="hold", fig.keep="last", fig.show="hold"----
plotModule(
  data=data_list, correlation=correlation_list, network=network_list, 
  moduleAssignments=module_labels, modules=c(1,4),
  discovery="cohort1", test="cohort2",
  orderNodesBy=c("cohort1", "cohort2"),
  dryRun=TRUE
)

## ----dry_run_customised, dev="jpeg", dev.args=list(quality=50),  dpi=72, fig.height=7, fig.width=7, fig.align="center", results="hold", fig.keep="last", fig.show="hold"----
# Change the margins so the plot is more compressed. Alternatively we could 
# change the device window.
par(mar=c(3,10,3,10)) # bottom, left, top, right margin sizes
plotModule(
  data=data_list, correlation=correlation_list, network=network_list, 
  moduleAssignments=module_labels, modules=c(1,4),
  discovery="cohort1", test="cohort2",
  orderNodesBy=c("cohort1", "cohort2"),
  dryRun=TRUE, 
  # Title of the plot
  main = "Preserved modules", 
  # Use the maximum edge weight as the highest value instead of 1 in the
  # network heatmap
  netRange=NA,
  # Turn off the node and sample labels:
  plotNodeNames=FALSE, plotSampleNames=FALSE,
  # The distance from the bottom axis should the module labels be drawn:
  maxt.line=0,
  # The distance from the legend the legend titles should be drawn:
  legend.main.line=2
)

## ----mean_degree_customised, dev="jpeg", dev.args=list(quality=50),  dpi=72, fig.height=7, fig.width=7, fig.align="center", results="hold", fig.keep="last", fig.show="hold"----
par(mar=c(3,10,3,10)) 
plotModule(
  data=data_list, correlation=correlation_list, network=network_list, 
  moduleAssignments=module_labels, modules=c(1,4),
  discovery="cohort1", test="cohort2",
  orderNodesBy=c("cohort1", "cohort2"), main = "Preserved modules", 
  netRange=NA, plotNodeNames=FALSE, plotSampleNames=FALSE,
  maxt.line=0, legend.main.line=2
)

## ----correlation_heatmap, dev="jpeg", dev.args=list(quality=50),  dpi=72, fig.height=7, fig.width=7, fig.align="center", results="hold", fig.keep="last", fig.show="hold"----
par(mar=c(5,5,4,4)) 
plotCorrelation(
  data=data_list, correlation=correlation_list, network=network_list, 
  moduleAssignments=module_labels, modules=0:4, discovery="cohort1",
  test="cohort1", symmetric=TRUE, orderModules=FALSE
)

## -------------------------------------------------------------------------------------------------
properties <- networkProperties(
  data=data_list, correlation=correlation_list, network=network_list, 
  moduleAssignments=module_labels, 
  # Only calculate for the reproducible modules
  modules=c(1,4),
  # what dataset were the modules identified in?
  discovery="cohort1", 
  # which datasets do we want to calculate their properties in?
  test=c("cohort1", "cohort2")
)

# The summary profile of module 1 in the discovery dataset:
properties[["cohort1"]][["1"]][["summary"]]
# Along with the proportion of variance in the module data explained by the 
# summary profile:
properties[["cohort1"]][["1"]][["coherence"]]

# The same information in the test dataset:
properties[["cohort2"]][["1"]][["summary"]]
properties[["cohort2"]][["1"]][["coherence"]]

## ---- echo=FALSE, message=FALSE, results="hide"---------------------------------------------------
# This is the code necessary for the later part of this section to run.
# The vignette doesnt actually save the data.
discovery_data <- as.disk.matrix(discovery_data, tempfile())
discovery_correlation <- as.disk.matrix(discovery_correlation, tempfile())
discovery_network <- as.disk.matrix(discovery_network, tempfile())
test_data <- as.disk.matrix(test_data, tempfile())
test_correlation <- as.disk.matrix(test_correlation, tempfile())
test_network <- as.disk.matrix(test_network, tempfile())

## ---- eval=FALSE----------------------------------------------------------------------------------
#  # serialize=TRUE will save the data using 'saveRDS'.
#  # serialize=FALSE will save the data as a tab-separated file ('sep="\t"').
#  discovery_data <- as.disk.matrix(
#    x=discovery_data,
#    file="discovery_data.rds",
#    serialize=TRUE)
#  discovery_correlation <- as.disk.matrix(
#    x=discovery_correlation,
#    file="discovery_correlation.rds",
#    serialize=TRUE)
#  discovery_network <- as.disk.matrix(
#    x=discovery_network,
#    file="discovery_network.rds",
#    serialize=TRUE)
#  test_data <- as.disk.matrix(
#    x=test_data,
#    file="test_data.rds",
#    serialize=TRUE)
#  test_correlation <- as.disk.matrix(
#    x=test_correlation,
#    file="test_correlation.rds",
#    serialize=TRUE)
#  test_network <- as.disk.matrix(
#    x=test_network,
#    file="test_network.rds",
#    serialize=TRUE)

## ---- eval=FALSE----------------------------------------------------------------------------------
#  test_network

## ---- echo=FALSE----------------------------------------------------------------------------------
cat("Pointer to matrix stored at test_network.rds\n")

## -------------------------------------------------------------------------------------------------
as.matrix(test_network)[1:5, 1:5]

## ---- eval=FALSE----------------------------------------------------------------------------------
#  # If files are saved as tables, set 'serialized=FALSE' and specify arguments
#  # that would normally be provided to 'read.table'. Note: this function doesnt
#  # check whether the file can actually be read in as a matrix!
#  discovery_data <- attach.disk.matrix("discovery_data.rds")
#  discovery_correlation <- attach.disk.matrix("discovery_correlation.rds")
#  discovery_network <- attach.disk.matrix("discovery_network.rds")
#  test_data <- attach.disk.matrix("test_data.rds")
#  test_correlation <- attach.disk.matrix("test_correlation.rds")
#  test_network <- attach.disk.matrix("test_network.rds")

## -------------------------------------------------------------------------------------------------
data_list <- list(cohort1=discovery_data, cohort2=test_data)
correlation_list <- list(cohort1=discovery_correlation, cohort2=test_correlation)
network_list <- list(cohort1=discovery_network, cohort2=test_network)

## -------------------------------------------------------------------------------------------------
# Assess the preservation of modules in the test dataset.
preservation <- modulePreservation(
 network=network_list, data=data_list, correlation=correlation_list, 
 moduleAssignments=module_labels, discovery="cohort1", test="cohort2", 
 nPerm=10000, nThreads=2
)

## -------------------------------------------------------------------------------------------------
# Determine the nodes and samples on a plot in advance:
nodesToPlot <- nodeOrder(
  data=data_list, correlation=correlation_list, network=network_list, 
  moduleAssignments=module_labels, modules=c(1,4), discovery="cohort1", 
  test=c("cohort1", "cohort2"), mean=TRUE
)
# We need to know which module will appear left-most on the plot:
firstModule <- module_labels[nodesToPlot[1]]

samplesToPlot <- sampleOrder(
  data=data_list, correlation=correlation_list, network=network_list, 
  moduleAssignments=module_labels, modules=firstModule, discovery="cohort1",
  test="cohort2"
)

# Load in the dataset we are plotting:
test_data <- as.matrix(test_data)
test_correlation <- as.matrix(test_correlation)
test_network <- as.matrix(test_network)

## ----disk_matrix_dry_run, dev="jpeg", dev.args=list(quality=50),  dpi=72, fig.height=7, fig.width=7, fig.align="center", results="hold", fig.keep="last", fig.show="hold"----
# Now we can use 'dryRun=TRUE' quickly:
plotModule(
  data=test_data[samplesToPlot, nodesToPlot], 
  correlation=test_correlation[nodesToPlot, nodesToPlot], 
  network=test_network[nodesToPlot, nodesToPlot],
  moduleAssignments=module_labels[nodesToPlot],
  orderNodesBy=NA, orderSamplesBy=NA, dryRun=TRUE
)

## ----disk_matrix_plot, dev="jpeg", dev.args=list(quality=50),  dpi=72, fig.height=7, fig.width=7, fig.align="center", results="hold", fig.keep="last", fig.show="hold"----
# And draw the final plot once we determine the plot parameters 
par(mar=c(3,10,3,10)) 
plotModule(
  data=test_data[samplesToPlot, nodesToPlot], 
  correlation=test_correlation[nodesToPlot, nodesToPlot], 
  network=test_network[nodesToPlot, nodesToPlot],
  moduleAssignments=module_labels[nodesToPlot],
  orderNodesBy=NA, orderSamplesBy=NA
)

## ---- echo=FALSE, hold=TRUE-----------------------------------------------------------------------
cat(
"[2016-06-14 17:25:16 AEST] Validating user input...\n",
"[2016-06-14 17:25:16 AEST]   Loading matrices of dataset \"liver\" into RAM...\n",
"[2016-06-14 17:26:29 AEST]   Checking matrices for problems...\n",
"[2016-06-14 17:26:31 AEST]   Unloading dataset from RAM...\n",
"[2016-06-14 17:26:31 AEST]   Loading matrices of dataset \"brain\" into RAM...\n",
"[2016-06-14 17:27:45 AEST]   Checking matrices for problems...\n",
"[2016-06-14 17:27:47 AEST] Input ok!\n",
"[2016-06-14 17:27:47 AEST] Calculating preservation of network subsets from\n",
"                           dataset \"brain\" in dataset \"liver\".\n",
"[2016-06-14 17:27:47 AEST]   Pre-computing intermediate properties in dataset\n",
"                             \"brain\"...\n",
"[2016-06-14 17:27:48 AEST]   Unloading dataset from RAM...\n",
"[2016-06-14 17:27:48 AEST]   Loading matrices of dataset \"liver\" into RAM...\n",
"[2016-06-14 17:29:01 AEST]   Calculating observed test statistics...\n",
"[2016-06-14 17:29:02 AEST]   Generating null distributions from 320\n",
"                             permutations using 32 threads...\n",
"\n",
"  100% completed.\n",
"\n",
"[2016-06-14 17:29:24 AEST]   Calculating P-values...\n",
"[2016-06-14 17:29:24 AEST]   Collating results...\n",
"[2016-06-14 17:29:24 AEST] Unloading dataset from RAM...\n",
"[2016-06-14 17:29:25 AEST] Done!\n", sep=""
)