---
title: "Genomes"
output: rmarkdown::html_vignette
vignette: >
  %\VignetteIndexEntry{Genomes}
  %\VignetteEngine{knitr::rmarkdown}
  %\VignetteEncoding{UTF-8}
---
```{r, include = FALSE}
# Chunk defaults for the rest of this vignette: collapse = TRUE and "#>" as
# the comment prefix.
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")
```
```{r setup}
library(misha)
```
# Create a `misha` database from UCSC
The easiest way to create a `misha` database is to use the `gdb.create_genome` function:
```{r, eval = FALSE}
# Each call below is independent: run only the line for the assembly you need.
gdb.create_genome("hg19") # creates a database for the hg19 genome
gdb.create_genome("hg38") # creates a database for the hg38 genome
gdb.create_genome("mm10") # creates a database for the mm10 genome
gdb.create_genome("mm9") # creates a database for the mm9 genome
gdb.create_genome("mm39") # creates a database for the mm39 genome
```
However, if you need to create a database for a genome that is not supported by `gdb.create_genome`, or if you want to make sure that the database is created from the latest version of the genome in UCSC, you can create it manually using the commands below.
## hg19
In order to create a misha database for the _hg19_ genome, run the following commands (assuming *"hg19"* is your new database path):
```{r, eval = FALSE}
# Root of the UCSC goldenPath tree for the hg19 assembly.
ftp <- "ftp://hgdownload.soe.ucsc.edu/goldenPath/hg19"

gdb.create(
  # Path of the new database.
  "hg19",
  # One gzipped FASTA URL per chromosome: chr1..chr22, chrX, chrY, chrM.
  file.path(
    ftp, "chromosomes",
    sprintf("chr%s.fa.gz", c(1:22, "X", "Y", "M"))
  ),
  file.path(ftp, "database/knownGene.txt.gz"),
  file.path(ftp, "database/kgXref.txt.gz"),
  # Names supplied alongside kgXref.txt.gz (presumably its column names;
  # confirm against the gdb.create documentation).
  c(
    "kgID", "mRNA", "spID", "spDisplayID", "geneSymbol", "refseq", "protAcc",
    "description", "rfamAcc", "tRnaName"
  )
)

gdb.init("hg19")
```
## hg38
In order to create a misha database for the _hg38_ genome, run the following commands (assuming *"hg38"* is your new database path):
```{r, eval = FALSE}
# Root of the UCSC goldenPath tree for the hg38 assembly.
ftp <- "ftp://hgdownload.soe.ucsc.edu/goldenPath/hg38"

gdb.create(
  # Path of the new database.
  "hg38",
  # One gzipped FASTA URL per chromosome: chr1..chr22, chrX, chrY, chrM.
  file.path(
    ftp, "chromosomes",
    sprintf("chr%s.fa.gz", c(1:22, "X", "Y", "M"))
  ),
  file.path(ftp, "database/knownGene.txt.gz"),
  file.path(ftp, "database/kgXref.txt.gz"),
  # Names supplied alongside kgXref.txt.gz (presumably its column names;
  # confirm against the gdb.create documentation).
  c(
    "kgID", "mRNA", "spID", "spDisplayID", "geneSymbol", "refseq", "protAcc",
    "description", "rfamAcc", "tRnaName"
  )
)

gdb.init("hg38")
```
## mm9
In order to create a misha database for the _mm9_ genome, run the following commands (assuming *"mm9"* is your new database path):
```{r, eval = FALSE}
# Root of the UCSC goldenPath tree for the mm9 assembly.
ftp <- "ftp://hgdownload.soe.ucsc.edu/goldenPath/mm9"

gdb.create(
  # Path of the new database.
  "mm9",
  # One gzipped FASTA URL per chromosome: chr1..chr19, chrX, chrY, chrM.
  file.path(
    ftp, "chromosomes",
    sprintf("chr%s.fa.gz", c(1:19, "X", "Y", "M"))
  ),
  file.path(ftp, "database/knownGene.txt.gz"),
  file.path(ftp, "database/kgXref.txt.gz"),
  # Names supplied alongside kgXref.txt.gz; this list has eight entries and
  # does not include "rfamAcc" or "tRnaName".
  c(
    "kgID", "mRNA", "spID", "spDisplayID", "geneSymbol", "refseq", "protAcc",
    "description"
  )
)

gdb.init("mm9")
```
## mm10
In order to create a misha database for the _mm10_ genome, run the following commands (assuming *"mm10"* is your new database path):
```{r, eval = FALSE}
# Root of the UCSC goldenPath tree for the mm10 assembly.
ftp <- "ftp://hgdownload.soe.ucsc.edu/goldenPath/mm10"

gdb.create(
  # Path of the new database.
  "mm10",
  # One gzipped FASTA URL per chromosome: chr1..chr19, chrX, chrY, chrM.
  file.path(
    ftp, "chromosomes",
    sprintf("chr%s.fa.gz", c(1:19, "X", "Y", "M"))
  ),
  file.path(ftp, "database/knownGene.txt.gz"),
  file.path(ftp, "database/kgXref.txt.gz"),
  # Names supplied alongside kgXref.txt.gz (presumably its column names;
  # confirm against the gdb.create documentation).
  c(
    "kgID", "mRNA", "spID", "spDisplayID", "geneSymbol", "refseq", "protAcc",
    "description", "rfamAcc", "tRnaName"
  )
)

gdb.init("mm10")
```
## mm39
In order to create a misha database for the _mm39_ genome, run the following commands (assuming *"mm39"* is your new database path):
```{r, eval = FALSE}
# Root of the UCSC goldenPath tree for the mm39 assembly.
ftp <- "ftp://hgdownload.soe.ucsc.edu/goldenPath/mm39"

gdb.create(
  # Path of the new database.
  "mm39",
  # One gzipped FASTA URL per chromosome: chr1..chr19, chrX, chrY, chrM.
  file.path(
    ftp, "chromosomes",
    sprintf("chr%s.fa.gz", c(1:19, "X", "Y", "M"))
  ),
  file.path(ftp, "database/knownGene.txt.gz"),
  file.path(ftp, "database/kgXref.txt.gz"),
  # Names supplied alongside kgXref.txt.gz (presumably its column names;
  # confirm against the gdb.create documentation).
  c(
    "kgID", "mRNA", "spID", "spDisplayID", "geneSymbol", "refseq", "protAcc",
    "description", "rfamAcc", "tRnaName"
  )
)

gdb.init("mm39")
```