Newer
Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
#!/bin/Rscript
# Load libraries
if(!require("optparse")){
install.packages("optparse", repos="http://cran.r-project.org")
library("optparse")
}
source("http://bioconductor.org/biocLite.R")
if (!require("groHMM")){
biocLite("groHMM")
library("groHMM")
}
if (!require("GenomicFeatures")){
biocLite("GenomicFeatures")
library("GenomicFeatures")
}
# Currently mouse or human
if (!require("TxDb.Hsapiens.UCSC.hg19.knownGene")){
biocLite("TxDb.Hsapiens.UCSC.hg19.knownGene")
library("TxDb.Hsapiens.UCSC.hg19.knownGene")
}
if (!require("TxDb.Mmusculus.UCSC.mm10.knownGene")){
biocLite("TxDb.Mmusculus.UCSC.mm10.knownGene")
library("TxDb.Mmusculus.UCSC.mm10.knownGene")
}
if (!require("org.Hs.eg.db")){
biocLite("org.Hs.eg.db")
library("org.Hs.eg.db")
}
if (!require("org.Mm.eg.db")){
biocLite("org.Mm.eg.db")
library("org.Mm.eg.db")
}
# Specify our desired options
option_list <- list(
make_option(c("-a1", "--alignment1"), action="store", type='character', help = "File path to Replicate 1"),
make_option(c("-a2", "--alignment2"), action="store", type='character', help = "File path to Replicate 2"),
make_option(c("-g", "--genome"), action="store", type='character', help = "The UCSC genome to use"),
make_option(c("-o", "--out"), action="store", type='character', help = "The output directory")
)
# Parse arguments
args = parse_args(OptionParser(option_list=option_list))
# Set the working directory to output directory
setwd(args$out)
# Set mc.cores to 1
options(mc.cores=getCores(1))
# Load alignment files
alignment_1 <- as(readGAlignments(args$alignment1), "GRanges")
alignments <- sort(c(alignment_1))
# Load UCSC Genes
if(args$genome=='hg19') {
kgdb <- loadDb("/project/GCRB/Lee_Lab/s163035/Matrix_analysis_PMIT_25842977/GRO-seq/gencode.sqlite")
} else if(args$genome=='mm10') {
kgdb <- TxDb.Mmusculus.UCSC.mm10.knownGene
org <- org.Mm.eg.db
}
kgtx <- transcripts(kgdb,columns=c("gene_id", "tx_id", "tx_name"))
# Collapse overlapping annotations
kgConsensus <- makeConsensusAnnotations(kgtx, keytype="gene_id",
mc.cores=getOption("mc.cores"))
#map <- select(org, keys=unlist(mcols(kgConsensus)$gene_id),
# columns=c("SYMBOL"), keytype=c("ENTREZID"))
#mcols(kgConsensus)$symbol <- map$SYMBOL
#mcols(kgConsensus)$type <- "gene"
tune <- data.frame(LtProbB=c(rep(-100,9), rep(-150,9), rep(-200,9), rep(-250,9), rep(-300,9),rep(-350,9),rep(-400,9) ),
UTS=rep(c(5,10,15,20,25,30,35,40,45), 7))
evals <- mclapply(seq_len(63), function(x) {
hmm <- detectTranscripts(reads=alignments, LtProbB=tune$LtProbB[x], UTS=tune$UTS[x])
e <- evaluateHMMInAnnotations(hmm$transcripts, kgConsensus)
e$eval
}, mc.cores=getOption("mc.cores"), mc.silent=TRUE)
tune <- cbind(tune, do.call(rbind, evals))
write.table(tune,file="tune.tsv", quote=F, sep="\t", row.names=F, col.names=T)