Newer
Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
#!/bin/Rscript
# Load libraries
if(!require("argparse")){
install.packages("argparse", repos="http://cran.r-project.org")
library(argparse)
}
if(!require("jsonlite")){
install.packages("jsonlite", repos="http://cran.r-project.org")
library("jsonlite")
}
source("http://bioconductor.org/biocLite.R")
if (!require("groHMM")){
biocLite("groHMM")
library("groHMM")
}
if (!require("GenomicFeatures")){
biocLite("GenomicFeatures")
library("GenomicFeatures")
}
if (!require("rtracklayer")){
biocLite("rtracklayer")
library("rtracklayer")
}
if(!require("jsonlite")){
install.packages("jsonlite", repos="http://cran.r-project.org")
library("jsonlite")
}
# Currently mouse or human
if (!require("TxDb.Hsapiens.UCSC.hg19.knownGene")){
biocLite("TxDb.Hsapiens.UCSC.hg19.knownGene")
library("TxDb.Hsapiens.UCSC.hg19.knownGene")
}
if (!require("TxDb.Mmusculus.UCSC.mm10.knownGene")){
biocLite("TxDb.Mmusculus.UCSC.mm10.knownGene")
library("TxDb.Mmusculus.UCSC.mm10.knownGene")
}
if (!require("org.Hs.eg.db")){
biocLite("org.Hs.eg.db")
library("org.Hs.eg.db")
}
if (!require("org.Mm.eg.db")){
biocLite("org.Mm.eg.db")
library("org.Mm.eg.db")
}
# Create parser object
parser <- ArgumentParser()
# Specify our desired options
parser$add_argument("-r1", "--replicate1", nargs=1, help = "File paths to Replicate 1", required = TRUE)
parser$add_argument("-g", "--genome", help = "The ucsc genome", required = TRUE)
parser$add_argument("-o", "--out", help = "The output directory", required = TRUE)
parser$add_argument("-b", "--ltprob", type="integer", help = "The LtsProbB", required = TRUE)
parser$add_argument("-u", "--uts", type="integer", help = "The UTS", required = TRUE)
# Parse arguments
args <- parser$parse_args()
# Set the working directory to output directory
setwd(args$out)
# Set mc.cores to 4
options(mc.cores=getCores(4))
# Load alignment files
alignment_1 <- as(readGAlignments(args$replicate1), "GRanges")
alignments <- sort(c(alignment_1))
# Load UCSC Genes
if(args$genome=='hg19') {
kgdb <- TxDb.Hsapiens.UCSC.hg19.knownGene
org <- org.Hs.eg.db
} else if(args$genome=='mm10') {
kgdb <- TxDb.Mmusculus.UCSC.mm10.knownGene
org <- org.Mm.eg.db
}
kgtx <- transcripts(kgdb,columns=c("gene_id", "tx_id", "tx_name"))
# Collapse overlapping annotations
kgConsensus <- makeConsensusAnnotations(kgtx, keytype="gene_id",
mc.cores=getOption("mc.cores"))
map <- select(org, keys=unlist(mcols(kgConsensus)$gene_id),
columns=c("SYMBOL"), keytype=c("ENTREZID"))
mcols(kgConsensus)$symbol <- map$SYMBOL
mcols(kgConsensus)$type <- "gene"
# Detect transcripts
hmmResult <- detectTranscripts(alignments, LtProbB=args$ltprob, UTS=args$uts)
txHMM <- hmmResult$transcripts
getExpressedAnnotations <- function(features, reads) {
fLimit <- limitToXkb(features)
count <- countOverlaps(fLimit, reads)
features <- features[count!=0,]
return(features[(quantile(width(features), .05) < width(features))
& (width(features) < quantile(width(features), .95)),])}
conExpressed <- getExpressedAnnotations(features=kgConsensus,reads=alignments)
# Plot Density
png('transcript-density-plot.png') # TODO: Fix plot output
td <- getTxDensity(txHMM, conExpressed, mc.cores=getOption("mc.cores"))
u <- par("usr")
lines(c(u[1], 0, 0, 1000, 1000, u[2]), c(0,0,u[4]-.04,u[4]-.04,0,0),col="red")
legend("topright", lty=c(1,1), col=c(2,1), c("ideal", "groHMM"))
text(c(-500,500), c(.05,.5), c("FivePrimeFP", "TP"))
dev.off()
td_JSON <- toJSON(td, null='null',auto_unbox = TRUE, pretty=TRUE)
write(td_JSON,"transcript-density-quality-metrics.json", ncolumns = 1, append = FALSE, sep = " ")
# Repairing called transcripts
break_plus <- breakTranscriptsOnGenes(txHMM, kgConsensus, strand="+")
break_minus <- breakTranscriptsOnGenes(txHMM, kgConsensus, strand="-")
txBroken <- c(break_plus, break_minus)
txFinal <- combineTranscripts(txBroken, kgConsensus)
export(txFinal, con="final-transcripts.bed")