#!/bin/Rscript # Load libraries if(!require("argparse")){ install.packages("argparse", repos="http://cran.r-project.org") library(argparse) } if(!require("jsonlite")){ install.packages("jsonlite", repos="http://cran.r-project.org") library("jsonlite") } source("http://bioconductor.org/biocLite.R") if (!require("groHMM")){ biocLite("groHMM") library("groHMM") } if (!require("GenomicFeatures")){ biocLite("GenomicFeatures") library("GenomicFeatures") } if (!require("rtracklayer")){ biocLite("rtracklayer") library("rtracklayer") } if(!require("jsonlite")){ install.packages("jsonlite", repos="http://cran.r-project.org") library("jsonlite") } # Currently mouse or human if (!require("TxDb.Hsapiens.UCSC.hg19.knownGene")){ biocLite("TxDb.Hsapiens.UCSC.hg19.knownGene") library("TxDb.Hsapiens.UCSC.hg19.knownGene") } if (!require("TxDb.Mmusculus.UCSC.mm10.knownGene")){ biocLite("TxDb.Mmusculus.UCSC.mm10.knownGene") library("TxDb.Mmusculus.UCSC.mm10.knownGene") } if (!require("org.Hs.eg.db")){ biocLite("org.Hs.eg.db") library("org.Hs.eg.db") } if (!require("org.Mm.eg.db")){ biocLite("org.Mm.eg.db") library("org.Mm.eg.db") } # Create parser object parser <- ArgumentParser() # Specify our desired options parser$add_argument("-r1", "--replicate1", nargs='*', help = "File paths to Replicate 1", required = TRUE) parser$add_argument("-g", "--genome", help = "The ucsc genome", required = TRUE) parser$add_argument("-e", "--experiment", nargs='*', help = "The experiment names", required = TRUE) parser$add_argument("-o", "--out", help = "The output directory", required = TRUE) parser$add_argument("-b", "--ltprob", type="integer", help = "The LtsProbB", required = TRUE) parser$add_argument("-u", "--uts", type="integer", help = "The UTS", required = TRUE) # Parse arguments args <- parser$parse_args() # Set the working directory to output directory setwd(args$out) # Set mc.cores to 4 options(mc.cores=getCores(4)) # Load alignment files alignments <- GRanges() for (i in seq(1:length(args$experiment))){ a1 = args$replicate1[i] alignments_1 = as(readGAlignments(a1), "GRanges") combined.replicates <- sort(c(alignments_1,alignments)) alignments <- combined.replicates } # Load UCSC Genes if(args$genome=='hg19') { kgdb <- TxDb.Hsapiens.UCSC.hg19.knownGene org <- org.Hs.eg.db } else if(args$genome=='mm10') { kgdb <- TxDb.Mmusculus.UCSC.mm10.knownGene org <- org.Mm.eg.db } kgtx <- transcripts(kgdb,columns=c("gene_id", "tx_id", "tx_name")) # Collapse overlapping annotations kgConsensus <- makeConsensusAnnotations(kgtx, keytype="gene_id", mc.cores=getOption("mc.cores")) map <- select(org, keys=unlist(mcols(kgConsensus)$gene_id), columns=c("SYMBOL"), keytype=c("ENTREZID")) mcols(kgConsensus)$symbol <- map$SYMBOL mcols(kgConsensus)$type <- "gene" # Detect transcripts hmmResult <- detectTranscripts(reads=alignments, LtProbB=args$ltprob, UTS=args$uts) txHMM <- hmmResult$transcripts getExpressedAnnotations <- function(features, reads) { fLimit <- limitToXkb(features) count <- countOverlaps(fLimit, reads) features <- features[count!=0,] return(features[(quantile(width(features), .05) < width(features)) & (width(features) < quantile(width(features), .95)),])} conExpressed <- getExpressedAnnotations(features=kgConsensus,reads=alignments) # Plot Density #png('transcript-density-plot.png') # TODO: Fix plot output td <- getTxDensity(txHMM, conExpressed, mc.cores=getOption("mc.cores"), plot=FALSE) #u <- par("usr") #lines(c(u[1], 0, 0, 1000, 1000, u[2]), c(0,0,u[4]-.04,u[4]-.04,0,0),col="red") #legend("topright", lty=c(1,1), col=c(2,1), c("ideal", "groHMM")) #text(c(-500,500), c(.05,.5), c("FivePrimeFP", "TP")) #dev.off() td_JSON <- toJSON(td, null='null',auto_unbox = TRUE, pretty=TRUE) write(td_JSON,"transcript-density-quality-metrics.json", ncolumns = 1, append = FALSE, sep = " ") # Repairing called transcripts break_plus <- breakTranscriptsOnGenes(txHMM, kgConsensus, strand="+") break_minus <- breakTranscriptsOnGenes(txHMM, kgConsensus, strand="-") txBroken <- c(break_plus, break_minus) txFinal <- combineTranscripts(txBroken, kgConsensus) export(txFinal, con="final-transcripts.bed")