#!/bin/Rscript # Load libraries if(!require("optparse")){ install.packages("optparse", repos="http://cran.r-project.org") library("optparse") } source("http://bioconductor.org/biocLite.R") if (!require("groHMM")){ biocLite("groHMM") library("groHMM") } if (!require("GenomicFeatures")){ biocLite("GenomicFeatures") library("GenomicFeatures") } # Currently mouse or human if (!require("TxDb.Hsapiens.UCSC.hg19.knownGene")){ biocLite("TxDb.Hsapiens.UCSC.hg19.knownGene") library("TxDb.Hsapiens.UCSC.hg19.knownGene") } if (!require("TxDb.Mmusculus.UCSC.mm10.knownGene")){ biocLite("TxDb.Mmusculus.UCSC.mm10.knownGene") library("TxDb.Mmusculus.UCSC.mm10.knownGene") } if (!require("org.Hs.eg.db")){ biocLite("org.Hs.eg.db") library("org.Hs.eg.db") } if (!require("org.Mm.eg.db")){ biocLite("org.Mm.eg.db") library("org.Mm.eg.db") } # Specify our desired options option_list <- list( make_option(c("-a1", "--alignment1"), action="store", type='character', help = "File path to Replicate 1"), make_option(c("-a2", "--alignment2"), action="store", type='character', help = "File path to Replicate 2"), make_option(c("-g", "--genome"), action="store", type='character', help = "The UCSC genome to use"), make_option(c("-o", "--out"), action="store", type='character', help = "The output directory") ) # Parse arguments args = parse_args(OptionParser(option_list=option_list)) # Set the working directory to output directory setwd(args$out) # Set mc.cores to 1 options(mc.cores=getCores(1)) # Load alignment files alignment_1 <- as(readGAlignments(args$alignment1), "GRanges") alignments <- sort(c(alignment_1)) # Load UCSC Genes if(args$genome=='hg19') { kgdb <- loadDb("/project/GCRB/Lee_Lab/s163035/Matrix_analysis_PMIT_25842977/GRO-seq/gencode.sqlite") } else if(args$genome=='mm10') { kgdb <- TxDb.Mmusculus.UCSC.mm10.knownGene org <- org.Mm.eg.db } kgtx <- transcripts(kgdb,columns=c("gene_id", "tx_id", "tx_name")) # Collapse overlapping annotations kgConsensus <- makeConsensusAnnotations(kgtx, keytype="gene_id", mc.cores=getOption("mc.cores")) #map <- select(org, keys=unlist(mcols(kgConsensus)$gene_id), # columns=c("SYMBOL"), keytype=c("ENTREZID")) #mcols(kgConsensus)$symbol <- map$SYMBOL #mcols(kgConsensus)$type <- "gene" # Tune HMM tune <- data.frame(LtProbB=c(rep(-100,9), rep(-150,9), rep(-200,9), rep(-250,9), rep(-300,9),rep(-350,9),rep(-400,9) ), UTS=rep(c(5,10,15,20,25,30,35,40,45), 7)) evals <- mclapply(seq_len(63), function(x) { hmm <- detectTranscripts(reads=alignments, LtProbB=tune$LtProbB[x], UTS=tune$UTS[x]) e <- evaluateHMMInAnnotations(hmm$transcripts, kgConsensus) e$eval }, mc.cores=getOption("mc.cores"), mc.silent=TRUE) tune <- cbind(tune, do.call(rbind, evals)) write.table(tune,file="tune.tsv", quote=F, sep="\t", row.names=F, col.names=T)