diff --git a/CHANGELOG.md b/CHANGELOG.md index c40315d50aad8d3de8ca915d04a0668a2c7208c2..bc28c53bfc58c5288b3b912d06152a1657e3db0a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ All notable changes to this project will be documented in this file. - Add Python version to MultiQC - Add and Update tests - Use GTF files instead of TxDb and org libraries in Annotate Peaks +- Make gtf and geneName files as param inputs ## [publish_1.0.6 ] - 2019-05-31 ### Added diff --git a/workflow/conf/biohpc.config b/workflow/conf/biohpc.config index 7237cad47346ee0699fcadcf73dcd2a70f3431a5..980ab7b9fad2e649d51cb8cf1760074b508aa801 100644 --- a/workflow/conf/biohpc.config +++ b/workflow/conf/biohpc.config @@ -74,25 +74,28 @@ params { // Reference file paths on BioHPC genomes { 'GRCh38' { - bwa = '/project/shared/bicf_workflow_ref/GRCh38' + bwa = '/project/shared/bicf_workflow_ref/human/GRCh38' genomesize = 'hs' - chromsizes = '/project/shared/bicf_workflow_ref/GRCh38/genomefile.txt' - fasta = '/project/shared/bicf_workflow_ref/GRCh38/genome.fa' - gtf = '/project/shared/bicf_workflow_ref/GRCh38/gencode.gtf' + chromsizes = '/project/shared/bicf_workflow_ref/human/GRCh38/genomefile.txt' + fasta = '/project/shared/bicf_workflow_ref/human/GRCh38/genome.fa' + gtf = '/project/shared/bicf_workflow_ref/human/GRCh38/gencode.gtf' + geneNames = '/project/shared/bicf_workflow_ref/human/GRCh38/genenames.txt' } 'GRCh37' { - bwa = '/project/shared/bicf_workflow_ref/GRCh37' + bwa = '/project/shared/bicf_workflow_ref/human/GRCh37' genomesize = 'hs' - chromsizes = '/project/shared/bicf_workflow_ref/GRCh37/genomefile.txt' - fasta = '/project/shared/bicf_workflow_ref/GRCh37/genome.fa' - gtf = '/project/shared/bicf_workflow_ref/GRCh37/gencode.gtf' + chromsizes = '/project/shared/bicf_workflow_ref/human/GRCh37/genomefile.txt' + fasta = '/project/shared/bicf_workflow_ref/human/GRCh37/genome.fa' + gtf = '/project/shared/bicf_workflow_ref/human/GRCh37/gencode.gtf' + geneNames = '/project/shared/bicf_workflow_ref/human/GRCh37/genenames.txt' } 'GRCm38' { - bwa = '/project/shared/bicf_workflow_ref/GRCm38' + bwa = '/project/shared/bicf_workflow_ref/mouse/GRCm38' genomesize = 'mm' - chromsizes = '/project/shared/bicf_workflow_ref/GRCm38/genomefile.txt' - fasta = '/project/shared/bicf_workflow_ref/GRCm38/genome.fa' - gtf = '/project/shared/bicf_workflow_ref/GRCm38/gencode.gtf' + chromsizes = '/project/shared/bicf_workflow_ref/mouse/GRCm38/genomefile.txt' + fasta = '/project/shared/bicf_workflow_ref/mouse/GRCm38/genome.fa' + gtf = '/project/shared/bicf_workflow_ref/mouse/GRCm38/gencode.gtf' + geneNames = '/project/shared/bicf_workflow_ref/mouse/GRCm38/genenames.txt' } } } diff --git a/workflow/main.nf b/workflow/main.nf index da287e857bdf38a5131992d6b1adfd7ff5a9cc1b..324b14addd3364051bafe3758c4c81acace9a018 100644 --- a/workflow/main.nf +++ b/workflow/main.nf @@ -33,14 +33,27 @@ params.multiqc = "$baseDir/conf/multiqc_config.yaml" if (params.astrocyte) { print("Running under astrocyte") referenceLocation = "/project/shared/bicf_workflow_ref" - params.bwaIndex = "$referenceLocation/$params.genome" - params.chromSizes = "$referenceLocation/$params.genome/genomefile.txt" - params.fasta = "$referenceLocation/$params.genome/genome.fa" - params.gtf = "$referenceLocation/$params.genome/gencode.gtf" - if (params.genome == 'GRCh37' || params.genome == 'GRCh38') { + if (params.genome == 'GRCh37') { + params.bwaIndex = "$referenceLocation/human/$params.genome" + params.chromSizes = "$referenceLocation/human/$params.genome/genomefile.txt" + params.fasta = "$referenceLocation/human/$params.genome/genome.fa" + params.gtf = "$referenceLocation/human/$params.genome/gencode.v19.chr_patch_hapl_scaff.annotation.gtf" + params.geneNames = "$referenceLocation/human/$params.genome/genenames.txt" params.genomeSize = 'hs' } else if (params.genome == 'GRCm38') { + params.bwaIndex = "$referenceLocation/mouse/$params.genome" + params.chromSizes = "$referenceLocation/mouse/$params.genome/genomefile.txt" + params.fasta = "$referenceLocation/mouse/$params.genome/genome.fa" + params.gtf = "$referenceLocation/mouse/$params.genome/gencode.vM20.annotation.gtf" + params.geneNames = "$referenceLocation/mouse/$params.genome/genenames.txt" params.genomeSize = 'mm' + } else if (params.genome == 'GRCh38') { + params.bwaIndex = "$referenceLocation/human/$params.genome" + params.chromSizes = "$referenceLocation/human/$params.genome/genomefile.txt" + params.fasta = "$referenceLocation/human/$params.genome/genome.fa" + params.gtf = "$referenceLocation/human/$params.genome/gencode.v25.chr_patch_hapl_scaff.annotation.gtf" + params.geneNames = "$referenceLocation/human/$params.genome/genenames.txt" + params.genomeSize = 'hs' } } else { params.bwaIndex = params.genome ? params.genomes[ params.genome ].bwa ?: false : false @@ -48,6 +61,7 @@ if (params.astrocyte) { params.chromSizes = params.genome ? params.genomes[ params.genome ].chromsizes ?: false : false params.fasta = params.genome ? params.genomes[ params.genome ].fasta ?: false : false params.gtf = params.genome ? params.genomes[ params.genome ].gtf ?: false : false + params.geneNames = params.genome ? params.genomes[ params.genome ].geneNames ?: false : false } @@ -84,7 +98,9 @@ skipMotif = params.skipMotif skipPlotProfile = params.skipPlotProfile references = params.references multiqc = params.multiqc -gtfFile = Channel.fromPath(params.gtf) +gtfFile_plotProfile = Channel.fromPath(params.gtf) +gtfFile_annotPeaks = Channel.fromPath(params.gtf) +geneNames = Channel.fromPath(params.geneNames) // Check design file for errors process checkDesignFile { @@ -469,7 +485,7 @@ process plotProfile { input: file ("*.pooled.fc_signal.bw") from bigwigs.collect() - file gtf from gtfFile + file gtf from gtfFile_plotProfile output: @@ -524,6 +540,8 @@ process peakAnnotation { input: file designAnnotatePeaks + file gtf from gtfFile_annotPeaks + file geneNames output: @@ -534,7 +552,7 @@ process peakAnnotation { """ module load R/3.3.2-gccmkl - Rscript $baseDir/scripts/annotate_peaks.R $designAnnotatePeaks $genome + Rscript $baseDir/scripts/annotate_peaks.R $designAnnotatePeaks $genome $gtf $geneNames """ } diff --git a/workflow/scripts/annotate_peaks.R b/workflow/scripts/annotate_peaks.R index 68b88139f9eb18656e4aba33ca5a84364c6327b1..09919f0b05c2fd04ff56f0699cb065f0294bbe98 100644 --- a/workflow/scripts/annotate_peaks.R +++ b/workflow/scripts/annotate_peaks.R @@ -16,24 +16,18 @@ library(GenomicFeatures) args <- commandArgs(trailingOnly=TRUE) # Check input args -if (length(args) != 2) { - stop("Usage: annotate_peaks.R annotate_design.tsv genome_assembly", call.=FALSE) +if (length(args) != 4) { + stop("Usage: annotate_peaks.R annotate_design.tsv genome_assembly gtf geneNames", call.=FALSE) } design_file <- args[1] genome_assembly <- args[2] +gtf <- args[3] +geneNames <- args[4] # Load UCSC Known Genes -if(genome_assembly=='GRCh37') { - txdb <- makeTxDbFromGFF("/project/shared/bicf_workflow_ref/human/GRCh37/gencode.v19.chr_patch_hapl_scaff.annotation.gtf") - sym <- read.table("/project/shared/bicf_workflow_ref/human/GRCh37/genenames.txt", header=T, sep='\t') [,4:5] -} else if(genome_assembly=='GRCm38') { - txdb <- makeTxDbFromGFF("/project/shared/bicf_workflow_ref/mouse/GRCm38/gencode.vM20.annotation.gtf") - sym <- read.table("/project/shared/bicf_workflow_ref/mouse/GRCm38/genenames.txt", header=T, sep='\t') [,4:5] -} else if(genome_assembly=='GRCh38') { - txdb <- makeTxDbFromGFF("/project/shared/bicf_workflow_ref/human/GRCh38/gencode.v25.chr_patch_hapl_scaff.annotation.gtf") - sym <- read.table("/project/shared/bicf_workflow_ref/human/GRCh38/genenames.txt", header=T, sep='\t') [,4:5] -} +txdb <- makeTxDbFromGFF(gtf) +sym <- read.table(geneNames, header=T, sep='\t') [,4:5] # Output version of ChIPseeker chipseeker_version = packageVersion('ChIPseeker')