Skip to content
Snippets Groups Projects
Commit 5e5202b0 authored by Jeremy Mathews
Browse files

Fix and test use of GTF and Genename params

parent d716364d
1 merge request: !57 Resolve "annotate peaks"
Pipeline #4630 failed with stages
in 13 hours, 22 minutes, and 8 seconds
......@@ -11,6 +11,7 @@ All notable changes to this project will be documented in this file.
- Add Python version to MultiQC
- Add and Update tests
- Use GTF files instead of TxDb and org libraries in Annotate Peaks
- Make the GTF and geneNames files param inputs
## [publish_1.0.6 ] - 2019-05-31
### Added
......
......@@ -74,25 +74,28 @@ params {
// Reference file paths on BioHPC
genomes {
'GRCh38' {
bwa = '/project/shared/bicf_workflow_ref/GRCh38'
bwa = '/project/shared/bicf_workflow_ref/human/GRCh38'
genomesize = 'hs'
chromsizes = '/project/shared/bicf_workflow_ref/GRCh38/genomefile.txt'
fasta = '/project/shared/bicf_workflow_ref/GRCh38/genome.fa'
gtf = '/project/shared/bicf_workflow_ref/GRCh38/gencode.gtf'
chromsizes = '/project/shared/bicf_workflow_ref/human/GRCh38/genomefile.txt'
fasta = '/project/shared/bicf_workflow_ref/human/GRCh38/genome.fa'
gtf = '/project/shared/bicf_workflow_ref/human/GRCh38/gencode.gtf'
geneNames = '/project/shared/bicf_workflow_ref/human/GRCh38/genenames.txt'
}
'GRCh37' {
bwa = '/project/shared/bicf_workflow_ref/GRCh37'
bwa = '/project/shared/bicf_workflow_ref/human/GRCh37'
genomesize = 'hs'
chromsizes = '/project/shared/bicf_workflow_ref/GRCh37/genomefile.txt'
fasta = '/project/shared/bicf_workflow_ref/GRCh37/genome.fa'
gtf = '/project/shared/bicf_workflow_ref/GRCh37/gencode.gtf'
chromsizes = '/project/shared/bicf_workflow_ref/human/GRCh37/genomefile.txt'
fasta = '/project/shared/bicf_workflow_ref/human/GRCh37/genome.fa'
gtf = '/project/shared/bicf_workflow_ref/human/GRCh37/gencode.gtf'
geneNames = '/project/shared/bicf_workflow_ref/human/GRCh37/genenames.txt'
}
'GRCm38' {
bwa = '/project/shared/bicf_workflow_ref/GRCm38'
bwa = '/project/shared/bicf_workflow_ref/mouse/GRCm38'
genomesize = 'mm'
chromsizes = '/project/shared/bicf_workflow_ref/GRCm38/genomefile.txt'
fasta = '/project/shared/bicf_workflow_ref/GRCm38/genome.fa'
gtf = '/project/shared/bicf_workflow_ref/GRCm38/gencode.gtf'
chromsizes = '/project/shared/bicf_workflow_ref/mouse/GRCm38/genomefile.txt'
fasta = '/project/shared/bicf_workflow_ref/mouse/GRCm38/genome.fa'
gtf = '/project/shared/bicf_workflow_ref/mouse/GRCm38/gencode.gtf'
geneNames = '/project/shared/bicf_workflow_ref/mouse/GRCm38/genenames.txt'
}
}
}
......
......@@ -33,14 +33,27 @@ params.multiqc = "$baseDir/conf/multiqc_config.yaml"
if (params.astrocyte) {
print("Running under astrocyte")
referenceLocation = "/project/shared/bicf_workflow_ref"
params.bwaIndex = "$referenceLocation/$params.genome"
params.chromSizes = "$referenceLocation/$params.genome/genomefile.txt"
params.fasta = "$referenceLocation/$params.genome/genome.fa"
params.gtf = "$referenceLocation/$params.genome/gencode.gtf"
if (params.genome == 'GRCh37' || params.genome == 'GRCh38') {
if (params.genome == 'GRCh37') {
params.bwaIndex = "$referenceLocation/human/$params.genome"
params.chromSizes = "$referenceLocation/human/$params.genome/genomefile.txt"
params.fasta = "$referenceLocation/human/$params.genome/genome.fa"
params.gtf = "$referenceLocation/human/$params.genome/gencode.v19.chr_patch_hapl_scaff.annotation.gtf"
params.geneNames = "$referenceLocation/human/$params.genome/genenames.txt"
params.genomeSize = 'hs'
} else if (params.genome == 'GRCm38') {
params.bwaIndex = "$referenceLocation/mouse/$params.genome"
params.chromSizes = "$referenceLocation/mouse/$params.genome/genomefile.txt"
params.fasta = "$referenceLocation/mouse/$params.genome/genome.fa"
params.gtf = "$referenceLocation/mouse/$params.genome/gencode.vM20.annotation.gtf"
params.geneNames = "$referenceLocation/mouse/$params.genome/genenames.txt"
params.genomeSize = 'mm'
} else if (params.genome == 'GRCh38') {
params.bwaIndex = "$referenceLocation/human/$params.genome"
params.chromSizes = "$referenceLocation/human/$params.genome/genomefile.txt"
params.fasta = "$referenceLocation/human/$params.genome/genome.fa"
params.gtf = "$referenceLocation/human/$params.genome/gencode.v25.chr_patch_hapl_scaff.annotation.gtf"
params.geneNames = "$referenceLocation/human/$params.genome/genenames.txt"
params.genomeSize = 'hs'
}
} else {
params.bwaIndex = params.genome ? params.genomes[ params.genome ].bwa ?: false : false
......@@ -48,6 +61,7 @@ if (params.astrocyte) {
params.chromSizes = params.genome ? params.genomes[ params.genome ].chromsizes ?: false : false
params.fasta = params.genome ? params.genomes[ params.genome ].fasta ?: false : false
params.gtf = params.genome ? params.genomes[ params.genome ].gtf ?: false : false
params.geneNames = params.genome ? params.genomes[ params.genome ].geneNames ?: false : false
}
......@@ -84,7 +98,9 @@ skipMotif = params.skipMotif
skipPlotProfile = params.skipPlotProfile
references = params.references
multiqc = params.multiqc
gtfFile = Channel.fromPath(params.gtf)
gtfFile_plotProfile = Channel.fromPath(params.gtf)
gtfFile_annotPeaks = Channel.fromPath(params.gtf)
geneNames = Channel.fromPath(params.geneNames)
// Check design file for errors
process checkDesignFile {
......@@ -469,7 +485,7 @@ process plotProfile {
input:
file ("*.pooled.fc_signal.bw") from bigwigs.collect()
file gtf from gtfFile
file gtf from gtfFile_plotProfile
output:
......@@ -524,6 +540,8 @@ process peakAnnotation {
input:
file designAnnotatePeaks
file gtf from gtfFile_annotPeaks
file geneNames
output:
......@@ -534,7 +552,7 @@ process peakAnnotation {
"""
module load R/3.3.2-gccmkl
Rscript $baseDir/scripts/annotate_peaks.R $designAnnotatePeaks $genome
Rscript $baseDir/scripts/annotate_peaks.R $designAnnotatePeaks $genome $gtf $geneNames
"""
}
......
......@@ -16,24 +16,18 @@ library(GenomicFeatures)
args <- commandArgs(trailingOnly=TRUE)
# Check input args
if (length(args) != 2) {
stop("Usage: annotate_peaks.R annotate_design.tsv genome_assembly", call.=FALSE)
if (length(args) != 4) {
stop("Usage: annotate_peaks.R annotate_design.tsv genome_assembly gtf geneNames", call.=FALSE)
}
design_file <- args[1]
genome_assembly <- args[2]
gtf <- args[3]
geneNames <- args[4]
# Load the transcript annotation (TxDb built from the GTF) and the gene-symbol table
if(genome_assembly=='GRCh37') {
txdb <- makeTxDbFromGFF("/project/shared/bicf_workflow_ref/human/GRCh37/gencode.v19.chr_patch_hapl_scaff.annotation.gtf")
sym <- read.table("/project/shared/bicf_workflow_ref/human/GRCh37/genenames.txt", header=T, sep='\t') [,4:5]
} else if(genome_assembly=='GRCm38') {
txdb <- makeTxDbFromGFF("/project/shared/bicf_workflow_ref/mouse/GRCm38/gencode.vM20.annotation.gtf")
sym <- read.table("/project/shared/bicf_workflow_ref/mouse/GRCm38/genenames.txt", header=T, sep='\t') [,4:5]
} else if(genome_assembly=='GRCh38') {
txdb <- makeTxDbFromGFF("/project/shared/bicf_workflow_ref/human/GRCh38/gencode.v25.chr_patch_hapl_scaff.annotation.gtf")
sym <- read.table("/project/shared/bicf_workflow_ref/human/GRCh38/genenames.txt", header=T, sep='\t') [,4:5]
}
txdb <- makeTxDbFromGFF(gtf)
sym <- read.table(geneNames, header=T, sep='\t') [,4:5]
# Output version of ChIPseeker
chipseeker_version = packageVersion('ChIPseeker')
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment