From 75c3181000c3a71cc1b6093b8f1ca7e20e17dc19 Mon Sep 17 00:00:00 2001
From: s181706 <jonathan.gesell@utsouthwestern.edu>
Date: Wed, 29 Jan 2020 17:50:18 -0600
Subject: [PATCH] Initial branch commit, tested and working.

---
Review notes (free-form text placed after the "---" separator; it is not
part of the commit message):

  * Adds a dedupReads process to workflow/rna-seq.nf. It consumes the
    (repRID, unal, sortedBam, sortedBai) tuple that alignReads now emits
    "into dedupReads" and runs Picard MarkDuplicates with
    REMOVE_DUPLICATES=true, producing <repRID>.deduped.bam and
    <repRID>.deduped.Metrics.txt.
  * Registers a queue (conf/biohpc.config) and a container
    (nextflow.config) for the new process.
  * alignReads: outputs switch from "set" to "tuple"; the .align.err log
    now begins with hostname and "ulimit -a" output, so the hisat2 stderr
    redirect changes from ">" to ">>" to append instead of truncating.
  * NOTE(review): this copy of the patch was restored from a version whose
    line breaks and indentation had been collapsed. Leading whitespace on
    every diff line below is reconstructed, not original - verify against
    the target tree, or apply with "git apply --ignore-whitespace" if
    context lines fail to match.

 workflow/conf/biohpc.config |  3 +++
 workflow/nextflow.config    |  3 +++
 workflow/rna-seq.nf         | 36 ++++++++++++++++++++++++++++++++----
 3 files changed, 38 insertions(+), 4 deletions(-)

diff --git a/workflow/conf/biohpc.config b/workflow/conf/biohpc.config
index e2b1335..dbc4e53 100755
--- a/workflow/conf/biohpc.config
+++ b/workflow/conf/biohpc.config
@@ -18,6 +18,9 @@ process {
   withName:alignReads {
     queue = '256GB,256GBv1,384GB'
   }
+  withName: dedupReads {
+    queue = '128GB,256GB,256GBv1,384GB'
+  }
 }

 singularity {
diff --git a/workflow/nextflow.config b/workflow/nextflow.config
index acb710d..67cb95e 100644
--- a/workflow/nextflow.config
+++ b/workflow/nextflow.config
@@ -26,6 +26,9 @@ process {
   withName: alignReads {
     container = 'bicf/gudmaprbkaligner:2.0.0'
   }
+  withName: dedupReads {
+    container = 'bicf/picard2.21.7:2.0.0'
+  }
 }

 trace {
diff --git a/workflow/rna-seq.nf b/workflow/rna-seq.nf
index ff34829..a15e439 100755
--- a/workflow/rna-seq.nf
+++ b/workflow/rna-seq.nf
@@ -244,17 +244,45 @@ process alignReads {
     val referenceDir_alignReads

   output:
-    set repRID, file ("${repRID}.unal.gz"), file ("${repRID}.sorted.bam"), file ("${repRID}.sorted.bai")
-    set repRID, file ("${repRID}.align.out"), file ("${repRID}.align.err")
+    tuple repRID, file ("${repRID}.unal.gz"), file ("${repRID}.sorted.bam"), file ("${repRID}.sorted.bai") into dedupReads
+    tuple repRID, file ("${repRID}.align.out"), file ("${repRID}.align.err")

   script:
     """
+    hostname >${repRID}.align.err
+    ulimit -a >>${repRID}.align.err
+
+    # align reads
     if [ "${endsManual_alignReads}" == 'pe' ]; then
-    hisat2 -p `nproc` --add-chrname --un-gz ${repRID}.unal.gz -S ${repRID}.sam -x ${referenceDir_alignReads} ${stranded_alignReads} --no-mixed --no-discordant -1 ${fqs[0]} -2 ${fqs[1]} 1>${repRID}.align.out 2>${repRID}.align.err;
-    else hisat2 -p `nproc` --add-chrname --un-gz ${repRID}.unal.gz -S ${repRID}.sam -x ${referenceDir_alignReads} ${stranded_alignReads} -U ${fqs[0]} 1>${repRID}.align.out 2>${repRID}.align.err;
+    hisat2 -p `nproc` --add-chrname --un-gz ${repRID}.unal.gz -S ${repRID}.sam -x ${referenceDir_alignReads} ${stranded_alignReads} --no-mixed --no-discordant -1 ${fqs[0]} -2 ${fqs[1]} 1>${repRID}.align.out 2>>${repRID}.align.err;
+    else hisat2 -p `nproc` --add-chrname --un-gz ${repRID}.unal.gz -S ${repRID}.sam -x ${referenceDir_alignReads} ${stranded_alignReads} -U ${fqs[0]} 1>${repRID}.align.out 2>>${repRID}.align.err;
     fi;
     samtools view -1 -@ `nproc` -F 4 -F 8 -F 256 -o ${repRID}.bam ${repRID}.sam 1>>${repRID}.align.out 2>>${repRID}.align.err;
     samtools sort -@ `nproc` -O BAM -o ${repRID}.sorted.bam ${repRID}.bam 1>>${repRID}.align.out 2>>${repRID}.align.err;
     samtools index -@ `nproc` -b ${repRID}.sorted.bam ${repRID}.sorted.bai 1>>${repRID}.align.out 2>>${repRID}.align.err;
     """
 }
+
+/*
+ *dedupReads: mark the duplicate reads, specifically focused on PCR or optical duplicates
+*/
+process dedupReads {
+  tag "${repRID}"
+  publishDir "${outDir}/deduped", mode: 'copy', pattern: "${repRID}.deduped.{bam,Metrics.txt}"
+  publishDir "${logsDir}", mode: 'copy', pattern: "${repRID}.dedup.{out,err}"
+
+  input:
+    tuple repRID, file (unal), file (sortedBam), file (sortedBai) from dedupReads
+
+  output:
+    tuple repRID, file ("${repRID}.deduped.bam"), file ("${repRID}.deduped.Metrics.txt")
+
+  script:
+    """
+    hostname >${repRID}.dedup.err
+    ulimit -a >>${repRID}.dedup.err
+
+    #Remove duplicated reads
+    java -jar /picard/build/libs/picard.jar MarkDuplicates I=${sortedBam} O=${repRID}.deduped.bam M=${repRID}.deduped.Metrics.txt REMOVE_DUPLICATES=true 1>>${repRID}.dedup.out 2>> ${repRID}.dedup.err
+    """
+}
-- 
GitLab