diff --git a/workflow/conf/biohpc.config b/workflow/conf/biohpc.config
index e2b1335e2a2f9a923931ad3f293cf364f689e69f..dbc4e53bdc004d1bc61e52806bc9a94d7f7634cd 100755
--- a/workflow/conf/biohpc.config
+++ b/workflow/conf/biohpc.config
@@ -18,6 +18,9 @@ process {
   withName:alignReads {
     queue = '256GB,256GBv1,384GB'
   }
+  withName: dedupReads {
+    queue = '128GB,256GB,256GBv1,384GB'
+  }
 }
 
 singularity {
diff --git a/workflow/nextflow.config b/workflow/nextflow.config
index acb710d45a2036fbfb83a0b5abb7a80ce64ce3ca..67cb95ebd958c861697dad2eede6b906cc728669 100644
--- a/workflow/nextflow.config
+++ b/workflow/nextflow.config
@@ -26,6 +26,9 @@ process {
   withName: alignReads {
     container = 'bicf/gudmaprbkaligner:2.0.0'
   }
+  withName: dedupReads {
+    container = 'bicf/picard2.21.7:2.0.0'
+  }
 }
 
 trace {
diff --git a/workflow/rna-seq.nf b/workflow/rna-seq.nf
index ff348297ea88cefa54d494cc1097f090f896fb41..a15e439d5eb8ab5bd3a53c941733360c159c0470 100755
--- a/workflow/rna-seq.nf
+++ b/workflow/rna-seq.nf
@@ -244,17 +244,47 @@ process alignReads {
     val referenceDir_alignReads
 
   output:
-    set repRID, file ("${repRID}.unal.gz"), file ("${repRID}.sorted.bam"), file ("${repRID}.sorted.bai")
-    set repRID, file ("${repRID}.align.out"), file ("${repRID}.align.err")
+    tuple repRID, file ("${repRID}.unal.gz"), file ("${repRID}.sorted.bam"), file ("${repRID}.sorted.bai") into dedupReads
+    tuple repRID, file ("${repRID}.align.out"), file ("${repRID}.align.err")
 
   script:
     """
+    hostname >${repRID}.align.err
+    ulimit -a >>${repRID}.align.err
+
+    # align reads
     if [ "${endsManual_alignReads}" == 'pe' ]; then
-      hisat2 -p `nproc` --add-chrname --un-gz ${repRID}.unal.gz -S ${repRID}.sam -x ${referenceDir_alignReads} ${stranded_alignReads} --no-mixed --no-discordant -1 ${fqs[0]} -2 ${fqs[1]} 1>${repRID}.align.out 2>${repRID}.align.err;
-    else hisat2 -p `nproc` --add-chrname --un-gz ${repRID}.unal.gz -S ${repRID}.sam -x ${referenceDir_alignReads} ${stranded_alignReads} -U ${fqs[0]} 1>${repRID}.align.out 2>${repRID}.align.err;
+      hisat2 -p `nproc` --add-chrname --un-gz ${repRID}.unal.gz -S ${repRID}.sam -x ${referenceDir_alignReads} ${stranded_alignReads} --no-mixed --no-discordant -1 ${fqs[0]} -2 ${fqs[1]} 1>${repRID}.align.out 2>>${repRID}.align.err;
+    else hisat2 -p `nproc` --add-chrname --un-gz ${repRID}.unal.gz -S ${repRID}.sam -x ${referenceDir_alignReads} ${stranded_alignReads} -U ${fqs[0]} 1>${repRID}.align.out 2>>${repRID}.align.err;
     fi;
     samtools view -1 -@ `nproc` -F 4 -F 8 -F 256 -o ${repRID}.bam ${repRID}.sam 1>>${repRID}.align.out 2>>${repRID}.align.err;
     samtools sort -@ `nproc` -O BAM -o ${repRID}.sorted.bam ${repRID}.bam 1>>${repRID}.align.out 2>>${repRID}.align.err;
     samtools index -@ `nproc` -b ${repRID}.sorted.bam ${repRID}.sorted.bai 1>>${repRID}.align.out 2>>${repRID}.align.err;
     """
 }
+
+/*
+ * dedupReads: mark and remove duplicate reads (REMOVE_DUPLICATES=true), specifically focused on PCR or optical duplicates
+*/
+process dedupReads {
+  tag "${repRID}"
+  publishDir "${outDir}/deduped", mode: 'copy', pattern: "${repRID}.deduped.{bam,Metrics.txt}"
+  publishDir "${logsDir}", mode: 'copy', pattern: "${repRID}.dedup.{out,err}"
+
+  input:
+    tuple repRID, file (unal), file (sortedBam), file (sortedBai) from dedupReads
+
+  output:
+    tuple repRID, file ("${repRID}.deduped.bam"), file ("${repRID}.deduped.Metrics.txt")
+    // logs must be declared as outputs: publishDir only publishes files matched by an output declaration
+    tuple repRID, file ("${repRID}.dedup.out"), file ("${repRID}.dedup.err")
+
+  script:
+    """
+    hostname >${repRID}.dedup.err
+    ulimit -a >>${repRID}.dedup.err
+
+    #Remove duplicated reads
+    java -jar /picard/build/libs/picard.jar MarkDuplicates I=${sortedBam} O=${repRID}.deduped.bam M=${repRID}.deduped.Metrics.txt REMOVE_DUPLICATES=true 1>>${repRID}.dedup.out 2>> ${repRID}.dedup.err
+    """
+}