From 1fe7d8b3ffc800bc16f79f193c5a7e423c993033 Mon Sep 17 00:00:00 2001 From: "Gervaise H. Henry" <gervaise.henry@utsouthwestern.edu> Date: Wed, 20 Jan 2021 11:56:21 -0600 Subject: [PATCH] Use 1 less process for samtools threading and limit mem to 75% of available --- CHANGELOG.md | 5 +++-- workflow/rna-seq.nf | 10 ++++++---- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 761a706..b4dfcdf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,7 +2,7 @@ **User Facing** **Background** -* Add memory limit per thread for samtools sort (#108) +* Add memory limit (75%) per thread for samtools sort (#108) * Remove parsing restrictions for submitted stranded/spike/species (#105, #106) * Pass unidentified ends instead of overwriting it as unknown * Move fastqc process before trim to catch fastq errors (#107) @@ -13,7 +13,8 @@ * Handle blank submitted endness better * Don't use file.csv from inputBag to parse manual endness, use counted from getData * Detect malformed fastq's (#107) -* Restrict sampled alignment process to use >32GB nodes on BioHPC +* Restrict sampled alignment process to use >32GB nodes on BioHPC (#108) +* Use nproc**-1** for alignment processes (#108) *Known Bugs* * Override params (inputBag, fastq, species) aren't checked for integrity diff --git a/workflow/rna-seq.nf b/workflow/rna-seq.nf index 5c68c00..e596205 100644 --- a/workflow/rna-seq.nf +++ b/workflow/rna-seq.nf @@ -873,9 +873,10 @@ process alignSampleData { # sort the bam file using Samtools echo -e "LOG: sorting the bam file" >> ${repRID}.${ref}.alignSampleData.log + proc=\$(expr `nproc` - 1) mem=\$(vmstat -s -S K | grep 'total memory' | grep -o '[0-9]*') - mem=\$(expr \${mem} / `nproc` \\* 85 / 100) - samtools sort -@ `nproc` -m \${mem}K -O BAM -o ${ref}.sampled.sorted.bam ${ref}.sampled.bam + mem=\$(expr \${mem} / \${proc} \\* 75 / 100) + samtools sort -@ \${proc} -m \${mem}K -O BAM -o ${ref}.sampled.sorted.bam ${ref}.sampled.bam # index the sorted 
bam using Samtools echo -e "LOG: indexing sorted bam file" >> ${repRID}.${ref}.alignSampleData.log @@ -1576,9 +1577,10 @@ process alignData { # sort the bam file using Samtools echo -e "LOG: sorting the bam file" >> ${repRID}.align.log + proc=\$(expr `nproc` - 1) mem=\$(vmstat -s -S K | grep 'total memory' | grep -o '[0-9]*') - mem=\$(expr \${mem} / `nproc` \\* 85 / 100) - samtools sort -@ `nproc` -m \${mem}K -O BAM -o ${repRID}.sorted.bam ${repRID}.bam + mem=\$(expr \${mem} / \${proc} \\* 75 / 100) + samtools sort -@ \${proc} -m \${mem}K -O BAM -o ${repRID}.sorted.bam ${repRID}.bam # index the sorted bam using Samtools echo -e "LOG: indexing sorted bam file" >> ${repRID}.align.log -- GitLab