From 7cf5d610ec3ae37ee8f55257d2b6be796e407243 Mon Sep 17 00:00:00 2001
From: Venkat Malladi <venkat.malladi@utsouthwestern.edu>
Date: Thu, 12 Nov 2020 22:24:00 -0600
Subject: [PATCH] Add back parallel trim step. Moved fastqc step after trim.

Run trim_galore with "-j 4" both in the trimData CI job and in the
trimData workflow process.

The fastqs_fastqc channel is no longer split off from the fastqs channel;
it is now emitted by trimData, so fastqc is scheduled only after trimming
has finished.

The "*.R{1,2}.fastq.gz" output glob is meant to pick up the FASTQs that
were staged into trimData as inputs (trim_galore's own results are
captured by "*.fq.gz"). Nextflow leaves input files out of output glob
matches unless told otherwise, so the declaration sets
"includeInputs: true"; without it the process has nothing to emit on
fastqs_fastqc.

---
 .gitlab-ci.yml      | 4 ++--
 CHANGELOG.md        | 1 +
 workflow/rna-seq.nf | 7 +++----
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 3a60c64..5ada019 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -113,8 +113,8 @@ trimData:
     - merge_requests
   script:
   - singularity run 'docker://bicf/trimgalore:1.1' trim_galore --version > version_trimgalore.txt
-  - singularity run 'docker://bicf/trimgalore:1.1' trim_galore --gzip -q 25 --length 35 --basename Q-Y5F6_1M.se ./test_data/fastq/small/Q-Y5F6_1M.R1.fastq.gz
-  - singularity run 'docker://bicf/trimgalore:1.1' trim_galore --gzip -q 25 --length 35 --paired --basename Q-Y5F6_1M.pe ./test_data/fastq/small/Q-Y5F6_1M.R1.fastq.gz ./test_data/fastq/small/Q-Y5F6_1M.R2.fastq.gz
+  - singularity run 'docker://bicf/trimgalore:1.1' trim_galore --gzip -q 25 --length 35 --basename Q-Y5F6_1M.se -j 4 ./test_data/fastq/small/Q-Y5F6_1M.R1.fastq.gz
+  - singularity run 'docker://bicf/trimgalore:1.1' trim_galore --gzip -q 25 --length 35 --paired --basename Q-Y5F6_1M.pe -j 4 ./test_data/fastq/small/Q-Y5F6_1M.R1.fastq.gz ./test_data/fastq/small/Q-Y5F6_1M.R2.fastq.gz
   - readLengthSE=$(zcat *_trimmed.fq.gz | awk '{if(NR%4==2) print length($1)}' | sort -n | awk '{a[NR]=$0}END{print(NR%2==1)?a[int(NR/2)+1]:(a[NR/2]+a[NR/2+1])/2}')
   - readLengthPE=$(zcat *_1.fq.gz | awk '{if(NR%4==2) print length($1)}' | sort -n | awk '{a[NR]=$0}END{print(NR%2==1)?a[int(NR/2)+1]:(a[NR/2]+a[NR/2+1])/2}')
   - pytest -m trimData
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 5b47ef9..02c9891 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -9,6 +9,7 @@
 * Make pull references from BioHPC default (including in biohpc.config)
 * Start using new gudmaprbk dockerhub (images autobuilt)
 * Moved consistency checks to be fully python
+* Added back parallel form of trim_galore and now use fastqc after trim step
 
 *Known Bugs*
 * Datahub reference pull uses dev.gudmap.org as source until referencencs are placed on production
diff --git a/workflow/rna-seq.nf b/workflow/rna-seq.nf
index 551f18d..8829006 100644
--- a/workflow/rna-seq.nf
+++ b/workflow/rna-seq.nf
@@ -229,12 +229,10 @@ if (fastqsForce != "") {
     .ifEmpty { exit 1, "override inputBag file not found: ${fastqsForce}" }
     .collect().into {
       fastqs_trimData
-      fastqs_fastqc
     }
 } else {
   fastqs.into {
     fastqs_trimData
-    fastqs_fastqc
   }
 }
 
@@ -343,6 +341,7 @@ process trimData {
 
   output:
     path ("*.fq.gz") into fastqsTrim
+    path ("*.R{1,2}.fastq.gz", includeInputs: true) into fastqs_fastqc
     path ("*_trimming_report.txt") into trimQC
     path ("readLength.csv") into inferMetadata_readLength
 
@@ -355,11 +354,11 @@ process trimData {
     echo -e "LOG: trimming ${ends}" >> ${repRID}.trimData.log
     if [ "${ends}" == "se" ]
     then
-      trim_galore --gzip -q 25 --length 35 --basename ${repRID} ${fastq[0]}
+      trim_galore --gzip -q 25 --length 35 --basename ${repRID} -j 4 ${fastq[0]}
       readLength=\$(zcat *_trimmed.fq.gz | awk '{if(NR%4==2) print length(\$1)}' | sort -n | awk '{a[NR]=\$0}END{print(NR%2==1)?a[int(NR/2)+1]:(a[NR/2]+a[NR/2+1])/2}')
     elif [ "${ends}" == "pe" ]
     then
-      trim_galore --gzip -q 25 --length 35 --paired --basename ${repRID} ${fastq[0]} ${fastq[1]}
+      trim_galore --gzip -q 25 --length 35 --paired --basename ${repRID} -j 4 ${fastq[0]} ${fastq[1]}
       readLength=\$(zcat *_1.fq.gz | awk '{if(NR%4==2) print length(\$1)}' | sort -n | awk '{a[NR]=\$0}END{print(NR%2==1)?a[int(NR/2)+1]:(a[NR/2]+a[NR/2+1])/2}')
     fi
     echo -e "LOG: trimmed" >> ${repRID}.trimData.log
-- 
GitLab