diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 3a60c64384aadb5de50677736fb18f6866f52d34..5ada019c23633666a8f071e43be9f096411d23a6 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -113,8 +113,8 @@ trimData: - merge_requests script: - singularity run 'docker://bicf/trimgalore:1.1' trim_galore --version > version_trimgalore.txt - - singularity run 'docker://bicf/trimgalore:1.1' trim_galore --gzip -q 25 --length 35 --basename Q-Y5F6_1M.se ./test_data/fastq/small/Q-Y5F6_1M.R1.fastq.gz - - singularity run 'docker://bicf/trimgalore:1.1' trim_galore --gzip -q 25 --length 35 --paired --basename Q-Y5F6_1M.pe ./test_data/fastq/small/Q-Y5F6_1M.R1.fastq.gz ./test_data/fastq/small/Q-Y5F6_1M.R2.fastq.gz + - singularity run 'docker://bicf/trimgalore:1.1' trim_galore --gzip -q 25 --length 35 --basename Q-Y5F6_1M.se -j 4 ./test_data/fastq/small/Q-Y5F6_1M.R1.fastq.gz + - singularity run 'docker://bicf/trimgalore:1.1' trim_galore --gzip -q 25 --length 35 --paired --basename Q-Y5F6_1M.pe -j 4 ./test_data/fastq/small/Q-Y5F6_1M.R1.fastq.gz ./test_data/fastq/small/Q-Y5F6_1M.R2.fastq.gz - readLengthSE=$(zcat *_trimmed.fq.gz | awk '{if(NR%4==2) print length($1)}' | sort -n | awk '{a[NR]=$0}END{print(NR%2==1)?a[int(NR/2)+1]:(a[NR/2]+a[NR/2+1])/2}') - readLengthPE=$(zcat *_1.fq.gz | awk '{if(NR%4==2) print length($1)}' | sort -n | awk '{a[NR]=$0}END{print(NR%2==1)?a[int(NR/2)+1]:(a[NR/2]+a[NR/2+1])/2}') - pytest -m trimData diff --git a/CHANGELOG.md b/CHANGELOG.md index 5b47ef9673ccdecefa03bb17e3d1a2850d83e55e..02c9891f6f7810cc78068aa080dc749e9f781ae4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ * Make pull references from BioHPC default (including in biohpc.config) * Start using new gudmaprbk dockerhub (images autobuilt) * Moved consistency checks to be fully python +* Added back the parallel (-j 4) form of trim_galore; fastqc now runs after the trim step *Known Bugs* * Datahub reference pull uses dev.gudmap.org as source until referencencs are placed on production diff --git
a/workflow/rna-seq.nf b/workflow/rna-seq.nf index 551f18d262dca6808ae3b514c0f4a6e36d23cfad..882900694bd1d0bb9c72f617547f31433ff9286a 100644 --- a/workflow/rna-seq.nf +++ b/workflow/rna-seq.nf @@ -229,12 +229,10 @@ if (fastqsForce != "") { .ifEmpty { exit 1, "override inputBag file not found: ${fastqsForce}" } .collect().into { fastqs_trimData - fastqs_fastqc } } else { fastqs.into { fastqs_trimData - fastqs_fastqc } } @@ -343,6 +341,7 @@ process trimData { output: path ("*.fq.gz") into fastqsTrim + path ("*.R{1,2}.fastq.gz") into fastqs_fastqc path ("*_trimming_report.txt") into trimQC path ("readLength.csv") into inferMetadata_readLength @@ -355,11 +354,11 @@ process trimData { echo -e "LOG: trimming ${ends}" >> ${repRID}.trimData.log if [ "${ends}" == "se" ] then - trim_galore --gzip -q 25 --length 35 --basename ${repRID} ${fastq[0]} + trim_galore --gzip -q 25 --length 35 --basename ${repRID} -j 4 ${fastq[0]} readLength=\$(zcat *_trimmed.fq.gz | awk '{if(NR%4==2) print length(\$1)}' | sort -n | awk '{a[NR]=\$0}END{print(NR%2==1)?a[int(NR/2)+1]:(a[NR/2]+a[NR/2+1])/2}') elif [ "${ends}" == "pe" ] then - trim_galore --gzip -q 25 --length 35 --paired --basename ${repRID} ${fastq[0]} ${fastq[1]} + trim_galore --gzip -q 25 --length 35 --paired --basename ${repRID} -j 4 ${fastq[0]} ${fastq[1]} readLength=\$(zcat *_1.fq.gz | awk '{if(NR%4==2) print length(\$1)}' | sort -n | awk '{a[NR]=\$0}END{print(NR%2==1)?a[int(NR/2)+1]:(a[NR/2]+a[NR/2+1])/2}') fi echo -e "LOG: trimmed" >> ${repRID}.trimData.log