Commit 7cf5d610 authored by Venkat Malladi

Add back parallel trim step. Moved fastqc step after trim.

parent 3ccdc626
2 merge requests: !58 Develop, !50 Add back parallel trim step. Moved fastqc step after trim.
Pipeline #8357 canceled with stages in 10 minutes and 34 seconds
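The added parallelism comes from Trim Galore's -j/--cores option, which sets the number of cores used for trimming; the hunks below pass -j 4 to both the single-end and paired-end calls. For reference, this is the single-end form exercised by the CI job, copied standalone from the first hunk (the test fastq path comes from the repository's test data):

    # Single-end trim with 4 cores, run through the bicf/trimgalore container
    singularity run 'docker://bicf/trimgalore:1.1' trim_galore --gzip -q 25 --length 35 \
      --basename Q-Y5F6_1M.se -j 4 ./test_data/fastq/small/Q-Y5F6_1M.R1.fastq.gz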
@@ -113,8 +113,8 @@ trimData:
     - merge_requests
   script:
     - singularity run 'docker://bicf/trimgalore:1.1' trim_galore --version > version_trimgalore.txt
-    - singularity run 'docker://bicf/trimgalore:1.1' trim_galore --gzip -q 25 --length 35 --basename Q-Y5F6_1M.se ./test_data/fastq/small/Q-Y5F6_1M.R1.fastq.gz
-    - singularity run 'docker://bicf/trimgalore:1.1' trim_galore --gzip -q 25 --length 35 --paired --basename Q-Y5F6_1M.pe ./test_data/fastq/small/Q-Y5F6_1M.R1.fastq.gz ./test_data/fastq/small/Q-Y5F6_1M.R2.fastq.gz
+    - singularity run 'docker://bicf/trimgalore:1.1' trim_galore --gzip -q 25 --length 35 --basename Q-Y5F6_1M.se -j 4 ./test_data/fastq/small/Q-Y5F6_1M.R1.fastq.gz
+    - singularity run 'docker://bicf/trimgalore:1.1' trim_galore --gzip -q 25 --length 35 --paired --basename Q-Y5F6_1M.pe -j 4 ./test_data/fastq/small/Q-Y5F6_1M.R1.fastq.gz ./test_data/fastq/small/Q-Y5F6_1M.R2.fastq.gz
     - readLengthSE=$(zcat *_trimmed.fq.gz | awk '{if(NR%4==2) print length($1)}' | sort -n | awk '{a[NR]=$0}END{print(NR%2==1)?a[int(NR/2)+1]:(a[NR/2]+a[NR/2+1])/2}')
     - readLengthPE=$(zcat *_1.fq.gz | awk '{if(NR%4==2) print length($1)}' | sort -n | awk '{a[NR]=$0}END{print(NR%2==1)?a[int(NR/2)+1]:(a[NR/2]+a[NR/2+1])/2}')
     - pytest -m trimData
...
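The readLengthSE/readLengthPE values above are median read lengths: the first awk keeps only the sequence line of each 4-line FASTQ record and prints its length, sort -n orders the lengths, and the second awk picks the middle element (or the mean of the two middle elements). The same computation shown standalone with the CI job's glob, as a sketch that assumes the trimmed single-end output *_trimmed.fq.gz from the step above is in the working directory:

    # Median read length of the trimmed single-end fastq(s)
    # NR%4==2 selects the sequence line of each FASTQ record
    zcat *_trimmed.fq.gz \
      | awk '{if(NR%4==2) print length($1)}' \
      | sort -n \
      | awk '{a[NR]=$0}END{print(NR%2==1)?a[int(NR/2)+1]:(a[NR/2]+a[NR/2+1])/2}'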
@@ -9,6 +9,7 @@
 * Make pull references from BioHPC default (including in biohpc.config)
 * Start using new gudmaprbk dockerhub (images autobuilt)
 * Moved consistency checks to be fully python
+* Added back parallel form of trim_galore and now use fastqc after trim step
 
 *Known Bugs*
 * Datahub reference pull uses dev.gudmap.org as source until referencencs are placed on production
...
@@ -229,12 +229,10 @@ if (fastqsForce != "") {
     .ifEmpty { exit 1, "override inputBag file not found: ${fastqsForce}" }
     .collect().into {
       fastqs_trimData
-      fastqs_fastqc
     }
 } else {
   fastqs.into {
     fastqs_trimData
-    fastqs_fastqc
   }
 }
@@ -343,6 +341,7 @@ process trimData {
   output:
     path ("*.fq.gz") into fastqsTrim
+    path ("*.R{1,2}.fastq.gz") into fastqs_fastqc
    path ("*_trimming_report.txt") into trimQC
     path ("readLength.csv") into inferMetadata_readLength
@@ -355,11 +354,11 @@ process trimData {
     echo -e "LOG: trimming ${ends}" >> ${repRID}.trimData.log
     if [ "${ends}" == "se" ]
     then
-      trim_galore --gzip -q 25 --length 35 --basename ${repRID} ${fastq[0]}
+      trim_galore --gzip -q 25 --length 35 --basename ${repRID} -j 4 ${fastq[0]}
       readLength=\$(zcat *_trimmed.fq.gz | awk '{if(NR%4==2) print length(\$1)}' | sort -n | awk '{a[NR]=\$0}END{print(NR%2==1)?a[int(NR/2)+1]:(a[NR/2]+a[NR/2+1])/2}')
     elif [ "${ends}" == "pe" ]
     then
-      trim_galore --gzip -q 25 --length 35 --paired --basename ${repRID} ${fastq[0]} ${fastq[1]}
+      trim_galore --gzip -q 25 --length 35 --paired --basename ${repRID} -j 4 ${fastq[0]} ${fastq[1]}
       readLength=\$(zcat *_1.fq.gz | awk '{if(NR%4==2) print length(\$1)}' | sort -n | awk '{a[NR]=\$0}END{print(NR%2==1)?a[int(NR/2)+1]:(a[NR/2]+a[NR/2+1])/2}')
     fi
     echo -e "LOG: trimmed" >> ${repRID}.trimData.log
...
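With fastqs_fastqc now emitted by trimData (the *.R{1,2}.fastq.gz glob, apparently the staged raw inputs rather than the trimmed .fq.gz outputs), FastQC runs downstream of the trim step instead of consuming its own copy of the raw-fastq channel. The FastQC process itself is outside this diff; a minimal sketch of the kind of command such a step would run, assuming the re-emitted files are staged into its work directory (the container and exact flags in the real pipeline may differ):

    # Hypothetical downstream FastQC call on the re-emitted raw fastqs
    fastqc -o . *.fastq.gz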