Skip to content
Snippets Groups Projects
Commit 7cf5d610 authored by Venkat Malladi's avatar Venkat Malladi
Browse files

Add back parallel trim step. Moved fastqc step after trim.

parent 3ccdc626
Branches
Tags
2 merge requests: !58 Develop, !50 Add back parallel trim step. Moved fastqc step after trim.
Pipeline #8357 canceled with stages
in 10 minutes and 34 seconds
......@@ -113,8 +113,8 @@ trimData:
- merge_requests
script:
- singularity run 'docker://bicf/trimgalore:1.1' trim_galore --version > version_trimgalore.txt
- singularity run 'docker://bicf/trimgalore:1.1' trim_galore --gzip -q 25 --length 35 --basename Q-Y5F6_1M.se ./test_data/fastq/small/Q-Y5F6_1M.R1.fastq.gz
- singularity run 'docker://bicf/trimgalore:1.1' trim_galore --gzip -q 25 --length 35 --paired --basename Q-Y5F6_1M.pe ./test_data/fastq/small/Q-Y5F6_1M.R1.fastq.gz ./test_data/fastq/small/Q-Y5F6_1M.R2.fastq.gz
- singularity run 'docker://bicf/trimgalore:1.1' trim_galore --gzip -q 25 --length 35 --basename Q-Y5F6_1M.se -j 4 ./test_data/fastq/small/Q-Y5F6_1M.R1.fastq.gz
- singularity run 'docker://bicf/trimgalore:1.1' trim_galore --gzip -q 25 --length 35 --paired --basename Q-Y5F6_1M.pe -j 4 ./test_data/fastq/small/Q-Y5F6_1M.R1.fastq.gz ./test_data/fastq/small/Q-Y5F6_1M.R2.fastq.gz
- readLengthSE=$(zcat *_trimmed.fq.gz | awk '{if(NR%4==2) print length($1)}' | sort -n | awk '{a[NR]=$0}END{print(NR%2==1)?a[int(NR/2)+1]:(a[NR/2]+a[NR/2+1])/2}')
- readLengthPE=$(zcat *_1.fq.gz | awk '{if(NR%4==2) print length($1)}' | sort -n | awk '{a[NR]=$0}END{print(NR%2==1)?a[int(NR/2)+1]:(a[NR/2]+a[NR/2+1])/2}')
- pytest -m trimData
......
......@@ -9,6 +9,7 @@
* Make pull references from BioHPC default (including in biohpc.config)
* Start using new gudmaprbk dockerhub (images autobuilt)
* Moved consistency checks to be fully python
* Added back parallel form of trim_galore and now use fastqc after trim step
*Known Bugs*
* Datahub reference pull uses dev.gudmap.org as source until references are placed on production
......
......@@ -229,12 +229,10 @@ if (fastqsForce != "") {
.ifEmpty { exit 1, "override inputBag file not found: ${fastqsForce}" }
.collect().into {
fastqs_trimData
fastqs_fastqc
}
} else {
fastqs.into {
fastqs_trimData
fastqs_fastqc
}
}
......@@ -343,6 +341,7 @@ process trimData {
output:
path ("*.fq.gz") into fastqsTrim
path ("*.R{1,2}.fastq.gz") into fastqs_fastqc
path ("*_trimming_report.txt") into trimQC
path ("readLength.csv") into inferMetadata_readLength
......@@ -355,11 +354,11 @@ process trimData {
echo -e "LOG: trimming ${ends}" >> ${repRID}.trimData.log
if [ "${ends}" == "se" ]
then
trim_galore --gzip -q 25 --length 35 --basename ${repRID} ${fastq[0]}
trim_galore --gzip -q 25 --length 35 --basename ${repRID} -j 4 ${fastq[0]}
readLength=\$(zcat *_trimmed.fq.gz | awk '{if(NR%4==2) print length(\$1)}' | sort -n | awk '{a[NR]=\$0}END{print(NR%2==1)?a[int(NR/2)+1]:(a[NR/2]+a[NR/2+1])/2}')
elif [ "${ends}" == "pe" ]
then
trim_galore --gzip -q 25 --length 35 --paired --basename ${repRID} ${fastq[0]} ${fastq[1]}
trim_galore --gzip -q 25 --length 35 --paired --basename ${repRID} -j 4 ${fastq[0]} ${fastq[1]}
readLength=\$(zcat *_1.fq.gz | awk '{if(NR%4==2) print length(\$1)}' | sort -n | awk '{a[NR]=\$0}END{print(NR%2==1)?a[int(NR/2)+1]:(a[NR/2]+a[NR/2+1])/2}')
fi
echo -e "LOG: trimmed" >> ${repRID}.trimData.log
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment