Skip to content
Snippets Groups Projects
Commit 3b4d0255 authored by Gervaise Henry's avatar Gervaise Henry :cowboy:
Browse files

Add downsampling of trimmed fastqs

parent 7e0d0e5d
Branches
Tags
2 merge requests!37v0.0.1,!28Resolve "Move inference to start of pipeline"
Pipeline #6342 canceled with stages
in 4 minutes and 11 seconds
...@@ -18,6 +18,9 @@ process { ...@@ -18,6 +18,9 @@ process {
withName:trimData { withName:trimData {
queue = 'super' queue = 'super'
} }
withName:downsampleData {
executor = 'local'
}
withName:alignData { withName:alignData {
queue = '256GB,256GBv1' queue = '256GB,256GBv1'
} }
......
...@@ -26,6 +26,9 @@ process { ...@@ -26,6 +26,9 @@ process {
withName: trimData { withName: trimData {
container = 'bicf/trimgalore:1.1' container = 'bicf/trimgalore:1.1'
} }
withName: downsampleData {
container = 'bicf/seqtk:2.0.0'
}
withName: alignData { withName: alignData {
container = 'bicf/gudmaprbkaligner:2.0.0' container = 'bicf/gudmaprbkaligner:2.0.0'
} }
......
...@@ -117,6 +117,7 @@ process getData { ...@@ -117,6 +117,7 @@ process getData {
// Replicate raw fastqs for multiple process inputs // Replicate raw fastqs for multiple process inputs
fastqs.into { fastqs.into {
fastqs_downsampleData
fastqs_trimData fastqs_trimData
fastqs_fastqc fastqs_fastqc
} }
...@@ -190,6 +191,7 @@ metadata.splitCsv(sep: ",", header: false).separate( ...@@ -190,6 +191,7 @@ metadata.splitCsv(sep: ",", header: false).separate(
) )
// Replicate metadata for multiple process inputs // Replicate metadata for multiple process inputs
endsManual.into { endsManual.into {
endsManual_downsampleData
endsManual_trimData endsManual_trimData
endsManual_alignData endsManual_alignData
endsManual_featureCounts endsManual_featureCounts
...@@ -284,7 +286,7 @@ process trimData { ...@@ -284,7 +286,7 @@ process trimData {
path (fastq) from fastqs_trimData path (fastq) from fastqs_trimData
output: output:
path ("*.fq.gz") into fastqs_trimmed path ("*.fq.gz") into fastqsTrim
path ("*_trimming_report.txt") into trimQC path ("*_trimming_report.txt") into trimQC
path ("${repRID}.trimData.{out,err}") path ("${repRID}.trimData.{out,err}")
...@@ -306,6 +308,47 @@ process trimData { ...@@ -306,6 +308,47 @@ process trimData {
""" """
} }
// Fan the trimmed-fastq channel out: one copy feeds downsampleData, the other alignData
fastqsTrim.into { fastqsTrim_downsampleData; fastqsTrim_alignData }
/*
 * downsampleData: downsample trimmed fastqs for metadata inference
 *
 * Inputs:
 *   endsManual_downsampleData - "se" (single-end) or "pe" (paired-end); selects the branch below
 *   fastq                     - trimmed fastq file(s) taken from fastqsTrim_downsampleData
 * Outputs:
 *   sampled.1.fq (plus sampled.2.fq for paired-end) into fastqsSample
 *   ${repRID}.downsampleData.{out,err} log file(s), copied to logsDir
 */
process downsampleData {
  tag "${repRID}"
  publishDir "${logsDir}", mode: "copy", pattern: "${repRID}.downsampleData.{out,err}"

  input:
    val endsManual_downsampleData
    path fastq from fastqsTrim_downsampleData

  output:
    // The glob matches sampled.1.fq alone (se) or sampled.1.fq + sampled.2.fq (pe)
    path ("sampled.{1,2}.fq") into fastqsSample
    // Only the .err file is written by the script; the glob tolerates the missing .out
    path ("${repRID}.downsampleData.{out,err}")

  // Both mates of a pair are sampled with the same seed (-s100); the seqtk
  // documentation advises this so that read 1 and read 2 stay paired.
  // NOTE(review): single-end samples 10000 reads while paired-end samples
  // 1000000 per mate -- confirm this difference is intentional.
  // An unrecognized ends value now fails the task explicitly with a logged
  // message instead of exiting 0 and producing no sampled fastq.
  script:
    """
    hostname > ${repRID}.downsampleData.err
    ulimit -a >> ${repRID}.downsampleData.err
    export https_proxy=\${http_proxy}

    if [ "${endsManual_downsampleData}" == "se" ]
    then
      echo "LOG: downsampling single-end trimmed fastq" >> ${repRID}.downsampleData.err
      seqtk sample -s100 *trimmed.fq.gz 10000 1> sampled.1.fq 2>> ${repRID}.downsampleData.err
    elif [ "${endsManual_downsampleData}" == "pe" ]
    then
      echo "LOG: downsampling read 1 of paired-end trimmed fastq" >> ${repRID}.downsampleData.err
      seqtk sample -s100 *1.fq.gz 1000000 1> sampled.1.fq 2>> ${repRID}.downsampleData.err
      echo "LOG: downsampling read 2 of paired-end trimmed fastq" >> ${repRID}.downsampleData.err
      seqtk sample -s100 *2.fq.gz 1000000 1> sampled.2.fq 2>> ${repRID}.downsampleData.err
    else
      echo "LOG: unrecognized ends value '${endsManual_downsampleData}' (expected se or pe)" >> ${repRID}.downsampleData.err
      exit 1
    fi
    """
}
/* /*
* alignData: aligns the reads to a reference database * alignData: aligns the reads to a reference database
*/ */
...@@ -316,7 +359,7 @@ process alignData { ...@@ -316,7 +359,7 @@ process alignData {
input: input:
val endsManual_alignData val endsManual_alignData
val stranded_alignData val stranded_alignData
path fastq from fastqs_trimmed path fastq from fastqsTrim_alignData
path reference_alignData path reference_alignData
output: output:
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment