From 3b4d0255e7f2cb51c8e4a984f20127003fbd9bf4 Mon Sep 17 00:00:00 2001
From: "Gervaise H. Henry" <gervaise.henry@utsouthwestern.edu>
Date: Tue, 24 Mar 2020 15:39:49 -0500
Subject: [PATCH] Add downsampling of trimmed fastqs

Add a downsampleData process that runs `seqtk sample` on the trimmed
fastqs produced by trimData and emits the result on the fastqsSample
channel (single-end: 10000 reads, paired-end: 1000000 reads per mate).

To feed both the new process and alignData, the trimData output channel
is renamed from fastqs_trimmed to fastqsTrim and replicated into
fastqsTrim_downsampleData and fastqsTrim_alignData. fastqs and
endsManual each gain a *_downsampleData copy.

downsampleData uses the bicf/seqtk:2.0.0 container and is run with the
local executor in the BioHPC profile.

The task script tolerates an unset http_proxy and exits with an explicit
error when the ends value is neither "se" nor "pe".
---
Review notes (this section is not part of the commit message):
- Line structure and indentation were reconstructed from a
  whitespace-collapsed copy of this patch; verify the context lines
  against the target files before applying.
- Hunk line counts and the diffstat were recomputed for the reviewed
  script; the post-image blob id on the rna-seq.nf index line predates
  those edits.
- fastqs_downsampleData is declared but no process in this patch reads
  it; confirm a later change consumes it, otherwise drop it.

 workflow/conf/biohpc.config |  3 +++
 workflow/nextflow.config    |  3 +++
 workflow/rna-seq.nf         | 56 ++++++++++++++++++++++++++++++++++++++++++++++++++++++--
 3 files changed, 60 insertions(+), 2 deletions(-)

diff --git a/workflow/conf/biohpc.config b/workflow/conf/biohpc.config
index 83fac1f..5bcbc27 100755
--- a/workflow/conf/biohpc.config
+++ b/workflow/conf/biohpc.config
@@ -18,6 +18,9 @@ process {
   withName:trimData {
     queue = 'super'
   }
+  withName:downsampleData {
+    executor = 'local'
+  }
   withName:alignData {
     queue = '256GB,256GBv1'
   }
diff --git a/workflow/nextflow.config b/workflow/nextflow.config
index f0be347..eb9ca4d 100644
--- a/workflow/nextflow.config
+++ b/workflow/nextflow.config
@@ -26,6 +26,9 @@ process {
   withName: trimData {
     container = 'bicf/trimgalore:1.1'
   }
+  withName: downsampleData {
+    container = 'bicf/seqtk:2.0.0'
+  }
   withName: alignData {
     container = 'bicf/gudmaprbkaligner:2.0.0'
   }
diff --git a/workflow/rna-seq.nf b/workflow/rna-seq.nf
index 2285223..625e3a0 100644
--- a/workflow/rna-seq.nf
+++ b/workflow/rna-seq.nf
@@ -117,6 +117,7 @@ process getData {
 
 // Replicate raw fastqs for multiple process inputs
 fastqs.into {
+  fastqs_downsampleData
   fastqs_trimData
   fastqs_fastqc
 }
@@ -190,6 +191,7 @@ metadata.splitCsv(sep: ",", header: false).separate(
 )
 // Replicate metadata for multiple process inputs
 endsManual.into {
+  endsManual_downsampleData
   endsManual_trimData
   endsManual_alignData
   endsManual_featureCounts
@@ -284,7 +286,7 @@ process trimData {
     path (fastq) from fastqs_trimData
 
   output:
-    path ("*.fq.gz") into fastqs_trimmed
+    path ("*.fq.gz") into fastqsTrim
     path ("*_trimming_report.txt") into trimQC
     path ("${repRID}.trimData.{out,err}")
 
@@ -306,6 +308,56 @@ process trimData {
     """
 }
 
+// Replicate trimmed fastqs for multiple process inputs
+fastqsTrim.into {
+  fastqsTrim_downsampleData
+  fastqsTrim_alignData
+}
+
+/*
+ * downsampleData: downsample trimmed fastqs for metadata inference
+ *   se: 10000 reads are sampled into sampled.1.fq
+ *   pe: 1000000 reads are sampled from each mate into sampled.1.fq and sampled.2.fq;
+ *       both mates use seed 100, which seqtk documents as the way to keep reads paired
+ *   NOTE(review): se and pe sample sizes differ 100-fold - confirm this is intended
+ */
+process downsampleData {
+  tag "${repRID}"
+  publishDir "${logsDir}", mode: "copy", pattern: "${repRID}.downsampleData.{out,err}"
+
+  input:
+    val endsManual_downsampleData
+    path fastq from fastqsTrim_downsampleData
+
+  output:
+    path ("sampled.{1,2}.fq") into fastqsSample
+    path ("${repRID}.downsampleData.{out,err}")
+
+  script:
+    """
+    hostname > ${repRID}.downsampleData.err
+    ulimit -a >> ${repRID}.downsampleData.err
+    # default to empty so an unset http_proxy does not abort the task under bash -u (Nextflow's default task shell options)
+    export https_proxy=\${http_proxy:-}
+
+    if [ "${endsManual_downsampleData}" == "se" ]
+    then
+      echo "LOG: downsampling single-end trimmed fastq" >> ${repRID}.downsampleData.err
+      seqtk sample -s100 *trimmed.fq.gz 10000 1> sampled.1.fq 2>> ${repRID}.downsampleData.err
+    elif [ "${endsManual_downsampleData}" == "pe" ]
+    then
+      echo "LOG: downsampling read 1 of paired-end trimmed fastq" >> ${repRID}.downsampleData.err
+      seqtk sample -s100 *1.fq.gz 1000000 1> sampled.1.fq 2>> ${repRID}.downsampleData.err
+      echo "LOG: downsampling read 2 of paired-end trimmed fastq" >> ${repRID}.downsampleData.err
+      seqtk sample -s100 *2.fq.gz 1000000 1> sampled.2.fq 2>> ${repRID}.downsampleData.err
+    else
+      # neither se nor pe: stop here with a clear message instead of finishing without any sampled fastq
+      echo "LOG: ERROR - unknown ends value '${endsManual_downsampleData}' (expected se or pe)" >> ${repRID}.downsampleData.err
+      exit 1
+    fi
+    """
+}
+
 /*
  * alignData: aligns the reads to a reference database
  */
@@ -316,7 +368,7 @@ process alignData {
   input:
     val endsManual_alignData
     val stranded_alignData
-    path fastq from fastqs_trimmed
+    path fastq from fastqsTrim_alignData
     path reference_alignData
 
   output:
-- 
GitLab