From f8219ccf8c7f35bd6fb01d02a6b538d1e4d1638a Mon Sep 17 00:00:00 2001 From: "Gervaise H. Henry" <gervaise.henry@utsouthwestern.edu> Date: Mon, 3 Feb 2020 21:57:19 -0600 Subject: [PATCH] Add initial fastqc --- workflow/conf/biohpc.config | 3 +++ workflow/nextflow.config | 3 +++ workflow/rna-seq.nf | 35 ++++++++++++++++++++++++++++++++--- 3 files changed, 38 insertions(+), 3 deletions(-) diff --git a/workflow/conf/biohpc.config b/workflow/conf/biohpc.config index a17f91e..56e7e6a 100755 --- a/workflow/conf/biohpc.config +++ b/workflow/conf/biohpc.config @@ -24,6 +24,9 @@ process { withName: dedupData { queue = 'super' } + withName: fastqc { + queue = 'super' + } } singularity { diff --git a/workflow/nextflow.config b/workflow/nextflow.config index 0b6f27a..02c71af 100644 --- a/workflow/nextflow.config +++ b/workflow/nextflow.config @@ -32,6 +32,9 @@ process { withName: dedupData { container = 'bicf/picard2.21.7:2.0.0' } + withName: fastqc { + container = 'bicf/fastqc:2.0.0' + } } trace { diff --git a/workflow/rna-seq.nf b/workflow/rna-seq.nf index 740db3b..082e04b 100755 --- a/workflow/rna-seq.nf +++ b/workflow/rna-seq.nf @@ -105,6 +105,12 @@ process getData { """ } +// Split fastq's into separate channels for trimData and fastqc +fastqs.into { + fastqs_trimData + fastqs_fastqc +} + /* * parseMetadata: parses metadata to extract experiment parameters */ @@ -239,7 +245,7 @@ process trimData { input: val endsManual_trimData - path (fastq) from fastqs + path (fastq) from fastqs_trimData output: path ("*.fq.gz") into fastqs_trimmed @@ -303,7 +309,7 @@ process alignData { } /* - *dedupReads: mark the duplicate reads, specifically focused on PCR or optical duplicates + *dedupData: mark the duplicate reads, specifically focused on PCR or optical duplicates */ process dedupData { tag "${repRID}" @@ -323,7 +329,30 @@ process dedupData { hostname >${repRID}.dedup.err ulimit -a >>${repRID}.dedup.err - #Remove duplicated reads + # remove duplicated reads java -jar /picard/build/libs/picard.jar MarkDuplicates I=${rawBam} 
O=${repRID}.deduped.bam M=${repRID}.deduped.Metrics.txt REMOVE_DUPLICATES=true 1>>${repRID}.dedup.out 2>> ${repRID}.dedup.err """ +} + +/* + *fastqc: run fastqc on untrimmed fastq's +*/ +process fastqc { + tag "${repRID}" + publishDir "${logsDir}", mode: 'copy', pattern: "*.fastqc.err" + + input: + path (fastq) from fastqs_fastqc + + output: + path ("*_fastqc.zip") into fastqc + + script: + """ + hostname >${repRID}.fastqc.err + ulimit -a >>${repRID}.fastqc.err + + # run fastqc on the untrimmed reads; stdout is appended to the published log + fastqc *.fastq.gz >>${repRID}.fastqc.err + """ } \ No newline at end of file -- GitLab