diff --git a/workflow/conf/biohpc.config b/workflow/conf/biohpc.config
index a17f91e1a3397c2c28fe84d0e70b2342f32cbb0a..56e7e6aa9abb7b1ce87fbc6cf9857c0460f140d5 100755
--- a/workflow/conf/biohpc.config
+++ b/workflow/conf/biohpc.config
@@ -24,6 +24,9 @@ process {
   withName: dedupData {
     queue = 'super'
   }
+  withName: fastqc {
+    queue = 'super'
+  }
 }
 
 singularity {
diff --git a/workflow/nextflow.config b/workflow/nextflow.config
index 0b6f27a5c078db72a5ae025a5d0cde2118163047..02c71af5c58998839c270f78aadfdfd84696dfa3 100644
--- a/workflow/nextflow.config
+++ b/workflow/nextflow.config
@@ -32,6 +32,9 @@ process {
   withName: dedupData {
     container = 'bicf/picard2.21.7:2.0.0'
   }
+  withName: fastqc {
+    container = 'bicf/fastqc:2.0.0'
+  }
 }
 
 trace {
diff --git a/workflow/rna-seq.nf b/workflow/rna-seq.nf
index 740db3bc6973c950339127ba8d81934766ad085f..082e04b57baf408919002e650454acaa4405751d 100755
--- a/workflow/rna-seq.nf
+++ b/workflow/rna-seq.nf
@@ -105,6 +105,12 @@ process getData {
   """
 }
 
+// Split the fastqs channel so that both trimData and fastqc can consume it
+fastqs.into {
+  fastqs_trimData
+  fastqs_fastqc
+}
+
 /*
  * parseMetadata: parses metadata to extract experiment parameters
  */
@@ -239,7 +245,7 @@ process trimData {
 
   input:
     val endsManual_trimData
-    path (fastq) from fastqs
+    path (fastq) from fastqs_trimData
 
   output:
     path ("*.fq.gz") into fastqs_trimmed
@@ -303,7 +309,7 @@ process alignData {
 }
 
 /*
- *dedupReads: mark the duplicate reads, specifically focused on PCR or optical duplicates
+ *dedupData: mark the duplicate reads, specifically focused on PCR or optical duplicates
 */
 process dedupData {
   tag "${repRID}"
@@ -323,7 +329,30 @@ process dedupData {
   hostname >${repRID}.dedup.err
   ulimit -a >>${repRID}.dedup.err
 
-  #Remove duplicated reads
+  # remove duplicated reads
   java -jar /picard/build/libs/picard.jar MarkDuplicates I=${rawBam} O=${repRID}.deduped.bam M=${repRID}.deduped.Metrics.txt REMOVE_DUPLICATES=true 1>>${repRID}.dedup.out 2>> ${repRID}.dedup.err
   """
+}
+
+/*
+ *fastqc: run fastqc on the untrimmed fastqs; the log is written to <repRID>.fastqc.err
+*/
+process fastqc {
+  tag "${repRID}"
+  publishDir "${logsDir}", mode: 'copy', pattern: "*.fastqc.err"
+
+  input:
+    path (fastq) from fastqs_fastqc
+
+  output:
+    path ("*_fastqc.zip") into fastqc
+
+  script:
+    """
+    hostname >${repRID}.fastqc.err
+    ulimit -a >>${repRID}.fastqc.err
+
+    # run fastqc
+    fastqc *.fastq.gz >>${repRID}.fastqc.err
+    """
 }
\ No newline at end of file