diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 37083c154c27c879833b85c8f8c256c2aecb46dc..10b06f97a6a83b23bdb1ac01e2ce46e975def353 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -67,6 +67,12 @@ dedupData: - singularity exec 'docker://bicf/picard2.21.7:2.0.0' java -jar /picard/build/libs/picard.jar MarkDuplicates I=./test_data/bam/small/Q-Y5JA_1M.se.sorted.bam O=Q-Y5JA_1M.se.deduped.bam M=Q-Y5JA_1M.se.deduped.Metrics.txt REMOVE_DUPLICATES=true - pytest -m dedupData +fastqc: + stage: unit + script: + - singularity run 'docker://bicf/fastqc:2.0.0' ./test_data/fastq/small/Q-Y5JA_1M.R1.fastq.gz -o . + - pytest -m fastqc + integration_se: stage: integration script: diff --git a/workflow/conf/aws_ondemand.config b/workflow/conf/aws_ondemand.config index ca23b6c49cd80b98c3120010609d46e7131757f8..0c8b6ff0fd242cda9ca2fb43473e82f99547efef 100755 --- a/workflow/conf/aws_ondemand.config +++ b/workflow/conf/aws_ondemand.config @@ -11,7 +11,7 @@ process { executor = 'awsbatch' queue = 'highpriority-3278a8b0-1fc8-11ea-b1ac-021e2396e2cc' cpus = 1 - memory = '1 GB' + memory = '2 GB' withName:parseMetadata { cpus = 5 @@ -25,10 +25,13 @@ process { } withName:alignData { cpus = 50 - memory = '10 GB' + memory = '5 GB' } withName:dedupData { cpus = 2 memory = '20 GB' } + withName:fastqc { + memory = '5 GB' + } } \ No newline at end of file diff --git a/workflow/conf/aws_spot.config b/workflow/conf/aws_spot.config index e0447565495e09c3ccde2968eb887aa14f7db251..4708298a63ced25cd74edf2646fc3c67a0a7753c 100755 --- a/workflow/conf/aws_spot.config +++ b/workflow/conf/aws_spot.config @@ -11,7 +11,7 @@ process { executor = 'awsbatch' queue = 'default-3278a8b0-1fc8-11ea-b1ac-021e2396e2cc' cpus = 1 - memory = '1 GB' + memory = '2 GB' withName:parseMetadata { cpus = 5 @@ -25,10 +25,13 @@ process { } withName:alignData { cpus = 50 - memory = '10 GB' + memory = '5 GB' } withName:dedupData { cpus = 2 memory = '20 GB' } + withName:fastqc { + memory = '5 GB' + } } diff --git 
a/workflow/conf/biohpc.config b/workflow/conf/biohpc.config index a17f91e1a3397c2c28fe84d0e70b2342f32cbb0a..56e7e6aa9abb7b1ce87fbc6cf9857c0460f140d5 100755 --- a/workflow/conf/biohpc.config +++ b/workflow/conf/biohpc.config @@ -24,6 +24,9 @@ process { withName: dedupData { queue = 'super' } + withName: fastqc { + queue = 'super' + } } singularity { diff --git a/workflow/nextflow.config b/workflow/nextflow.config index 0b6f27a5c078db72a5ae025a5d0cde2118163047..02c71af5c58998839c270f78aadfdfd84696dfa3 100644 --- a/workflow/nextflow.config +++ b/workflow/nextflow.config @@ -32,6 +32,9 @@ process { withName: dedupData { container = 'bicf/picard2.21.7:2.0.0' } + withName: fastqc { + container = 'bicf/fastqc:2.0.0' + } } trace { diff --git a/workflow/rna-seq.nf b/workflow/rna-seq.nf index 740db3bc6973c950339127ba8d81934766ad085f..0fd3ee0d872768d1d7d80c0be6bcc0cbafd105e0 100755 --- a/workflow/rna-seq.nf +++ b/workflow/rna-seq.nf @@ -105,6 +105,12 @@ process getData { """ } +// Split fastq's +fastqs.into { + fastqs_trimData + fastqs_fastqc +} + /* * parseMetadata: parses metadata to extract experiment parameters */ @@ -239,7 +245,7 @@ process trimData { input: val endsManual_trimData - path (fastq) from fastqs + path (fastq) from fastqs_trimData output: path ("*.fq.gz") into fastqs_trimmed @@ -303,7 +309,7 @@ process alignData { } /* - *dedupReads: mark the duplicate reads, specifically focused on PCR or optical duplicates + *dedupData: mark the duplicate reads, specifically focused on PCR or optical duplicates */ process dedupData { tag "${repRID}" @@ -323,7 +329,31 @@ process dedupData { hostname >${repRID}.dedup.err ulimit -a >>${repRID}.dedup.err - #Remove duplicated reads + # remove duplicated reads java -jar /picard/build/libs/picard.jar MarkDuplicates I=${rawBam} O=${repRID}.deduped.bam M=${repRID}.deduped.Metrics.txt REMOVE_DUPLICATES=true 1>>${repRID}.dedup.out 2>> ${repRID}.dedup.err """ +} + +/* + *fastqc: run fastqc on untrimmed fastq's +*/ +process fastqc { + 
tag "${repRID}" + publishDir "${outDir}/fastqc", mode: 'copy', pattern: "*_fastqc.zip" + publishDir "${logsDir}", mode: 'copy', pattern: "*.fastqc.err" + + input: + path (fastq) from fastqs_fastqc + + output: + path ("*_fastqc.zip") into fastqc + + script: + """ + hostname >${repRID}.fastqc.err + ulimit -a >>${repRID}.fastqc.err + + # run fastqc + fastqc *.fastq.gz -o . >>${repRID}.fastqc.err + """ } \ No newline at end of file diff --git a/workflow/tests/test_fastqc.py b/workflow/tests/test_fastqc.py new file mode 100644 index 0000000000000000000000000000000000000000..251d667c644b818c3ac07753ded20645f66a301a --- /dev/null +++ b/workflow/tests/test_fastqc.py @@ -0,0 +1,13 @@ +#!/usr/bin/env python3 + +import pytest +import pandas as pd +from io import StringIO +import os + +test_output_path = os.path.dirname(os.path.abspath(__file__)) + \ + '/../../' + +@pytest.mark.fastqc +def test_fastqc(): + assert os.path.exists(os.path.join(test_output_path, 'Q-Y5JA_1M.R1_fastqc.zip')) \ No newline at end of file