Merge branch '9-fastqc' into 'develop'

Resolve "process_fastqc" Closes #9 See merge request !18

Merge branch '9-fastqc' into 'develop'
Resolve "process_fastqc" Closes #9 See merge request !18
eac0a7c5 · Venkat Malladi · a30baa9c · 645546fa · eac0a7c5 · eac0a7c5
Commit eac0a7c5 authored 5 years ago by Venkat Malladi
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -67,6 +67,12 @@ dedupData:
  - singularity exec 'docker://bicf/picard2.21.7:2.0.0' java -jar /picard/build/libs/picard.jar MarkDuplicates I=./test_data/bam/small/Q-Y5JA_1M.se.sorted.bam O=Q-Y5JA_1M.se.deduped.bam M=Q-Y5JA_1M.se.deduped.Metrics.txt REMOVE_DUPLICATES=true
  - pytest -m dedupData

+fastqc:
+  stage: unit
+  script:
+  - singularity run 'docker://bicf/fastqc:2.0.0' ./test_data/fastq/small/Q-Y5JA_1M.R1.fastq.gz -o .
+  - pytest -m fastqc
+
 integration_se:
  stage: integration
  script:

--- a/workflow/conf/aws_ondemand.config
+++ b/workflow/conf/aws_ondemand.config
@@ -11,7 +11,7 @@ process {
  executor = 'awsbatch'
  queue = 'highpriority-3278a8b0-1fc8-11ea-b1ac-021e2396e2cc'
  cpus = 1
-  memory = '1 GB'
+  memory = '2 GB'

  withName:parseMetadata {
    cpus = 5
@@ -25,10 +25,13 @@ process {
  }
  withName:alignData {
    cpus = 50
-    memory = '10 GB'
+    memory = '5 GB'
  }
  withName:dedupData {
    cpus = 2
    memory = '20 GB'
  }
+  withName:fastqc {
+    memory = '5 GB'
+  }
 }
\ No newline at end of file
--- a/workflow/conf/aws_spot.config
+++ b/workflow/conf/aws_spot.config
@@ -11,7 +11,7 @@ process {
  executor = 'awsbatch'
  queue = 'default-3278a8b0-1fc8-11ea-b1ac-021e2396e2cc'
  cpus = 1
-  memory = '1 GB'
+  memory = '2 GB'

  withName:parseMetadata {
    cpus = 5
@@ -25,10 +25,13 @@ process {
  }
  withName:alignData {
    cpus = 50
-    memory = '10 GB'
+    memory = '5 GB'
  }
  withName:dedupData {
    cpus = 2
    memory = '20 GB'
  }
+  withName:fastqc {  // selector must match the 'fastqc' process name in rna-seq.nf
+    memory = '5 GB'
+  }
 }
--- a/workflow/conf/biohpc.config
+++ b/workflow/conf/biohpc.config
@@ -24,6 +24,9 @@ process {
  withName: dedupData {
    queue = 'super'
  }
+  withName: fastqc {
+    queue = 'super'
+  }
 }

 singularity {

--- a/workflow/nextflow.config
+++ b/workflow/nextflow.config
@@ -32,6 +32,9 @@ process {
  withName: dedupData {
    container = 'bicf/picard2.21.7:2.0.0'
  }
+  withName: fastqc {
+    container = 'bicf/fastqc:2.0.0'
+  }
 }

 trace {

--- a/workflow/rna-seq.nf
+++ b/workflow/rna-seq.nf
@@ -105,6 +105,12 @@ process getData {
    """
 }

+// Split fastq's
+fastqs.into {
+  fastqs_trimData
+  fastqs_fastqc
+}
+
 /*
 * parseMetadata: parses metadata to extract experiment parameters
 */
@@ -239,7 +245,7 @@ process trimData {

  input:
    val endsManual_trimData
-    path (fastq) from fastqs
+    path (fastq) from fastqs_trimData

  output:
    path ("*.fq.gz") into fastqs_trimmed
@@ -303,7 +309,7 @@ process alignData {
 }

 /*
- *dedupReads: mark the duplicate reads, specifically focused on PCR or optical duplicates
+ *dedupData: mark the duplicate reads, specifically focused on PCR or optical duplicates
 */
 process dedupData {
  tag "${repRID}"
@@ -323,7 +329,31 @@ process dedupData {
    hostname >${repRID}.dedup.err
    ulimit -a >>${repRID}.dedup.err

-    #Remove duplicated reads
+    # remove duplicated reads
    java -jar /picard/build/libs/picard.jar MarkDuplicates I=${rawBam} O=${repRID}.deduped.bam M=${repRID}.deduped.Metrics.txt REMOVE_DUPLICATES=true 1>>${repRID}.dedup.out 2>> ${repRID}.dedup.err
    """
+}
+
+/*
+ *fastqc: run fastqc on untrimmed fastq's; emits the *_fastqc.zip reports on the fastqc channel
+*/
+process fastqc {
+  tag "${repRID}"
+  publishDir "${outDir}/fastqc", mode: 'copy', pattern: "*_fastqc.zip"
+  publishDir "${logsDir}", mode: 'copy', pattern: "*.fastqc.err"  // matches the <repRID>.fastqc.err log written by the script below
+
+  input:
+    path (fastq) from fastqs_fastqc
+
+  output:
+    path ("*_fastqc.zip") into fastqc
+
+  script:
+    """
+    hostname >${repRID}.fastqc.err
+    ulimit -a >>${repRID}.fastqc.err
+
+    # run fastqc on every staged fastq.gz, writing reports to the task directory
+    fastqc *.fastq.gz -o . >>${repRID}.fastqc.err
+    """
 }
\ No newline at end of file
--- a/workflow/tests/test_fastqc.py
+++ b/workflow/tests/test_fastqc.py
+#!/usr/bin/env python3
+
+import pytest
+import pandas as pd
+from io import StringIO
+import os
+
+test_output_path = os.path.dirname(os.path.abspath(__file__)) + \
+                '/../../'
+
+@pytest.mark.fastqc
+def test_fastqc():
+    assert os.path.exists(os.path.join(test_output_path, 'Q-Y5JA_1M.R1_fastqc.zip'))
\ No newline at end of file