From f8219ccf8c7f35bd6fb01d02a6b538d1e4d1638a Mon Sep 17 00:00:00 2001
From: "Gervaise H. Henry" <gervaise.henry@utsouthwestern.edu>
Date: Mon, 3 Feb 2020 21:57:19 -0600
Subject: [PATCH] Add initial fastqc

---
 workflow/conf/biohpc.config |  3 +++
 workflow/nextflow.config    |  3 +++
 workflow/rna-seq.nf         | 35 ++++++++++++++++++++++++++++++++---
 3 files changed, 38 insertions(+), 3 deletions(-)

diff --git a/workflow/conf/biohpc.config b/workflow/conf/biohpc.config
index a17f91e..56e7e6a 100755
--- a/workflow/conf/biohpc.config
+++ b/workflow/conf/biohpc.config
@@ -24,6 +24,9 @@ process {
   withName: dedupData {
     queue = 'super'
   }
+  withName: fastqc {
+    queue = 'super'
+  }
 }
 
 singularity {
diff --git a/workflow/nextflow.config b/workflow/nextflow.config
index 0b6f27a..02c71af 100644
--- a/workflow/nextflow.config
+++ b/workflow/nextflow.config
@@ -32,6 +32,9 @@ process {
   withName: dedupData {
     container = 'bicf/picard2.21.7:2.0.0'
   }
+  withName: fastqc {
+    container = 'bicf/fastqc:2.0.0'
+  }
 }
 
 trace {
diff --git a/workflow/rna-seq.nf b/workflow/rna-seq.nf
index 740db3b..082e04b 100755
--- a/workflow/rna-seq.nf
+++ b/workflow/rna-seq.nf
@@ -105,6 +105,12 @@ process getData {
     """
 }
 
+// Split the fastqs channel so that trimData and fastqc each read from their own channel
+fastqs.into {
+  fastqs_trimData
+  fastqs_fastqc
+}
+
 /*
  * parseMetadata: parses metadata to extract experiment parameters
 */
@@ -239,7 +245,7 @@ process trimData {
 
   input:
     val endsManual_trimData
-    path (fastq) from fastqs
+    path (fastq) from fastqs_trimData
 
   output:
     path ("*.fq.gz") into fastqs_trimmed
@@ -303,7 +309,7 @@ process alignData {
 }
 
 /*
- *dedupReads: mark the duplicate reads, specifically focused on PCR or optical duplicates
+ *dedupData: mark and remove duplicate reads, specifically focused on PCR or optical duplicates
 */
 process dedupData {
   tag "${repRID}"
@@ -323,7 +329,30 @@ process dedupData {
     hostname >${repRID}.dedup.err
     ulimit -a >>${repRID}.dedup.err
 
-    #Remove duplicated reads
+    # remove duplicated reads
     java -jar /picard/build/libs/picard.jar MarkDuplicates I=${rawBam} O=${repRID}.deduped.bam M=${repRID}.deduped.Metrics.txt REMOVE_DUPLICATES=true 1>>${repRID}.dedup.out 2>> ${repRID}.dedup.err
     """
+}
+
+/*
+ *fastqc: run fastqc on untrimmed fastq's; emits the *_fastqc.zip reports and publishes the run log to logsDir
+*/
+process fastqc {
+  tag "${repRID}"
+  publishDir "${logsDir}", mode: 'copy', pattern: "*.fastqc.err"
+
+  input:
+    path (fastq) from fastqs_fastqc
+
+  output:
+    path ("*_fastqc.zip") into fastqc
+    path ("*.fastqc.err")
+
+  script:
+    """
+    hostname >${repRID}.fastqc.err
+    ulimit -a >>${repRID}.fastqc.err
+    # run fastqc (the log is declared as an output above because publishDir only copies declared outputs)
+    fastqc *.fastq.gz >>${repRID}.fastqc.err
+    """
 }
\ No newline at end of file
-- 
GitLab