From 3d73714a01f76ea8f11914ea791e8a7e622f5e97 Mon Sep 17 00:00:00 2001
From: Venkat Malladi <venkat.malladi@utsouthwestern.edu>
Date: Sat, 30 Sep 2017 22:03:51 -0500
Subject: [PATCH] Add in fastqc step.

---
 workflow/main.nf             | 36 +++++++++++++++++++++++++++++++++---
 workflow/scripts/qc_fastq.py | 20 ++++++++++----------
 2 files changed, 43 insertions(+), 13 deletions(-)

diff --git a/workflow/main.nf b/workflow/main.nf
index c025d3b..ee90c3a 100644
--- a/workflow/main.nf
+++ b/workflow/main.nf
@@ -9,12 +9,11 @@ params.pairedEnd = false
 params.designFile = "$baseDir/../test_data/design_ENCSR238SGC_SE.txt"
 
 // Define List of Files
-Channel
+readsList = Channel
   .fromPath( params.reads )
   .flatten()
   .map { file -> [ file.getFileName().toString(), file.toString() ].join("\t")}
   .collectFile( name: 'fileList.tsv', newLine: true )
-  .set { readsList }
 
 // Define regular variables
 pairedEnd = params.pairedEnd
@@ -37,7 +36,6 @@ process checkDesignFile {
 
   if (pairedEnd) {
     """
-    echo $designFile
     python $baseDir/scripts/check_design.py -d $designFile -f $readsList -p
     """
   }
@@ -48,3 +46,35 @@ process checkDesignFile {
   }
 
 }
+
+// Define channel for raw reads: one tuple per design-file row; the reads list holds two FASTQs when paired-end, one otherwise
+if (pairedEnd) {
+  rawReads = designFilePaths
+    .splitCsv(sep: '\t', header: true)
+    .map { row -> [ row.sample_id, [row.fastq_read1, row.fastq_read2], row.biosample, row.factor, row.treatment, row.replicate, row.control_id ] }
+} else {
+  rawReads = designFilePaths
+    .splitCsv(sep: '\t', header: true)
+    .map { row -> [ row.sample_id, [row.fastq_read1], row.biosample, row.factor, row.treatment, row.replicate, row.control_id ] }
+}
+
+process fastQc {
+  // Run FastQC (through qc_fastq.py) on each sample's raw FASTQ file(s).
+  tag "$sampleId-$replicate"
+  publishDir "$baseDir/output/", mode: 'copy',
+    saveAs: {filename -> filename.indexOf(".zip") > 0 ? "zips/$filename" : "$filename"}
+  // saveAs above: published *.zip outputs go under zips/; everything else keeps its name.
+  input:
+
+  set sampleId, reads, biosample, factor, treatment, replicate, controlId from rawReads
+
+  output:
+
+  file '*_fastqc.{zip,html}' into fastqc_results
+
+  script:
+  // reads is a plain Groovy list, so "$reads" would render as "[a, b]"; join it into separate arguments.
+  """
+  python $baseDir/scripts/qc_fastq.py -f ${reads.join(' ')}
+  """
+}
diff --git a/workflow/scripts/qc_fastq.py b/workflow/scripts/qc_fastq.py
index 991d927..95d8172 100755
--- a/workflow/scripts/qc_fastq.py
+++ b/workflow/scripts/qc_fastq.py
@@ -17,10 +17,10 @@ For more details:
 
 ## SETTINGS
 
-LOGGER = logging.getLogger(__name__)
-LOGGER.addHandler(logging.NullHandler())
-LOGGER.propagate = False
-LOGGER.setLevel(logging.INFO)
+logger = logging.getLogger(__name__)
+logger.addHandler(logging.NullHandler())  # no-op handler; a real one must be attached to see output
+logger.propagate = False  # do not hand records to ancestor loggers' handlers
+logger.setLevel(logging.INFO)
 
 
 def get_args():
@@ -41,24 +41,24 @@ def get_args():
 def check_tools():
     '''Checks for required componenets on user system'''
 
-    LOGGER.info('Checking for required libraries and components on this system')
+    logger.info('Checking for required libraries and components on this system')
 
     fastqc_path = shutil.which("fastqc")
     if fastqc_path:
-        LOGGER.info('Found fastqc: %s', fastqc_path)
+        logger.info('Found fastqc: %s', fastqc_path)
     else:
-        print("Please install 'fastqc' before using the tool")
-        sys.exit()
+        logger.error('Missing fastqc')
+        raise Exception('Missing fastqc')  # if uncaught, ends the script with a non-zero exit status
 
 
 def check_qual_fastq(fastq):
     '''Run fastqc on 1 or 2 files.'''
     qc_command = "fastqc -t -f fastq " + " ".join(fastq)
 
-    LOGGER.info("Running fastqc with %s", qc_command)
+    logger.info("Running fastqc with %s", qc_command)  # NOTE(review): "-t" is followed by "-f" with no thread count - confirm
 
     qual_fastq = subprocess.Popen(qc_command, shell=True)
-    qual_fastq .communicate()
+    out, err = qual_fastq.communicate()  # waits for fastqc to exit; no pipes are attached, so both values are None
 
 
 def main():
-- 
GitLab