From db1c91334fb179dec736ffc3bfc8bdd638cc3f82 Mon Sep 17 00:00:00 2001
From: Venkat Malladi <venkat.malladi@utsouthwestern.edu>
Date: Wed, 13 Sep 2017 10:10:05 -0500
Subject: [PATCH] Add in test data and updated scripts.

---
 astrocyte_pkg.yml            |  2 +-
 test_data/fetch_test_data.sh |  2 ++
 workflow/main.nf             |  3 ++-
 workflow/scripts/qc_fastq.py | 35 +++++++++++++++++++----------------
 4 files changed, 24 insertions(+), 18 deletions(-)
 create mode 100644 test_data/fetch_test_data.sh

diff --git a/astrocyte_pkg.yml b/astrocyte_pkg.yml
index 9b9ec96..7baf3cc 100644
--- a/astrocyte_pkg.yml
+++ b/astrocyte_pkg.yml
@@ -36,7 +36,7 @@ documentation_files:
 # Remember - The workflow file is always named 'workflow/main.nf'
 #            The workflow must publish all final output into $baseDir
 
-# A list of clueter environment modules that this workflow requires to run.
+# A list of cluster environment modules that this workflow requires to run.
 # Specify versioned module names to ensure reproducability.
 workflow_modules:
   - 'fastqc/0.11.5'
diff --git a/test_data/fetch_test_data.sh b/test_data/fetch_test_data.sh
new file mode 100644
index 0000000..c47fcf0
--- /dev/null
+++ b/test_data/fetch_test_data.sh
@@ -0,0 +1,2 @@
+wget -O ENCLB904PZW_R1.fastq.gz https://www.encodeproject.org/files/ENCFF704XKC/@@download/ENCFF704XKC.fastq.gz
+wget -O ENCLB904PZW_R2.fastq.gz https://www.encodeproject.org/files/ENCFF707CNX/@@download/ENCFF707CNX.fastq.gz
diff --git a/workflow/main.nf b/workflow/main.nf
index 1175c2d..a43afe5 100644
--- a/workflow/main.nf
+++ b/workflow/main.nf
@@ -21,7 +21,8 @@ process qc_fastq {
     set val(name), file(reads) from read_pairs
 
     output:
-    file "*_fastqc.{zip,html}" into fastqc_results
+    file "*_fastqc.{zip,html}" into qc_fastq_results
+    file "qc.log" into qc_fastq_log
 
     script:
     """
diff --git a/workflow/scripts/qc_fastq.py b/workflow/scripts/qc_fastq.py
index 2e23851..f58cb9f 100755
--- a/workflow/scripts/qc_fastq.py
+++ b/workflow/scripts/qc_fastq.py
@@ -5,9 +5,7 @@
 import os
 import subprocess
 import argparse
-import shlex
 import shutil
-from multiprocessing import cpu_count
 import logging
 import sys
 import json
@@ -19,34 +17,35 @@ For more details:
 
 ## SETTINGS
 
-logger = logging.getLogger(__name__)
-logger.addHandler(logging.NullHandler())
-logger.propagate = False
-logger.setLevel(logging.INFO)
+LOGGER = logging.getLogger(__name__)
+LOGGER.addHandler(logging.NullHandler())
+LOGGER.propagate = False
+LOGGER.setLevel(logging.INFO)
 
 
 def check_tools():
     '''Checks for required componenets on user system'''
 
-    logger.info('Checking for required libraries and components on this system')
+    LOGGER.info('Checking for required libraries and components on this system')
 
     fastqc_path = shutil.which("fastqc")
     if fastqc_path:
-        logger.info('Found fastqc:%s' % (fastqc_path))
+        LOGGER.info('Found fastqc: %s', fastqc_path)
     else:
         print("Please install 'fastqc' before using the tool")
         sys.exit()
 
 
 def get_args():
+    '''Define arguments.'''
     parser = argparse.ArgumentParser(
         description=__doc__, epilog=EPILOG,
         formatter_class=argparse.RawDescriptionHelpFormatter)
 
     parser.add_argument('-f', '--fastq',
-        help="The fastq file to run QC check on.",
-        nargs='+',
-        required=True)
+                        help="The fastq file to run QC check on.",
+                        nargs='+',
+                        required=True)
 
     args = parser.parse_args()
     return args
@@ -56,19 +55,23 @@ def check_qual_fastq(fastq):
     '''Run fastqc on 1 or 2 files.'''
     qc_command = "fastqc -t -f fastq " + " ".join(fastq)
 
-    logger.info("Running fastqc with %s" % (qc_command))
+    LOGGER.info("Running fastqc with %s", qc_command)
 
-    p = subprocess.Popen(qc_command, shell=True)
-    p.communicate()
+    qual_fastq = subprocess.Popen(qc_command, shell=True)
+    qual_fastq.communicate()
 
 
 def main():
     args = get_args()
 
-    # create a file handler
+    # Create a file handler
     handler = logging.FileHandler('qc.log')
-    logger.addHandler(handler)
+    LOGGER.addHandler(handler)
 
+    # Check if tools are present
+    check_tools()
+
+    # Run quality checks
     check_qual_fastq(args.fastq)
 
 
-- 
GitLab