From db1c91334fb179dec736ffc3bfc8bdd638cc3f82 Mon Sep 17 00:00:00 2001 From: Venkat Malladi <venkat.malladi@utsouthwestern.edu> Date: Wed, 13 Sep 2017 10:10:05 -0500 Subject: [PATCH] Add in test data and updated scripts. --- astrocyte_pkg.yml | 2 +- test_data/fetch_test_data.sh | 2 ++ workflow/main.nf | 3 ++- workflow/scripts/qc_fastq.py | 35 +++++++++++++++++++---------------- 4 files changed, 24 insertions(+), 18 deletions(-) create mode 100644 test_data/fetch_test_data.sh diff --git a/astrocyte_pkg.yml b/astrocyte_pkg.yml index 9b9ec96..7baf3cc 100644 --- a/astrocyte_pkg.yml +++ b/astrocyte_pkg.yml @@ -36,7 +36,7 @@ documentation_files: # Remember - The workflow file is always named 'workflow/main.nf' # The workflow must publish all final output into $baseDir -# A list of clueter environment modules that this workflow requires to run. +# A list of cluster environment modules that this workflow requires to run. # Specify versioned module names to ensure reproducability. workflow_modules: - 'fastqc/0.11.5' diff --git a/test_data/fetch_test_data.sh b/test_data/fetch_test_data.sh new file mode 100644 index 0000000..c47fcf0 --- /dev/null +++ b/test_data/fetch_test_data.sh @@ -0,0 +1,2 @@ +wget -O ENCLB904PZW_R1.fastq.gz https://www.encodeproject.org/files/ENCFF704XKC/@@download/ENCFF704XKC.fastq.gz +wget -O ENCLB904PZW_R2.fastq.gz https://www.encodeproject.org/files/ENCFF707CNX/@@download/ENCFF707CNX.fastq.gz diff --git a/workflow/main.nf b/workflow/main.nf index 1175c2d..a43afe5 100644 --- a/workflow/main.nf +++ b/workflow/main.nf @@ -21,7 +21,8 @@ process qc_fastq { set val(name), file(reads) from read_pairs output: - file "*_fastqc.{zip,html}" into fastqc_results + file "*_fastqc.{zip,html}" into qc_fastq_results + file "qc.log" into qc_fastq_log script: """ diff --git a/workflow/scripts/qc_fastq.py b/workflow/scripts/qc_fastq.py index 2e23851..f58cb9f 100755 --- a/workflow/scripts/qc_fastq.py +++ b/workflow/scripts/qc_fastq.py @@ -5,9 +5,7 @@ import os import subprocess import argparse -import shlex import shutil -from multiprocessing import cpu_count import logging import sys import json @@ -19,34 +17,35 @@ For more details: ## SETTINGS -logger = logging.getLogger(__name__) -logger.addHandler(logging.NullHandler()) -logger.propagate = False -logger.setLevel(logging.INFO) +LOGGER = logging.getLogger(__name__) +LOGGER.addHandler(logging.NullHandler()) +LOGGER.propagate = False +LOGGER.setLevel(logging.INFO) def check_tools(): '''Checks for required componenets on user system''' - logger.info('Checking for required libraries and components on this system') + LOGGER.info('Checking for required libraries and components on this system') fastqc_path = shutil.which("fastqc") if fastqc_path: - logger.info('Found fastqc:%s' % (fastqc_path)) + LOGGER.info('Found fastqc: %s', fastqc_path) else: print("Please install 'fastqc' before using the tool") sys.exit() def get_args(): + '''Define arguments.''' parser = argparse.ArgumentParser( description=__doc__, epilog=EPILOG, formatter_class=argparse.RawDescriptionHelpFormatter) parser.add_argument('-f', '--fastq', - help="The fastq file to run QC check on.", - nargs='+', - required=True) + help="The fastq file to run QC check on.", + nargs='+', + required=True) args = parser.parse_args() return args @@ -56,19 +55,23 @@ def check_qual_fastq(fastq): '''Run fastqc on 1 or 2 files.''' qc_command = "fastqc -t -f fastq " + " ".join(fastq) - logger.info("Running fastqc with %s" % (qc_command)) + LOGGER.info("Running fastqc with %s", qc_command) - p = subprocess.Popen(qc_command, shell=True) - p.communicate() + qual_fastq = subprocess.Popen(qc_command, shell=True) + qual_fastq .communicate() def main(): args = get_args() - # create a file handler + # Create a file handler handler = logging.FileHandler('qc.log') - logger.addHandler(handler) + LOGGER.addHandler(handler) + # Check if tools are present + check_tools() + + # Run quality checks check_qual_fastq(args.fastq) -- GitLab