diff --git a/astrocyte_pkg.yml b/astrocyte_pkg.yml index 9b9ec964aafd59106c82b9719b8dd0018fdf09fa..7baf3ccc474ecc67275e7c8e2168cffc470cf13a 100644 --- a/astrocyte_pkg.yml +++ b/astrocyte_pkg.yml @@ -36,7 +36,7 @@ documentation_files: # Remember - The workflow file is always named 'workflow/main.nf' # The workflow must publish all final output into $baseDir -# A list of clueter environment modules that this workflow requires to run. +# A list of cluster environment modules that this workflow requires to run. # Specify versioned module names to ensure reproducability. workflow_modules: - 'fastqc/0.11.5' diff --git a/test_data/fetch_test_data.sh b/test_data/fetch_test_data.sh new file mode 100644 index 0000000000000000000000000000000000000000..c47fcf082b441897ab0bd17f5b1d105516314470 --- /dev/null +++ b/test_data/fetch_test_data.sh @@ -0,0 +1,2 @@ +wget -O ENCLB904PZW_R1.fastq.gz https://www.encodeproject.org/files/ENCFF704XKC/@@download/ENCFF704XKC.fastq.gz +wget -O ENCLB904PZW_R2.fastq.gz https://www.encodeproject.org/files/ENCFF707CNX/@@download/ENCFF707CNX.fastq.gz diff --git a/workflow/main.nf b/workflow/main.nf index 1175c2dd93852c088f37139cdb01678e8fbdc287..a43afe578948cb6fb9d9127abf34b918e02767fd 100644 --- a/workflow/main.nf +++ b/workflow/main.nf @@ -21,7 +21,8 @@ process qc_fastq { set val(name), file(reads) from read_pairs output: - file "*_fastqc.{zip,html}" into fastqc_results + file "*_fastqc.{zip,html}" into qc_fastq_results + file "qc.log" into qc_fastq_log script: """ diff --git a/workflow/scripts/qc_fastq.py b/workflow/scripts/qc_fastq.py index 2e238519786f02d4033fc63dd1eca6810dd8dd24..f58cb9f8e1a4bd249b7eb92fe56476616e4cb552 100755 --- a/workflow/scripts/qc_fastq.py +++ b/workflow/scripts/qc_fastq.py @@ -5,9 +5,7 @@ import os import subprocess import argparse -import shlex import shutil -from multiprocessing import cpu_count import logging import sys import json @@ -19,34 +17,35 @@ For more details: ## SETTINGS -logger = logging.getLogger(__name__) -logger.addHandler(logging.NullHandler()) -logger.propagate = False -logger.setLevel(logging.INFO) +LOGGER = logging.getLogger(__name__) +LOGGER.addHandler(logging.NullHandler()) +LOGGER.propagate = False +LOGGER.setLevel(logging.INFO) def check_tools(): '''Checks for required componenets on user system''' - logger.info('Checking for required libraries and components on this system') + LOGGER.info('Checking for required libraries and components on this system') fastqc_path = shutil.which("fastqc") if fastqc_path: - logger.info('Found fastqc:%s' % (fastqc_path)) + LOGGER.info('Found fastqc: %s', fastqc_path) else: print("Please install 'fastqc' before using the tool") sys.exit() def get_args(): + '''Define arguments.''' parser = argparse.ArgumentParser( description=__doc__, epilog=EPILOG, formatter_class=argparse.RawDescriptionHelpFormatter) parser.add_argument('-f', '--fastq', - help="The fastq file to run QC check on.", - nargs='+', - required=True) + help="The fastq file to run QC check on.", + nargs='+', + required=True) args = parser.parse_args() return args @@ -56,19 +55,23 @@ def check_qual_fastq(fastq): '''Run fastqc on 1 or 2 files.''' qc_command = "fastqc -t -f fastq " + " ".join(fastq) - logger.info("Running fastqc with %s" % (qc_command)) + LOGGER.info("Running fastqc with %s", qc_command) - p = subprocess.Popen(qc_command, shell=True) - p.communicate() + qual_fastq = subprocess.Popen(qc_command, shell=True) + qual_fastq .communicate() def main(): args = get_args() - # create a file handler + # Create a file handler handler = logging.FileHandler('qc.log') - logger.addHandler(handler) + LOGGER.addHandler(handler) + # Check if tools are present + check_tools() + + # Run quality checks check_qual_fastq(args.fastq)