diff --git a/workflow/main.nf b/workflow/main.nf
index 9ff48c4b39472951a3bd45ef17befeaa38818304..345011b1c15209c74664a4790fd6ee2da2e34ade 100755
--- a/workflow/main.nf
+++ b/workflow/main.nf
@@ -83,6 +83,7 @@ process mkfastq {
   file("**/outs/fastq_path/**/*") into mkfastqPaths
   file("**/outs/**/*.fastq.gz") into fastqPaths
   file("**/outs/fastq_path/Stats/Stats.json") into bqcPaths
+  file("version*.txt") into versionPaths_mkfastq
 
   script:
 
@@ -91,6 +92,7 @@ process mkfastq {
   ulimit -a
   module load cellranger/3.0.2
   module load bcl2fastq/2.19.1
+  sh $baseDir/scripts/versions_mkfastq.sh
   cellranger mkfastq --id="${bcl.baseName}" --run=$bcl --csv=$designPaths -r \$SLURM_CPUS_ON_NODE -p \$SLURM_CPUS_ON_NODE -w \$SLURM_CPUS_ON_NODE
   """
 }
@@ -105,6 +107,7 @@ process fastqc {
   output:
 
   file("*fastqc.*") into fqcPaths
+  file("version*.txt") into versionPaths_fastqc
 
   script:
 
@@ -118,6 +121,30 @@ process fastqc {
 }
 
 
+process versions {
+  publishDir "$outDir/${task.process}", mode: 'copy'
+
+  input:
+
+  file versionPaths_mkfastq
+  file versionPaths_fastqc
+
+  output:
+
+  file("*.yaml") into yamlPaths
+
+  script:
+
+  """
+  hostname
+  ulimit -a
+  module load python/3.6.1-2-anaconda
+  echo $workflow.nextflow.version > version_nextflow.txt
+  python3 $baseDir/scripts/generate_versions.py -f version_*.txt -o versions
+  """
+}
+
+
 process multiqc {
   publishDir "$outDir/${task.process}", mode: 'copy'
 
@@ -125,6 +152,7 @@ process multiqc {
 
   file bqcPaths
   file fqcPaths
+  file yamlPaths
 
   output:
 
diff --git a/workflow/scripts/fastqc.sh b/workflow/scripts/fastqc.sh
index ce975dfb8a3c559cdcaf8d7d2931ae02b5030890..d7ee12c87cbd87d0164e0ea775b9a782f255f778 100644
--- a/workflow/scripts/fastqc.sh
+++ b/workflow/scripts/fastqc.sh
@@ -2,3 +2,5 @@
 
 find . -name '*.fastq.gz' | awk '{printf("fastqc \"%s\"\n", $0)}' | parallel -j 25 --verbose
 #find . -name '*fastqc.*' | xargs -I '{}' mv '{}' ./
+
+fastqc --version 2>&1 | grep 'FastQC v' | sed -n -e 's/^FastQC v//p' > version_fastqc.txt
diff --git a/workflow/scripts/generate_versions.py b/workflow/scripts/generate_versions.py
new file mode 100644
index 0000000000000000000000000000000000000000..f61017ba86f7fa639c6dca3e3f54f58ae10b0331
--- /dev/null
+++ b/workflow/scripts/generate_versions.py
@@ -0,0 +1,117 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+'''Make YAML of software versions.'''
+
+from __future__ import print_function
+from collections import OrderedDict
+import re
+import logging
+import argparse
+import numpy as np
+
+EPILOG = '''
+For more details:
+    %(prog)s --help
+'''
+
+# SETTINGS
+
+logger = logging.getLogger(__name__)
+logger.addHandler(logging.NullHandler())
+logger.propagate = False
+logger.setLevel(logging.INFO)
+
+# Software name -> [version file name, regex capturing the version]
+SOFTWARE_REGEX = {
+    'Nextflow': ['version_nextflow.txt', r"(\S+)"],
+    'cellranger mkfastq': ['version_cellranger.mkfastq.txt', r"(\S+)"],
+    'bcl2fastq': ['version_bcl2fastq.txt', r"(\S+)"],
+    'fastqc': ['version_fastqc.txt', r"(\S+)"],
+}
+
+
+def get_args():
+    '''Define arguments.'''
+
+    parser = argparse.ArgumentParser(
+        description=__doc__, epilog=EPILOG,
+        formatter_class=argparse.RawDescriptionHelpFormatter)
+
+    parser.add_argument('-f', '--files',
+                        help="The version files.",
+                        required=True,
+                        nargs='*')
+
+    parser.add_argument('-o', '--output',
+                        help="The out file name.",
+                        required=True)
+
+    args = parser.parse_args()
+    return args
+
+
+def check_files(files):
+    '''Raise if any supplied file has no entry in SOFTWARE_REGEX.'''
+
+    logger.info("Running file check.")
+
+    software_files = np.array(list(SOFTWARE_REGEX.values()))[:,0]
+
+    extra_files = set(files) - set(software_files)
+
+    if len(extra_files) > 0:
+        logger.error('Missing regex: %s', list(extra_files))
+        raise Exception("Missing regex: %s" % list(extra_files))
+
+
+def main():
+    '''Read the supplied version files and write <output>_mqc.yaml.'''
+    args = get_args()
+    files = args.files
+    output = args.output
+
+    out_filename = output + '_mqc.yaml'
+
+    # Every known tool starts as N/A and is overwritten when a version is found.
+    results = OrderedDict()
+    results['Nextflow'] = '<span style="color:#999999;\">N/A</span>'
+    results['cellranger mkfastq'] = '<span style="color:#999999;\">N/A</span>'
+    results['bcl2fastq'] = '<span style="color:#999999;\">N/A</span>'
+    results['fastqc'] = '<span style="color:#999999;\">N/A</span>'
+
+    # Check for version files:
+    check_files(files)
+
+    # Search each supplied file using its regex. A tool whose version file
+    # was not supplied keeps its N/A placeholder instead of failing on open().
+    for k, v in SOFTWARE_REGEX.items():
+        if v[0] not in files:
+            continue
+        with open(v[0]) as x:
+            versions = x.read()
+        match = re.search(v[1], versions)
+        if match:
+            results[k] = "v{}".format(match.group(1))
+
+    # Dump to YAML through a single handle that is closed on exit.
+    with open(out_filename, "w") as out:
+        print(
+            '''
+        id: 'Software Versions'
+        section_name: 'Software Versions'
+        section_href: 'https://git.biohpc.swmed.edu/BICF/Astrocyte/cellranger_mkfastq/'
+        plot_type: 'html'
+        description: 'are collected at run time from the software output.'
+        data: |
+            <dl class="dl-horizontal">
+        '''
+            , file=out)
+
+        for k, v in results.items():
+            print("            <dt>{}</dt><dd>{}</dd>".format(k, v), file=out)
+        print("            </dl>", file=out)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/workflow/scripts/versions_mkfastq.sh b/workflow/scripts/versions_mkfastq.sh
new file mode 100644
index 0000000000000000000000000000000000000000..c5e0650c63232854625fb9b534710e00e9857dc9
--- /dev/null
+++ b/workflow/scripts/versions_mkfastq.sh
@@ -0,0 +1,4 @@
+#!/bin/bash
+
+cellranger mkfastq --version | grep 'cellranger mkfastq ' | sed 's/.*(\(.*\))/\1/' > version_cellranger.mkfastq.txt
+bcl2fastq --version 2>&1 | grep 'bcl2fastq v' | sed -n -e 's/^bcl2fastq v//p' > version_bcl2fastq.txt