Commit 42e5fcf0 authored by Gervaise Henry
Browse files

Add custom multiqc report yaml with software versions

parent 9fe4abf2
Pipeline #3466 passed with stages
in 2 minutes and 6 seconds
......@@ -83,6 +83,7 @@ process mkfastq {
file("**/outs/fastq_path/**/*") into mkfastqPaths
file("**/outs/**/*.fastq.gz") into fastqPaths
file("**/outs/fastq_path/Stats/Stats.json") into bqcPaths
file("version*.txt") into versionPaths_mkfastq
script:
......@@ -91,6 +92,7 @@ process mkfastq {
ulimit -a
module load cellranger/3.0.2
module load bcl2fastq/2.19.1
sh $baseDir/scripts/versions_mkfastq.sh
cellranger mkfastq --id="${bcl.baseName}" --run=$bcl --csv=$designPaths -r \$SLURM_CPUS_ON_NODE -p \$SLURM_CPUS_ON_NODE -w \$SLURM_CPUS_ON_NODE
"""
}
......@@ -105,6 +107,7 @@ process fastqc {
output:
file("*fastqc.*") into fqcPaths
file("version*.txt") into versionPaths_fastqc
script:
......@@ -118,6 +121,30 @@ process fastqc {
}
// Collect the per-tool version files and turn them into a YAML file for the MultiQC report.
process versions {
// Copy this process's outputs into a sub-directory of $outDir named after the process.
publishDir "$outDir/${task.process}", mode: 'copy'
input:
// version*.txt files emitted by the mkfastq and fastqc processes.
file versionPaths_mkfastq
file versionPaths_fastqc
output:
// The generated YAML is passed on to the multiqc process through yamlPaths.
file("*.yaml") into yamlPaths
script:
// Writes the running Nextflow version to version_nextflow.txt so it sits next to the
// staged tool version files, then runs generate_versions.py over every version_*.txt
// with the output prefix "versions".
// NOTE(review): presumably the script appends "_mqc.yaml" to that prefix, which is what
// the "*.yaml" output glob picks up -- confirm against scripts/generate_versions.py.
"""
hostname
ulimit -a
module load python/3.6.1-2-anaconda
echo $workflow.nextflow.version > version_nextflow.txt
python3 $baseDir/scripts/generate_versions.py -f version_*.txt -o versions
"""
}
process multiqc {
publishDir "$outDir/${task.process}", mode: 'copy'
......@@ -125,6 +152,7 @@ process multiqc {
file bqcPaths
file fqcPaths
file yamlPaths
output:
......
......@@ -2,3 +2,5 @@
find . -name '*.fastq.gz' | awk '{printf("fastqc \"%s\"\n", $0)}' | parallel -j 25 --verbose
#find . -name '*fastqc.*' | xargs -I '{}' mv '{}' ./
fastqc --version |& grep 'FastQC v' | sed -n -e 's/^FastQC v//p' > version_fastqc.txt
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
'''Make YAML of software versions.'''
from __future__ import print_function
from collections import OrderedDict
import re
import logging
import argparse
import numpy as np
# Footer text argparse prints after the argument help.
EPILOG = '''
For more details:
%(prog)s --help
'''

# LOGGING
# Module-level logger. It only carries a NullHandler and does not propagate,
# so records are discarded unless another handler is attached to it.
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
logger.propagate = False
logger.addHandler(logging.NullHandler())

# SOFTWARE TABLE
# Display name -> [version file name, regex whose first group is the version].
SOFTWARE_REGEX = {
    'Nextflow': [
        'version_nextflow.txt',
        r"(\S+)",
    ],
    'cellranger mkfastq': [
        'version_cellranger.mkfastq.txt',
        r"(\S+)",
    ],
    'bcl2fastq': [
        'version_bcl2fastq.txt',
        r"(\S+)",
    ],
    'fastqc': [
        'version_fastqc.txt',
        r"(\S+)",
    ],
}
def get_args():
    '''Parse the command line and return the resulting namespace.

    The namespace carries two attributes:
    files  -- list of version file names given to -f/--files
    output -- output file name prefix given to -o/--output
    '''
    cli = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=__doc__,
        epilog=EPILOG,
    )

    cli.add_argument(
        '-f', '--files',
        nargs='*',
        required=True,
        help="The version files.",
    )

    cli.add_argument(
        '-o', '--output',
        required=True,
        help="The out file name.",
    )

    return cli.parse_args()
def check_files(files):
    '''Check that every supplied version file has an entry in SOFTWARE_REGEX.

    files -- iterable of version file names, compared verbatim against the
             file names listed in SOFTWARE_REGEX.

    Raises Exception when one or more names in files are not listed in
    SOFTWARE_REGEX, i.e. there is no regex to parse them with. Files that
    are listed in SOFTWARE_REGEX but absent from files are not reported.
    '''
    logger.info("Running file check.")

    # First element of each SOFTWARE_REGEX value is the expected file name.
    known_files = {entry[0] for entry in SOFTWARE_REGEX.values()}

    # Sorted so the error message is identical from run to run.
    extra_files = sorted(set(files) - known_files)

    if extra_files:
        logger.error('Missing regex: %s', extra_files)
        raise Exception("Missing regex: %s" % extra_files)
def main():
    '''Collect software versions and write them as a MultiQC YAML section.

    Reads the version file of every tool listed in SOFTWARE_REGEX from the
    current directory, extracts the version with the tool's regex, and
    writes an HTML definition list wrapped in YAML to '<output>_mqc.yaml'.
    A tool whose version file is missing, or whose file does not match the
    regex, is reported as "N/A".

    Raises Exception (from check_files) when a file passed with -f/--files
    has no entry in SOFTWARE_REGEX.
    '''
    args = get_args()
    files = args.files
    out_filename = args.output + '_mqc.yaml'

    # Placeholder shown for any tool whose version could not be determined.
    not_available = '<span style="color:#999999;">N/A</span>'
    results = OrderedDict()
    results['Nextflow'] = not_available
    results['cellranger mkfastq'] = not_available
    results['bcl2fastq'] = not_available
    results['fastqc'] = not_available

    # Check for version files:
    check_files(files)

    # Search each file using its regex
    for software, (filename, pattern) in SOFTWARE_REGEX.items():
        try:
            with open(filename) as handle:
                contents = handle.read()
        except FileNotFoundError:
            # A tool that did not run leaves no version file; keep "N/A"
            # instead of aborting the whole report.
            logger.info("Version file not found: %s", filename)
            continue
        match = re.search(pattern, contents)
        if match:
            results[software] = "v{}".format(match.group(1))

    # Dump to YAML
    # The HTML must be indented deeper than the 'data' key to be part of
    # its literal block scalar.
    header = (
        "id: 'Software Versions'\n"
        "section_name: 'Software Versions'\n"
        "section_href: 'https://git.biohpc.swmed.edu/BICF/Astrocyte/cellranger_mkfastq/'\n"
        "plot_type: 'html'\n"
        "description: 'are collected at run time from the software output.'\n"
        "data: |\n"
        '    <dl class="dl-horizontal">\n'
    )

    # One handle for the whole file so it is flushed and closed on exit.
    with open(out_filename, "w") as out_file:
        out_file.write(header)
        for software, version in results.items():
            out_file.write(
                "        <dt>{}</dt><dd>{}</dd>\n".format(software, version))
        out_file.write("    </dl>\n")


if __name__ == '__main__':
    main()
#!/bin/bash
# Record the versions of the tools used for mkfastq.
# Each version is written to its own version_<tool>.txt file in the current
# directory.

# Keep the line containing 'cellranger mkfastq ' and reduce it to the text
# inside the parentheses.
cellranger mkfastq --version | grep 'cellranger mkfastq ' | sed 's/.*(\(.*\))/\1/' > version_cellranger.mkfastq.txt

# stderr is merged into the pipe so the 'bcl2fastq v' line is seen whichever
# stream it is printed on. The POSIX form '2>&1 |' is used instead of the
# bash-only '|&' because the pipeline launches this script with 'sh'.
bcl2fastq --version 2>&1 | grep 'bcl2fastq v' | sed -n -e 's/^bcl2fastq v//p' > version_bcl2fastq.txt
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment