From ff9999f2364acf383390ad6b5fb0f135660ce2e6 Mon Sep 17 00:00:00 2001 From: Venkat Malladi <venkat.malladi@utsouthwestern.edu> Date: Fri, 8 Mar 2019 20:54:00 -0600 Subject: [PATCH] Add in software version parsing. --- workflow/conf/biohpc.config | 2 +- workflow/main.nf | 47 +++++++++++++++++--- workflow/scripts/annotate_peaks.R | 5 +++ workflow/scripts/call_peaks_macs.py | 40 ++++++++++++++--- workflow/scripts/convert_reads.py | 18 ++++++++ workflow/scripts/diff_peaks.R | 5 +++ workflow/scripts/experiment_qc.py | 9 ++++ workflow/scripts/map_qc.py | 30 +++++++++++++ workflow/scripts/map_reads.py | 21 +++++++++ workflow/scripts/motif_search.py | 19 +++++++++ workflow/scripts/overlap_peaks.py | 9 ++++ workflow/scripts/software_report.py | 66 +++++++++++++++++++++++++++++ workflow/scripts/trim_reads.py | 18 ++++++++ workflow/scripts/xcor.py | 26 ++++++++++++ 14 files changed, 302 insertions(+), 13 deletions(-) create mode 100644 workflow/scripts/software_report.py diff --git a/workflow/conf/biohpc.config b/workflow/conf/biohpc.config index 2c0f1c3..07407ef 100644 --- a/workflow/conf/biohpc.config +++ b/workflow/conf/biohpc.config @@ -41,7 +41,7 @@ process { executor = 'local' } withName: callPeaksMACS { - module = ['python/3.6.1-2-anaconda', 'macs/2.1.0-20151222', 'phantompeakqualtools/1.2', 'UCSC_userApps/v317', 'bedtools/2.26.0'] + module = ['python/3.6.1-2-anaconda', 'macs/2.1.0-20151222', 'UCSC_userApps/v317', 'bedtools/2.26.0'] queue = '128GB,256GB,256GBv1' } withName: consensusPeaks { diff --git a/workflow/main.nf b/workflow/main.nf index a8c5027..ddeb067 100644 --- a/workflow/main.nf +++ b/workflow/main.nf @@ -99,7 +99,8 @@ process trimReads { output: set sampleId, file('*.fq.gz'), experimentId, biosample, factor, treatment, replicate, controlId into trimmedReads - file('*trimming_report.txt') into trimgalore_results + file('*trimming_report.txt') into trimgaloreResults + file('version_*.txt') into trimReadsVersions script: @@ -131,6 +132,7 @@ process 
alignReads { set sampleId, file('*.bam'), experimentId, biosample, factor, treatment, replicate, controlId into mappedReads file '*.flagstat.qc' into mappedReadsStats + file('version_*.txt') into alignReadsVersions script: @@ -164,6 +166,7 @@ process filterReads { file '*.flagstat.qc' into dedupReadsStats file '*.pbc.qc' into dedupReadsComplexity file '*.dedup.qc' into dupReads + file('version_*.txt') into filterReadsVersions script: @@ -198,7 +201,8 @@ process experimentQC { output: - file '*.{pdf,npz}' into deepToolsStats + file '*.{pdf,npz}' into experimentQCStats + file('version_*.txt') into experimentQCVersions script: @@ -221,6 +225,7 @@ process convertReads { output: set sampleId, file('*.tagAlign.gz'), file('*.bed{pe,se}.gz'), experimentId, biosample, factor, treatment, replicate, controlId into tagReads + file('version_*.txt') into convertReadsVersions script: @@ -250,7 +255,8 @@ process crossReads { output: set sampleId, seTagAlign, tagAlign, file('*.cc.qc'), experimentId, biosample, factor, treatment, replicate, controlId into xcorReads - set file('*.cc.qc'), file('*.cc.plot.pdf') into xcorReadsStats + set file('*.cc.qc'), file('*.cc.plot.pdf') into crossReadsStats + file('version_*.txt') into crossReadsVersions script: @@ -342,7 +348,8 @@ process callPeaksMACS { output: set sampleId, file('*.narrowPeak'), file('*.fc_signal.bw'), file('*.pvalue_signal.bw'), experimentId, biosample, factor, treatment, replicate, controlId into experimentPeaks - file '*.xls' into summit + file '*.xls' into callPeaksMACSsummit + file('version_*.txt') into callPeaksMACSVersions script: @@ -383,6 +390,7 @@ process consensusPeaks { file 'design_diffPeaks.csv' into designDiffPeaks file 'design_annotatePeaks.tsv' into designAnnotatePeaks, designMotifSearch file 'unique_experiments.csv' into uniqueExperiments + file('version_*.txt') into consensusPeaksVersions script: @@ -404,6 +412,7 @@ process peakAnnotation { output: file "*chipseeker*" into peakAnnotation + 
file('version_*.txt') into peakAnnotationVersions script: @@ -413,7 +422,7 @@ process peakAnnotation { } -// Motif Search Peaks +// Motif Search Peaks process motifSearch { publishDir "$outDir/${task.process}", mode: 'copy' @@ -426,6 +435,7 @@ process motifSearch { file "*memechip" into motifSearch file "*narrowPeak" into filteredPeaks + file('version_*.txt') into motifSearchVersions script: @@ -454,6 +464,7 @@ process diffPeaks { file '*_diffbind.csv' into diffPeaksCounts file '*.pdf' into diffPeaksStats file 'normcount_peaksets.txt' into normCountPeaks + file('version_*.txt') into diffPeaksVersions when: noUniqueExperiments > 1 @@ -463,3 +474,29 @@ process diffPeaks { Rscript $baseDir/scripts/diff_peaks.R $designDiffPeaks """ } + +// Collect Software Versions +process softwareVersions { + + input: + + trimReadsVersions + alignReadsVersions + filterReadsVersions + convertReadsVersions + crossReadsVersions + callPeaksMACSVersions + consensusPeaksVersions + peakAnnotationVersions + motifSearchVersions + diffPeaksVersions + experimentQCVersions + + output: + file 'software_versions_mqc.yaml' into softwareVersions + + script: + """ + software_report.py > software_versions_mqc.yaml + """ +} diff --git a/workflow/scripts/annotate_peaks.R b/workflow/scripts/annotate_peaks.R index 4bc8d79..ff826b6 100644 --- a/workflow/scripts/annotate_peaks.R +++ b/workflow/scripts/annotate_peaks.R @@ -35,6 +35,11 @@ if(genome_assembly=='GRCh37') { annodb <- 'org.Hs.eg.db' } +# Output version of ChIPseeker +chipseeker_version = packageVersion('ChIPseeker') +write.table(paste("Version", chipseeker_version), file = "version_ChIPseeker.txt", sep = "\t", + row.names = FALSE, col.names = FALSE) + # Load design file design <- read.csv(design_file, sep ='\t') files <- as.list(as.character(design$Peaks)) diff --git a/workflow/scripts/call_peaks_macs.py b/workflow/scripts/call_peaks_macs.py index 17b1414..38e03cc 100644 --- a/workflow/scripts/call_peaks_macs.py +++ 
b/workflow/scripts/call_peaks_macs.py @@ -5,6 +5,7 @@ import os import argparse import shutil +import subprocess import logging import utils from xcor import xcor as calculate_xcor @@ -69,16 +70,19 @@ def check_tools(): logger.info('Checking for required libraries and components on this system') - r_path = shutil.which("R") - if r_path: - logger.info('Found R: %s', r_path) - else: - logger.error('Missing R') - raise Exception('Missing R') - macs_path = shutil.which("macs2") - if r_path: + if macs_path: logger.info('Found MACS2: %s', macs_path) + + # Get Version + macs_version_command = "macs2 --version" + macs_version = subprocess.check_output(macs_version_command, shell=True, stderr=subprocess.STDOUT) + + # Write to file + macs_file = open("version_macs.txt", "wb") + macs_file.write(macs_version) + macs_file.close() + else: logger.error('Missing MACS2') raise Exception('Missing MACS2') @@ -86,6 +90,18 @@ def check_tools(): bg_bw_path = shutil.which("bedGraphToBigWig") if bg_bw_path: logger.info('Found bedGraphToBigWig: %s', bg_bw_path) + + # Get Version (usage text carries it; tool exits non-zero without arguments) + bg_bw_version_command = "bedGraphToBigWig" + try: + bg_bw_version = subprocess.check_output(bg_bw_version_command, shell=True, stderr=subprocess.STDOUT) + except subprocess.CalledProcessError as e: + bg_bw_version = e.output + + # Write to file + bg_bw_file = open("version_bedGraphToBigWig.txt", "wb") + bg_bw_file.write(bg_bw_version) + bg_bw_file.close() else: logger.error('Missing bedGraphToBigWig') raise Exception('Missing bedGraphToBigWig') @@ -93,6 +109,16 @@ def check_tools(): bedtools_path = shutil.which("bedtools") if bedtools_path: logger.info('Found bedtools: %s', bedtools_path) + + # Get Version + bedtools_version_command = "bedtools --version" + bedtools_version = subprocess.check_output(bedtools_version_command, shell=True) + + # Write to file + bedtools_file = open("version_bedtools.txt", "wb") + bedtools_file.write(bedtools_version) + bedtools_file.close() + else: logger.error('Missing bedtools') raise Exception('Missing bedtools') 
diff --git a/workflow/scripts/convert_reads.py b/workflow/scripts/convert_reads.py index bba7c6f..43fe5cf 100644 --- a/workflow/scripts/convert_reads.py +++ b/workflow/scripts/convert_reads.py @@ -54,6 +54,15 @@ def check_tools(): bedtools_path = shutil.which("bedtools") if bedtools_path: logger.info('Found bedtools: %s', bedtools_path) + + # Get Version + bedtools_version_command = "bedtools --version" + bedtools_version = subprocess.check_output(bedtools_version_command, shell=True) + + # Write to file + bedtools_file = open("version_bedtools.txt", "wb") + bedtools_file.write(bedtools_version) + bedtools_file.close() else: logger.error('Missing bedtools') raise Exception('Missing bedtools') @@ -61,6 +70,15 @@ def check_tools(): samtools_path = shutil.which("samtools") if samtools_path: logger.info('Found samtools: %s', samtools_path) + + # Get Version + samtools_version_command = "samtools --version" + samtools_version = subprocess.check_output(samtools_version_command, shell=True) + + # Write to file + samtools_file = open("version_samtools.txt", "wb") + samtools_file.write(samtools_version) + samtools_file.close() else: logger.error('Missing samtools') raise Exception('Missing samtools') diff --git a/workflow/scripts/diff_peaks.R b/workflow/scripts/diff_peaks.R index aae1d24..68fda56 100644 --- a/workflow/scripts/diff_peaks.R +++ b/workflow/scripts/diff_peaks.R @@ -11,6 +11,11 @@ if (length(args) != 1) { stop("Usage: diff_peaks.R annotate_design.tsv ", call.=FALSE) } +# Output version of DiffBind +diffibind_version = packageVersion('DiffBind') +write.table(paste("Version", diffibind_version), file = "version_DiffBind.txt", sep = "\t", + row.names = FALSE, col.names = FALSE) + # Build DBA object from design file data <- dba(sampleSheet=args[1]) data <- dba.count(data) diff --git a/workflow/scripts/experiment_qc.py b/workflow/scripts/experiment_qc.py index 466f847..cf22338 100644 --- a/workflow/scripts/experiment_qc.py +++ b/workflow/scripts/experiment_qc.py @@ 
-52,6 +52,15 @@ def check_tools(): deeptools_path = shutil.which("deeptools") if deeptools_path: logger.info('Found deeptools: %s', deeptools_path) + + # Get Version + deeptools_version_command = "deeptools --version" + deeptools_version = subprocess.check_output(deeptools_version_command, shell=True, stderr=subprocess.STDOUT) + + # Write to file + deeptools_file = open("version_deeptools.txt", "wb") + deeptools_file.write(deeptools_version) + deeptools_file.close() else: logger.error('Missing deeptools') raise Exception('Missing deeptools') diff --git a/workflow/scripts/map_qc.py b/workflow/scripts/map_qc.py index 920e009..ab63c42 100644 --- a/workflow/scripts/map_qc.py +++ b/workflow/scripts/map_qc.py @@ -62,6 +62,15 @@ def check_tools(): samtools_path = shutil.which("samtools") if samtools_path: logger.info('Found samtools: %s', samtools_path) + + # Get Version + samtools_version_command = "samtools --version" + samtools_version = subprocess.check_output(samtools_version_command, shell=True) + + # Write to file + samtools_file = open("version_samtools.txt", "wb") + samtools_file.write(samtools_version) + samtools_file.close() else: logger.error('Missing samtools') raise Exception('Missing samtools') @@ -69,6 +78,18 @@ def check_tools(): sambamba_path = shutil.which("sambamba") if sambamba_path: logger.info('Found sambamba: %s', sambamba_path) + + # Get Version (usage text carries it; keep the output on either exit status) + sambamba_version_command = "sambamba" + try: + sambamba_version = subprocess.check_output(sambamba_version_command, shell=True, stderr=subprocess.STDOUT) + except subprocess.CalledProcessError as e: + sambamba_version = e.output + + # Write to file + sambamba_file = open("version_sambamba.txt", "wb") + sambamba_file.write(sambamba_version) + sambamba_file.close() else: logger.error('Missing sambamba') raise Exception('Missing sambamba') @@ -76,6 +97,15 @@ def check_tools(): bedtools_path = shutil.which("bedtools") if bedtools_path: logger.info('Found bedtools: %s', bedtools_path) + + # Get Version + 
bedtools_version_command = "bedtools --version" + bedtools_version = subprocess.check_output(bedtools_version_command, shell=True) + + # Write to file + bedtools_file = open("version_bedtools.txt", "wb") + bedtools_file.write(bedtools_version) + bedtools_file.close() else: logger.error('Missing bedtools') raise Exception('Missing bedtools') diff --git a/workflow/scripts/map_reads.py b/workflow/scripts/map_reads.py index a1f8161..3ba41dc 100644 --- a/workflow/scripts/map_reads.py +++ b/workflow/scripts/map_reads.py @@ -70,6 +70,18 @@ def check_tools(): bwa_path = shutil.which("bwa") if bwa_path: logger.info('Found bwa: %s', bwa_path) + + # Get Version (usage text carries it; keep the output on either exit status) + bwa_version_command = "bwa" + try: + bwa_version = subprocess.check_output(bwa_version_command, shell=True, stderr=subprocess.STDOUT) + except subprocess.CalledProcessError as e: + bwa_version = e.output + + # Write to file + bwa_file = open("version_bwa.txt", "wb") + bwa_file.write(bwa_version) + bwa_file.close() else: logger.error('Missing bwa') raise Exception('Missing bwa') @@ -77,6 +89,15 @@ def check_tools(): samtools_path = shutil.which("samtools") if samtools_path: logger.info('Found samtools: %s', samtools_path) + + # Get Version + samtools_version_command = "samtools --version" + samtools_version = subprocess.check_output(samtools_version_command, shell=True) + + # Write to file + samtools_file = open("version_samtools.txt", "wb") + samtools_file.write(samtools_version) + samtools_file.close() else: logger.error('Missing samtools') raise Exception('Missing samtools') diff --git a/workflow/scripts/motif_search.py b/workflow/scripts/motif_search.py index 02e316f..0c2e3f8 100644 --- a/workflow/scripts/motif_search.py +++ b/workflow/scripts/motif_search.py @@ -6,6 +6,7 @@ import os import argparse import logging import shutil +import subprocess from multiprocessing import Pool import pandas as pd import utils @@ -63,6 +64,15 @@ def check_tools(): meme_path = shutil.which("meme") if meme_path: logger.info('Found meme: %s', 
meme_path) + + # Get Version + memechip_version_command = "meme-chip --version" + memechip_version = subprocess.check_output(memechip_version_command, shell=True) + + # Write to file (check_output returns bytes, so the prefix must be bytes too) + meme_file = open("version_memechip.txt", "wb") + meme_file.write(b"Version " + memechip_version) + meme_file.close() else: logger.error('Missing meme') raise Exception('Missing meme') @@ -70,6 +80,15 @@ def check_tools(): bedtools_path = shutil.which("bedtools") if bedtools_path: logger.info('Found bedtools: %s', bedtools_path) + + # Get Version + bedtools_version_command = "bedtools --version" + bedtools_version = subprocess.check_output(bedtools_version_command, shell=True) + + # Write to file + bedtools_file = open("version_bedtools.txt", "wb") + bedtools_file.write(bedtools_version) + bedtools_file.close() else: logger.error('Missing bedtools') raise Exception('Missing bedtools') diff --git a/workflow/scripts/overlap_peaks.py b/workflow/scripts/overlap_peaks.py index 61f0c2e..71438f7 100644 --- a/workflow/scripts/overlap_peaks.py +++ b/workflow/scripts/overlap_peaks.py @@ -49,6 +49,15 @@ def check_tools(): bedtools_path = shutil.which("bedtools") if bedtools_path: logger.info('Found bedtools: %s', bedtools_path) + + # Get Version + bedtools_version_command = "bedtools --version" + bedtools_version = subprocess.check_output(bedtools_version_command, shell=True) + + # Write to file + bedtools_file = open("version_bedtools.txt", "wb") + bedtools_file.write(bedtools_version) + bedtools_file.close() else: logger.error('Missing bedtools') raise Exception('Missing bedtools') diff --git a/workflow/scripts/software_report.py b/workflow/scripts/software_report.py new file mode 100644 index 0000000..1b0a3b8 --- /dev/null +++ b/workflow/scripts/software_report.py @@ -0,0 +1,66 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +from __future__ import print_function +from collections import OrderedDict +import re + +regexes = { + 'Trim Galore!': ['version_trimgalore.txt', r"version 
(\S+)"], + 'Cutadapt': ['version_cutadapt.txt', r"Version (\S+)"], + 'BWA': ['version_bwa.txt', r"Version: (\S+)"], + 'Samtools': ['version_samtools.txt', r"samtools (\S+)"], + 'Sambamba': ['version_sambamba.txt', r"sambamba (\S+)"], + 'BEDTools': ['version_bedtools.txt', r"bedtools v(\S+)"], + 'R': ['version_r.txt', r"R version (\S+)"], + 'SPP': ['version_spp.txt', r"\[1\] ‘(\S+)’"], + 'MACS2': ['version_macs.txt', r"macs2 (\S+)"], + 'bedGraphToBigWig': ['version_bedGraphToBigWig.txt', r"bedGraphToBigWig v (\S+)"], + 'ChIPseeker': ['version_ChIPseeker.txt', r"Version (\S+)\""], + 'MEME-ChIP': ['version_memechip.txt', r"Version (\S+)"], + 'DiffBind': ['version_DiffBind.txt', r"Version (\S+)\""], + 'deepTools': ['version_deeptools.txt', r"deeptools (\S+)"], +} + +results = OrderedDict() +results['Trim Galore!'] = '<span style="color:#999999;\">N/A</span>' +results['Cutadapt'] = '<span style="color:#999999;\">N/A</span>' +results['BWA'] = '<span style="color:#999999;\">N/A</span>' +results['Samtools'] = '<span style="color:#999999;\">N/A</span>' +results['Sambamba'] = '<span style="color:#999999;\">N/A</span>' +results['BEDTools'] = '<span style="color:#999999;\">N/A</span>' +results['R'] = '<span style="color:#999999;\">N/A</span>' +results['SPP'] = '<span style="color:#999999;\">N/A</span>' +results['MACS2'] = '<span style="color:#999999;\">N/A</span>' +results['bedGraphToBigWig'] = '<span style="color:#999999;\">N/A</span>' +results['ChIPseeker'] = '<span style="color:#999999;\">N/A</span>' +results['MEME-ChIP'] = '<span style="color:#999999;\">N/A</span>' +results['DiffBind'] = '<span style="color:#999999;\">N/A</span>' +results['deepTools'] = '<span style="color:#999999;\">N/A</span>' + +# Search each file using its regex; a tool whose version file is absent stays N/A +for k, v in regexes.items(): + try: + with open(v[0]) 
as x: + versions = x.read() + except IOError: + continue + match = re.search(v[1], versions) + if match: + results[k] = "v{}".format(match.group(1)) + +# Dump to YAML +print( + ''' + id: 'BICF ChIP-seq Analaysis' + section_name: 'BICF ChIP-seq Analysis Software Versions' + section_href: 'https://git.biohpc.swmed.edu/BICF/Astrocyte/chipseq_analysis/' + plot_type: 'html' + description: 'are collected at run time from the software output.' + data: | + <dl class="dl-horizontal"> + ''' +) +for k,v in results.items(): + print(" <dt>{}</dt><dd>{}</dd>".format(k,v)) +print(" </dl>") diff --git a/workflow/scripts/trim_reads.py b/workflow/scripts/trim_reads.py index e31ec93..e4461d5 100644 --- a/workflow/scripts/trim_reads.py +++ b/workflow/scripts/trim_reads.py @@ -54,6 +54,15 @@ def check_tools(): trimgalore_path = shutil.which("trim_galore") if trimgalore_path: logger.info('Found trimgalore: %s', trimgalore_path) + + # Get Version + trim_version_command = "trim_galore --version" + trimgalore_version = subprocess.check_output(trim_version_command, shell=True) + + # Write to file + trimgalore_file = open("version_trimgalore.txt", "wb") + trimgalore_file.write(trimgalore_version) + trimgalore_file.close() else: logger.error('Missing trimgalore') raise Exception('Missing trimgalore') @@ -61,6 +70,15 @@ def check_tools(): cutadapt_path = shutil.which("cutadapt") if cutadapt_path: logger.info('Found cutadapt: %s', cutadapt_path) + + # Get Version + cutadapt_version_command = "cutadapt --version" + cutadapt_version = subprocess.check_output(cutadapt_version_command, shell=True) + + # Write to file (check_output returns bytes, so the prefix must be bytes too) + cutadapt_file = open("version_cutadapt.txt", "wb") + cutadapt_file.write(b"Version " + cutadapt_version) + cutadapt_file.close() else: logger.error('Missing cutadapt') raise Exception('Missing cutadapt') diff --git a/workflow/scripts/xcor.py b/workflow/scripts/xcor.py index ac2ff65..eddfc0d 100644 --- a/workflow/scripts/xcor.py +++ b/workflow/scripts/xcor.py @@ -5,6 +5,7 @@ import os import argparse import 
shutil +import subprocess import logging from multiprocessing import cpu_count import utils @@ -59,10 +60,35 @@ def check_tools(): r_path = shutil.which("R") if r_path: logger.info('Found R: %s', r_path) + + # Get Version + r_version_command = "R --version" + r_version = subprocess.check_output(r_version_command, shell=True) + + # Write to file + r_file = open("version_r.txt", "wb") + r_file.write(r_version) + r_file.close() else: logger.error('Missing R') raise Exception('Missing R') + phantompeak_path = shutil.which("run_spp.R") + if phantompeak_path: + logger.info('Found phantompeak: %s', phantompeak_path) + + # Get Version + spp_version_command = "R -e \"packageVersion('spp')\"" + spp_version = subprocess.check_output(spp_version_command, shell=True, stderr=subprocess.STDOUT) + + # Write to file + spp_file = open("version_spp.txt", "wb") + spp_file.write(spp_version) + spp_file.close() + else: + logger.error('Missing phantompeak') + raise Exception('Missing phantompeak') + def xcor(tag, paired): '''Use spp to calculate cross-correlation stats.''' -- GitLab