From a7cb17d80642890ac3c22e75ba9b967e7b13c837 Mon Sep 17 00:00:00 2001 From: Venkat Malladi <venkat.malladi@utsouthwestern.edu> Date: Sun, 21 Apr 2019 11:37:47 -0500 Subject: [PATCH] Add in multiqc reports. --- docs/references.md | 3 + workflow/conf/biohpc.config | 4 +- workflow/conf/multiqc_config.yaml | 106 ++++++++++++++---- workflow/main.nf | 39 ++++--- workflow/scripts/generate_versions.py | 2 + .../test_generate_software_references.py | 2 +- .../tests/test_generate_software_versions.py | 2 +- 7 files changed, 118 insertions(+), 40 deletions(-) diff --git a/docs/references.md b/docs/references.md index 6998a59..a5eba7d 100644 --- a/docs/references.md +++ b/docs/references.md @@ -47,3 +47,6 @@ 15. **DiffBind**: * Stark R., and G. Brown. 2011. DiffBind: differential binding analysis of ChIP-Seq peak data. [http://bioconductor.org/packages/release/bioc/vignettes/DiffBind/inst/doc/DiffBind.pdf](http://bioconductor.org/packages/release/bioc/vignettes/DiffBind/inst/doc/DiffBind.pdf). doi:[10.18129/B9.bioc.DiffBind](https://dx.doi.org/10.18129/B9.bioc.DiffBind) * Ross-Innes C. S., R. Stark, A. E. Teschendorff, K. A. Holmes, H. R. Ali, M. J. Dunning, G. D. Brown, O. Gojis, I. O. Ellis, A. R. Green, S. Ali, S. Chin, C. Palmieri, C. Caldas, and J. S. Carroll. 2012. Differential oestrogen receptor binding is associated with clinical outcome in breast cancer. Nature 481: 389-393. doi:[10.1038/nature10730](https://dx.doi.org/10.1038/nature10730) + +16. **MultiQC**: + * Ewels P., Magnusson M., Lundin S. and Käller M. 2016. MultiQC: Summarize analysis results for multiple tools and samples in a single report. Bioinformatics 32(19): 3047–3048. 
doi:[10.1093/bioinformatics/btw354](https://dx.doi.org/10.1093/bioinformatics/btw354) diff --git a/workflow/conf/biohpc.config b/workflow/conf/biohpc.config index 5d388c6..6e2e885 100644 --- a/workflow/conf/biohpc.config +++ b/workflow/conf/biohpc.config @@ -60,8 +60,8 @@ process { module = ['python/3.6.1-2-anaconda', 'meme/4.11.1-gcc-openmpi', 'bedtools/2.26.0'] cpus = 32 } - withName: softwareReport { - module = ['python/3.6.1-2-anaconda', 'pandoc/2.7'] + withName: multiqcReport { + module = ['python/3.6.1-2-anaconda', 'pandoc/2.7', 'multiqc/1.7'] executor = 'local' } } diff --git a/workflow/conf/multiqc_config.yaml b/workflow/conf/multiqc_config.yaml index cd40e9c..bab17e5 100644 --- a/workflow/conf/multiqc_config.yaml +++ b/workflow/conf/multiqc_config.yaml @@ -2,36 +2,96 @@ title: BICF ChIP-seq Analysis Report report_comment: > - This report has been generated by the <a href="https://git.biohpc.swmed.edu/BICF/Astrocyte/chipseq_analysis/" target="_blank">BICF/chipseq_analysis</a> - pipeline. - -report_section_order: - software_versions: - order: -1000 - -report_section_order: - software_references: - order: -1000 - + This report has been generated by the <a href="https://git.biohpc.swmed.edu/BICF/Astrocyte/chipseq_analysis/" target="_blank">BICF/chipseq_analysis</a> + pipeline. 
extra_fn_clean_exts: - - '_R1' - - '_R2' - - 'pbc.qc' + - 'pbc.qc' + - 'cc.qc' fn_ignore_files: - - '*dedup.flagstat.qc' + - '*dedup.flagstat.qc' custom_data: - library_complexity: - file_format: 'tsv' - id: 'library_complexity' - contents: 'TotalReadPairs DistinctReadPairs OneReadPair TwoReadPairs NRF PBC1 PBC2' - section_name: 'Library complexity' - plot_type: 'generalstats' + library_complexity: + file_format: 'tsv' + id: 'library_complexity' + contents: 'TotalReadPairs DistinctReadPairs OneReadPair TwoReadPairs NRF PBC1 PBC2' + section_name: 'Library complexity' + plot_type: 'generalstats' + pconfig: + TotalReadPairs: + decimalPlaces: 0 + shared_key: read_count + DistinctReadPairs: + decimalPlaces: 0 + shared_key: read_count + NRF: + decimalPlaces: 2 + PBC1: + decimalPlaces: 2 + PBC2: + decimalPlaces: 2 sp: phantompeakqualtools/out: - fn: '*cc.qc' + fn: '*cc.qc' + library_complexity: + fn: '*pbc.qc' + macs2: + fn: '*_peaks.xls' + + +report_section_order: + cutadapt: + order: -1000 + Samtools: + order: -1100 + Software_Versions: + order: -1200 + Software_References: + order: -1300 + +table_columns_placement: library_complexity: - fn: '*pbc.qc' + TotalReadPairs: 1100 + DistinctReadPairs: 1200 + NRF: 1300 + PBC1: 1400 + PBC2: 1500 + phantompeakqualtools: + Estimated_Fragment_Length_bp: 1600 + NSC: 1700 + RSC: 1800 + +table_columns_visible: + cutadapt: + percent_trimmed: False + library_complexity: + OneReadPair: False + TwoReadPairs: False + +table_cond_formatting_rules: + library_complexity_mqc-generalstats-library_complexity-NRF: + pass: + - gt: 0.8 + warn: + - lt: 0.8 + fail: + - lt: 0.5 + library_complexity_mqc-generalstats-library_complexity-PBC1: + pass: + - gt: 0.8 + warn: + - lt: 0.8 + fail: + - lt: 0.5 + library_complexity_mqc-generalstats-library_complexity-PBC2: + pass: + - gt: 3 + warn: + - lt: 3 + fail: + - lt: 1 + +thousandsSep_format: '' diff --git a/workflow/main.nf b/workflow/main.nf index 85cce33..3045aaa 100644 --- a/workflow/main.nf +++ 
b/workflow/main.nf @@ -16,6 +16,7 @@ params.astrocyte = 'false' params.skipDiff = false params.skipMotif = false params.references = "$baseDir/../docs/references.md" +params.multiqc = "$baseDir/conf/multiqc_config.yaml" // Assign variables if astrocyte if (params.astrocyte) { @@ -67,6 +68,7 @@ topPeakCount = params.topPeakCount skipDiff = params.skipDiff skipMotif = params.skipMotif references = params.references +multiqc = params.multiqc if (params.pairedEnd == 'false'){ pairedEnd = false @@ -563,35 +565,46 @@ process diffPeaks { """ } -// Collect Software Versions and references -process softwareReport { +// Generate MultiQC report; generate software versions and references +process multiqcReport { publishDir "$outDir/${task.process}", mode: 'copy' input: - file ('trimReads_vf/*') from trimReadsVersions.first() - file ('alignReads_vf/*') from alignReadsVersions.first() - file ('filterReads_vf/*') from filterReadsVersions.first() - file ('convertReads_vf/*') from convertReadsVersions.first() - file ('crossReads_vf/*') from crossReadsVersions.first() - file ('callPeaksMACS_vf/*') from callPeaksMACSVersions.first() - file ('consensusPeaks_vf/*') from consensusPeaksVersions.first() - file ('peakAnnotation_vf/*') from peakAnnotationVersions.first() - file ('motifSearch_vf/*') from motifSearchVersions.first().ifEmpty() - file ('diffPeaks_vf/*') from diffPeaksVersions.first().ifEmpty() - file ('experimentQC_vf/*') from experimentQCVersions.first() + file ('trimReads_vf/*') from trimReadsVersions.first() + file ('alignReads_vf/*') from alignReadsVersions.first() + file ('filterReads_vf/*') from filterReadsVersions.first() + file ('convertReads_vf/*') from convertReadsVersions.first() + file ('crossReads_vf/*') from crossReadsVersions.first() + file ('callPeaksMACS_vf/*') from callPeaksMACSVersions.first() + file ('consensusPeaks_vf/*') from consensusPeaksVersions.first() + file ('peakAnnotation_vf/*') from peakAnnotationVersions.first() + file ('motifSearch_vf/*') from 
motifSearchVersions.first().ifEmpty() + file ('diffPeaks_vf/*') from diffPeaksVersions.first().ifEmpty() + file ('experimentQC_vf/*') from experimentQCVersions.first() + file ('trimReads/*') from trimgaloreResults.collect() + file ('alignReads/*') from mappedReadsStats.collect() + file ('filterReads/*') from dedupReadsComplexity.collect() + file ('crossReads/*') from crossReadsStats.collect() output: file('software_versions_mqc.yaml') into softwareVersions file('software_references_mqc.yaml') into softwareReferences + file "multiqc_report.html" into multiqcReport + file "*_data" into multiqcData script: """ + module load python/3.6.1-2-anaconda + module load pandoc/2.7 + module load multiqc/1.7 echo $workflow.nextflow.version > version_nextflow.txt + multiqc --version > version_multiqc.txt python3 $baseDir/scripts/generate_references.py -r $references -o software_references python3 $baseDir/scripts/generate_versions.py -o software_versions + multiqc -c $multiqc . """ } diff --git a/workflow/scripts/generate_versions.py b/workflow/scripts/generate_versions.py index f6d3f62..661b266 100644 --- a/workflow/scripts/generate_versions.py +++ b/workflow/scripts/generate_versions.py @@ -40,6 +40,7 @@ SOFTWARE_REGEX = { 'MEME-ChIP': ['motifSearch_vf/version_memechip.txt', r"Version (\S+)"], 'DiffBind': ['diffPeaks_vf/version_DiffBind.txt', r"Version (\S+)\""], 'deepTools': ['experimentQC_vf/version_deeptools.txt', r"deeptools (\S+)"], + 'MultiQC': ['version_multiqc.txt', r"multiqc, version (\S+)"], } @@ -100,6 +101,7 @@ def main(): results['MEME-ChIP'] = '<span style="color:#999999;\">Not Run</span>' results['DiffBind'] = '<span style="color:#999999;\">Not Run</span>' results['deepTools'] = '<span style="color:#999999;\">Not Run</span>' + results['MultiQC'] = '<span style="color:#999999;\">Not Run</span>' # list all files files = glob.glob('**/*.txt', recursive=True) diff --git a/workflow/tests/test_generate_software_references.py 
b/workflow/tests/test_generate_software_references.py index a627887..0235f2e 100644 --- a/workflow/tests/test_generate_software_references.py +++ b/workflow/tests/test_generate_software_references.py @@ -6,7 +6,7 @@ import utils import yaml test_output_path = os.path.dirname(os.path.abspath(__file__)) + \ - '/../output/softwareReport/' + '/../output/multiqcReport/' @pytest.mark.singleend diff --git a/workflow/tests/test_generate_software_versions.py b/workflow/tests/test_generate_software_versions.py index 276fc96..1048e49 100644 --- a/workflow/tests/test_generate_software_versions.py +++ b/workflow/tests/test_generate_software_versions.py @@ -6,7 +6,7 @@ import utils import yaml test_output_path = os.path.dirname(os.path.abspath(__file__)) + \ - '/../output/softwareReport/' + '/../output/multiqcReport/' @pytest.mark.singleend -- GitLab