From a7cb17d80642890ac3c22e75ba9b967e7b13c837 Mon Sep 17 00:00:00 2001
From: Venkat Malladi <venkat.malladi@utsouthwestern.edu>
Date: Sun, 21 Apr 2019 11:37:47 -0500
Subject: [PATCH] Add in multiqc reports.

---
 docs/references.md                            |   3 +
 workflow/conf/biohpc.config                   |   4 +-
 workflow/conf/multiqc_config.yaml             | 106 ++++++++++++++----
 workflow/main.nf                              |  39 ++++---
 workflow/scripts/generate_versions.py         |   2 +
 .../test_generate_software_references.py      |   2 +-
 .../tests/test_generate_software_versions.py  |   2 +-
 7 files changed, 118 insertions(+), 40 deletions(-)

diff --git a/docs/references.md b/docs/references.md
index 6998a59..a5eba7d 100644
--- a/docs/references.md
+++ b/docs/references.md
@@ -47,3 +47,6 @@
 15. **DiffBind**:
   * Stark R., and G. Brown. 2011. DiffBind: differential binding analysis of ChIP-Seq peak data. [http://bioconductor.org/packages/release/bioc/vignettes/DiffBind/inst/doc/DiffBind.pdf](http://bioconductor.org/packages/release/bioc/vignettes/DiffBind/inst/doc/DiffBind.pdf). doi:[10.18129/B9.bioc.DiffBind](https://dx.doi.org/10.18129/B9.bioc.DiffBind)
   * Ross-Innes C. S., R. Stark, A. E. Teschendorff, K. A. Holmes, H. R. Ali, M. J. Dunning,  G. D. Brown, O. Gojis, I. O. Ellis, A. R. Green, S. Ali, S. Chin, C. Palmieri, C. Caldas, and J. S. Carroll. 2012. Differential oestrogen receptor binding is associated with clinical outcome in breast cancer. Nature 481: 389-393. doi:[10.1038/nature10730](https://dx.doi.org/10.1038/nature10730)
+
+16. **MultiQC**:
+  * Ewels P., Magnusson M., Lundin S. and Käller M. 2016. MultiQC: Summarize analysis results for multiple tools and samples in a single report. Bioinformatics 32(19): 3047–3048. doi:[10.1093/bioinformatics/btw354](https://dx.doi.org/10.1093/bioinformatics/btw354)
diff --git a/workflow/conf/biohpc.config b/workflow/conf/biohpc.config
index 5d388c6..6e2e885 100644
--- a/workflow/conf/biohpc.config
+++ b/workflow/conf/biohpc.config
@@ -60,8 +60,8 @@ process {
     module = ['python/3.6.1-2-anaconda', 'meme/4.11.1-gcc-openmpi', 'bedtools/2.26.0']
     cpus = 32
   }
-  withName: softwareReport {
-    module = ['python/3.6.1-2-anaconda', 'pandoc/2.7']
+  withName: multiqcReport {
+    module = ['python/3.6.1-2-anaconda', 'pandoc/2.7', 'multiqc/1.7']
     executor = 'local'
   }
 }
diff --git a/workflow/conf/multiqc_config.yaml b/workflow/conf/multiqc_config.yaml
index cd40e9c..bab17e5 100644
--- a/workflow/conf/multiqc_config.yaml
+++ b/workflow/conf/multiqc_config.yaml
@@ -2,36 +2,96 @@
 title: BICF ChIP-seq Analysis Report
 
 report_comment: >
-    This report has been generated by the <a href="https://git.biohpc.swmed.edu/BICF/Astrocyte/chipseq_analysis/" target="_blank">BICF/chipseq_analysis</a>
-    pipeline.
-
-report_section_order:
-    software_versions:
-        order: -1000
-
-report_section_order:
-    software_references:
-        order: -1000
-
+  This report has been generated by the <a href="https://git.biohpc.swmed.edu/BICF/Astrocyte/chipseq_analysis/" target="_blank">BICF/chipseq_analysis</a>
+  pipeline.
 
 extra_fn_clean_exts:
-    - '_R1'
-    - '_R2'
-    - 'pbc.qc'
+  - 'pbc.qc'
+  - 'cc.qc'
 
 fn_ignore_files:
-    - '*dedup.flagstat.qc'
+  - '*dedup.flagstat.qc'
 
 custom_data:
-    library_complexity:
-      file_format: 'tsv'
-      id: 'library_complexity'
-      contents: 'TotalReadPairs  DistinctReadPairs       OneReadPair     TwoReadPairs    NRF     PBC1    PBC2'
-      section_name: 'Library complexity'
-      plot_type: 'generalstats'
+  library_complexity:
+    file_format: 'tsv'
+    id: 'library_complexity'
+    contents: 'TotalReadPairs  DistinctReadPairs       OneReadPair     TwoReadPairs    NRF     PBC1    PBC2'
+    section_name: 'Library complexity'
+    plot_type: 'generalstats'
+    pconfig:
+        TotalReadPairs:
+          decimalPlaces: 0
+          shared_key: read_count
+        DistinctReadPairs:
+          decimalPlaces: 0
+          shared_key: read_count
+        NRF:
+          decimalPlaces: 2
+        PBC1:
+          decimalPlaces: 2
+        PBC2:
+          decimalPlaces: 2
 
 sp:
     phantompeakqualtools/out:
-        fn: '*cc.qc'
+      fn: '*cc.qc'
+    library_complexity:
+      fn: '*pbc.qc'
+    macs2:
+      fn: '*_peaks.xls'
+
+
+report_section_order:
+    cutadapt:
+      order: -1000
+    Samtools:
+      order: -1100
+    Software_Versions:
+      order: -1200
+    Software_References:
+      order: -1300
+
+table_columns_placement:
     library_complexity:
-        fn: '*pbc.qc'
+      TotalReadPairs: 1100
+      DistinctReadPairs: 1200
+      NRF: 1300
+      PBC1: 1400
+      PBC2: 1500
+    phantompeakqualtools:
+      Estimated_Fragment_Length_bp: 1600
+      NSC: 1700
+      RSC: 1800
+
+table_columns_visible:
+  cutadapt:
+    percent_trimmed: False
+  library_complexity:
+    OneReadPair: False
+    TwoReadPairs: False
+
+table_cond_formatting_rules:
+    library_complexity_mqc-generalstats-library_complexity-NRF:
+      pass:
+        - gt: 0.8
+      warn:
+        - lt: 0.8
+      fail:
+        - lt: 0.5
+    library_complexity_mqc-generalstats-library_complexity-PBC1:
+      pass:
+        - gt: 0.8
+      warn:
+        - lt: 0.8
+      fail:
+        - lt: 0.5
+    library_complexity_mqc-generalstats-library_complexity-PBC2:
+      pass:
+        - gt: 3
+      warn:
+        - lt: 3
+      fail:
+        - lt: 1
+
+thousandsSep_format: ''
diff --git a/workflow/main.nf b/workflow/main.nf
index 85cce33..3045aaa 100644
--- a/workflow/main.nf
+++ b/workflow/main.nf
@@ -16,6 +16,7 @@ params.astrocyte = 'false'
 params.skipDiff = false
 params.skipMotif = false
 params.references = "$baseDir/../docs/references.md"
+params.multiqc =  "$baseDir/conf/multiqc_config.yaml"
 
 // Assign variables if astrocyte
 if (params.astrocyte) {
@@ -67,6 +68,7 @@ topPeakCount = params.topPeakCount
 skipDiff = params.skipDiff
 skipMotif = params.skipMotif
 references = params.references
+multiqc = params.multiqc
 
 if (params.pairedEnd == 'false'){
   pairedEnd = false
@@ -563,35 +565,46 @@ process diffPeaks {
   """
 }
 
-// Collect Software Versions and references
-process softwareReport {
+// Generate MultiQC report, and generate software versions and references
+process multiqcReport {
 
   publishDir "$outDir/${task.process}", mode: 'copy'
 
   input:
 
-    file ('trimReads_vf/*') from trimReadsVersions.first()
-    file ('alignReads_vf/*') from alignReadsVersions.first()
-    file ('filterReads_vf/*') from filterReadsVersions.first()
-    file ('convertReads_vf/*') from convertReadsVersions.first()
-    file ('crossReads_vf/*') from crossReadsVersions.first()
-    file ('callPeaksMACS_vf/*') from callPeaksMACSVersions.first()
-    file ('consensusPeaks_vf/*') from consensusPeaksVersions.first()
-    file ('peakAnnotation_vf/*') from peakAnnotationVersions.first()
-    file ('motifSearch_vf/*') from motifSearchVersions.first().ifEmpty()
-    file ('diffPeaks_vf/*') from diffPeaksVersions.first().ifEmpty()
-    file ('experimentQC_vf/*') from experimentQCVersions.first()
+  file ('trimReads_vf/*') from trimReadsVersions.first()
+  file ('alignReads_vf/*') from alignReadsVersions.first()
+  file ('filterReads_vf/*') from filterReadsVersions.first()
+  file ('convertReads_vf/*') from convertReadsVersions.first()
+  file ('crossReads_vf/*') from crossReadsVersions.first()
+  file ('callPeaksMACS_vf/*') from callPeaksMACSVersions.first()
+  file ('consensusPeaks_vf/*') from consensusPeaksVersions.first()
+  file ('peakAnnotation_vf/*') from peakAnnotationVersions.first()
+  file ('motifSearch_vf/*') from motifSearchVersions.first().ifEmpty()
+  file ('diffPeaks_vf/*') from diffPeaksVersions.first().ifEmpty()
+  file ('experimentQC_vf/*') from experimentQCVersions.first()
+  file ('trimReads/*') from trimgaloreResults.collect()
+  file ('alignReads/*') from mappedReadsStats.collect()
+  file ('filterReads/*') from dedupReadsComplexity.collect()
+  file ('crossReads/*') from crossReadsStats.collect()
 
   output:
 
   file('software_versions_mqc.yaml') into softwareVersions
   file('software_references_mqc.yaml') into softwareReferences
+  file "multiqc_report.html" into multiqcReport
+  file "*_data" into multiqcData
 
   script:
 
   """
+  module load python/3.6.1-2-anaconda
+  module load pandoc/2.7
+  module load multiqc/1.7
   echo $workflow.nextflow.version > version_nextflow.txt
+  multiqc --version > version_multiqc.txt
   python3 $baseDir/scripts/generate_references.py -r $references -o software_references
   python3 $baseDir/scripts/generate_versions.py -o software_versions
+  multiqc -c $multiqc .
   """
 }
diff --git a/workflow/scripts/generate_versions.py b/workflow/scripts/generate_versions.py
index f6d3f62..661b266 100644
--- a/workflow/scripts/generate_versions.py
+++ b/workflow/scripts/generate_versions.py
@@ -40,6 +40,7 @@ SOFTWARE_REGEX = {
     'MEME-ChIP': ['motifSearch_vf/version_memechip.txt', r"Version (\S+)"],
     'DiffBind': ['diffPeaks_vf/version_DiffBind.txt', r"Version (\S+)\""],
     'deepTools': ['experimentQC_vf/version_deeptools.txt', r"deeptools (\S+)"],
+    'MultiQC': ['version_multiqc.txt', r"multiqc, version (\S+)"],
 }
 
 
@@ -100,6 +101,7 @@ def main():
     results['MEME-ChIP'] = '<span style="color:#999999;\">Not Run</span>'
     results['DiffBind'] = '<span style="color:#999999;\">Not Run</span>'
     results['deepTools'] = '<span style="color:#999999;\">Not Run</span>'
+    results['MultiQC'] = '<span style="color:#999999;\">Not Run</span>'
 
     # list all files
     files = glob.glob('**/*.txt', recursive=True)
diff --git a/workflow/tests/test_generate_software_references.py b/workflow/tests/test_generate_software_references.py
index a627887..0235f2e 100644
--- a/workflow/tests/test_generate_software_references.py
+++ b/workflow/tests/test_generate_software_references.py
@@ -6,7 +6,7 @@ import utils
 import yaml
 
 test_output_path = os.path.dirname(os.path.abspath(__file__)) + \
-                '/../output/softwareReport/'
+                '/../output/multiqcReport/'
 
 
 @pytest.mark.singleend
diff --git a/workflow/tests/test_generate_software_versions.py b/workflow/tests/test_generate_software_versions.py
index 276fc96..1048e49 100644
--- a/workflow/tests/test_generate_software_versions.py
+++ b/workflow/tests/test_generate_software_versions.py
@@ -6,7 +6,7 @@ import utils
 import yaml
 
 test_output_path = os.path.dirname(os.path.abspath(__file__)) + \
-                '/../output/softwareReport/'
+                '/../output/multiqcReport/'
 
 
 @pytest.mark.singleend
-- 
GitLab