Add in multiqc reports.

a7cb17d8 · Venkat Malladi · a939b2d0 · a7cb17d8 · a7cb17d8 · a7cb17d8
Commit a7cb17d8 authored 6 years ago by Venkat Malladi
--- a/docs/references.md
+++ b/docs/references.md
@@ -47,3 +47,6 @@
 15. **DiffBind**:
  * Stark R., and G. Brown. 2011. DiffBind: differential binding analysis of ChIP-Seq peak data. [http://bioconductor.org/packages/release/bioc/vignettes/DiffBind/inst/doc/DiffBind.pdf](http://bioconductor.org/packages/release/bioc/vignettes/DiffBind/inst/doc/DiffBind.pdf). doi:[10.18129/B9.bioc.DiffBind](https://dx.doi.org/10.18129/B9.bioc.DiffBind)
  * Ross-Innes C. S., R. Stark, A. E. Teschendorff, K. A. Holmes, H. R. Ali, M. J. Dunning,  G. D. Brown, O. Gojis, I. O. Ellis, A. R. Green, S. Ali, S. Chin, C. Palmieri, C. Caldas, and J. S. Carroll. 2012. Differential oestrogen receptor binding is associated with clinical outcome in breast cancer. Nature 481: 389-393. doi:[10.1038/nature10730](https://dx.doi.org/10.1038/nature10730)
+16. **MultiQC**:
+  * Ewels P., Magnusson M., Lundin S. and Käller M. 2016. MultiQC: Summarize analysis results for multiple tools and samples in a single report. Bioinformatics 32(19): 3047–3048. doi:[10.1093/bioinformatics/btw354](https://dx.doi.org/10.1093/bioinformatics/btw354)
--- a/workflow/conf/biohpc.config
+++ b/workflow/conf/biohpc.config
@@ -60,8 +60,8 @@ process {
    module = ['python/3.6.1-2-anaconda', 'meme/4.11.1-gcc-openmpi', 'bedtools/2.26.0']
    cpus = 32
  }
-  withName: softwareReport {
+  withName: multiqcReport {
-    module = ['python/3.6.1-2-anaconda', 'pandoc/2.7']
+    module = ['python/3.6.1-2-anaconda', 'pandoc/2.7', 'multiqc/1.7']
    executor = 'local'
  }
 }

--- a/workflow/conf/multiqc_config.yaml
+++ b/workflow/conf/multiqc_config.yaml
@@ -2,36 +2,96 @@
 title: BICF ChIP-seq Analysis Report
 report_comment: >
-    This report has been generated by the <a href="https://git.biohpc.swmed.edu/BICF/Astrocyte/chipseq_analysis/" target="_blank">BICF/chipseq_analysis</a>
+  This report has been generated by the <a href="https://git.biohpc.swmed.edu/BICF/Astrocyte/chipseq_analysis/" target="_blank">BICF/chipseq_analysis</a>
-    pipeline.
+  pipeline.
-report_section_order:
-    software_versions:
-        order: -1000
-report_section_order:
-    software_references:
-        order: -1000
 extra_fn_clean_exts:
-    - '_R1'
+  - 'pbc.qc'
-    - '_R2'
+  - 'cc.qc'
-    - 'pbc.qc'
 fn_ignore_files:
-    - '*dedup.flagstat.qc'
+  - '*dedup.flagstat.qc'
 custom_data:
-    library_complexity:
+  library_complexity:
-      file_format: 'tsv'
+    file_format: 'tsv'
-      id: 'library_complexity'
+    id: 'library_complexity'
-      contents: 'TotalReadPairs  DistinctReadPairs       OneReadPair     TwoReadPairs    NRF     PBC1    PBC2'
+    contents: 'TotalReadPairs  DistinctReadPairs       OneReadPair     TwoReadPairs    NRF     PBC1    PBC2'
-      section_name: 'Library complexity'
+    section_name: 'Library complexity'
-      plot_type: 'generalstats'
+    plot_type: 'generalstats'
+    pconfig:
+        TotalReadPairs:
+          decimalPlaces: 0
+          shared_key: read_count
+        DistinctReadPairs:
+          decimalPlaces: 0
+          shared_key: read_count
+        NRF:
+          decimalPlaces: 2
+        PBC1:
+          decimalPlaces: 2
+        PBC2:
+          decimalPlaces: 2
 sp:
    phantompeakqualtools/out:
-        fn: '*cc.qc'
+      fn: '*cc.qc'
+    library_complexity:
+      fn: '*pbc.qc'
+    macs2:
+      fn: '*_peaks.xls'
+report_section_order:
+    cutadapt:
+      order: -1000
+    Samtools:
+      order: -1100
+    Software_Versions:
+      order: -1200
+    Software_References:
+      order: -1300
+table_columns_placement:
    library_complexity:
-        fn: '*pbc.qc'
+      TotalReadPairs: 1100
+      DistinctReadPairs: 1200
+      NRF: 1300
+      PBC1: 1400
+      PBC2: 1500
+    phantompeakqualtools:
+      Estimated_Fragment_Length_bp: 1600
+      NSC: 1700
+      RSC: 1800
+table_columns_visible:
+  cutadapt:
+    percent_trimmed: False
+  library_complexity:
+    OneReadPair: False
+    TwoReadPairs: False
+table_cond_formatting_rules:
+    library_complexity_mqc-generalstats-library_complexity-NRF:
+      pass:
+        - gt: 0.8
+      warn:
+        - lt: 0.8
+      fail:
+        - lt: 0.5
+    library_complexity_mqc-generalstats-library_complexity-PBC1:
+      pass:
+        - gt: 0.8
+      warn:
+        - lt: 0.8
+      fail:
+        - lt: 0.5
+    library_complexity_mqc-generalstats-library_complexity-PBC2:
+      pass:
+        - gt: 3
+      warn:
+        - lt: 3
+      fail:
+        - lt: 1
+thousandsSep_format: ''
--- a/workflow/main.nf
+++ b/workflow/main.nf
@@ -16,6 +16,7 @@ params.astrocyte = 'false'
 params.skipDiff = false
 params.skipMotif = false
 params.references = "$baseDir/../docs/references.md"
+params.multiqc =  "$baseDir/conf/multiqc_config.yaml"
 // Assign variables if astrocyte
 if (params.astrocyte) {
@@ -67,6 +68,7 @@ topPeakCount = params.topPeakCount
 skipDiff = params.skipDiff
 skipMotif = params.skipMotif
 references = params.references
+multiqc = params.multiqc
 if (params.pairedEnd == 'false'){
  pairedEnd = false
@@ -563,35 +565,46 @@ process diffPeaks {
  """
 }
-// Collect Software Versions and references
+// Generate MultiQC report, software versions and references
-process softwareReport {
+process multiqcReport {
  publishDir "$outDir/${task.process}", mode: 'copy'
  input:
-    file ('trimReads_vf/*') from trimReadsVersions.first()
+  file ('trimReads_vf/*') from trimReadsVersions.first()
-    file ('alignReads_vf/*') from alignReadsVersions.first()
+  file ('alignReads_vf/*') from alignReadsVersions.first()
-    file ('filterReads_vf/*') from filterReadsVersions.first()
+  file ('filterReads_vf/*') from filterReadsVersions.first()
-    file ('convertReads_vf/*') from convertReadsVersions.first()
+  file ('convertReads_vf/*') from convertReadsVersions.first()
-    file ('crossReads_vf/*') from crossReadsVersions.first()
+  file ('crossReads_vf/*') from crossReadsVersions.first()
-    file ('callPeaksMACS_vf/*') from callPeaksMACSVersions.first()
+  file ('callPeaksMACS_vf/*') from callPeaksMACSVersions.first()
-    file ('consensusPeaks_vf/*') from consensusPeaksVersions.first()
+  file ('consensusPeaks_vf/*') from consensusPeaksVersions.first()
-    file ('peakAnnotation_vf/*') from peakAnnotationVersions.first()
+  file ('peakAnnotation_vf/*') from peakAnnotationVersions.first()
-    file ('motifSearch_vf/*') from motifSearchVersions.first().ifEmpty()
+  file ('motifSearch_vf/*') from motifSearchVersions.first().ifEmpty()
-    file ('diffPeaks_vf/*') from diffPeaksVersions.first().ifEmpty()
+  file ('diffPeaks_vf/*') from diffPeaksVersions.first().ifEmpty()
-    file ('experimentQC_vf/*') from experimentQCVersions.first()
+  file ('experimentQC_vf/*') from experimentQCVersions.first()
+  file ('trimReads/*') from trimgaloreResults.collect()
+  file ('alignReads/*') from mappedReadsStats.collect()
+  file ('filterReads/*') from dedupReadsComplexity.collect()
+  file ('crossReads/*') from crossReadsStats.collect()
  output:
  file('software_versions_mqc.yaml') into softwareVersions
  file('software_references_mqc.yaml') into softwareReferences
+  file "multiqc_report.html" into multiqcReport
+  file "*_data" into multiqcData
  script:
  """
+  module load python/3.6.1-2-anaconda
+  module load pandoc/2.7
+  module load multiqc/1.7
  echo $workflow.nextflow.version > version_nextflow.txt
+  multiqc --version > version_multiqc.txt
  python3 $baseDir/scripts/generate_references.py -r $references -o software_references
  python3 $baseDir/scripts/generate_versions.py -o software_versions
+  multiqc -c $multiqc .
  """
 }
--- a/workflow/scripts/generate_versions.py
+++ b/workflow/scripts/generate_versions.py
@@ -40,6 +40,7 @@ SOFTWARE_REGEX = {
    'MEME-ChIP': ['motifSearch_vf/version_memechip.txt', r"Version (\S+)"],
    'DiffBind': ['diffPeaks_vf/version_DiffBind.txt', r"Version (\S+)\""],
    'deepTools': ['experimentQC_vf/version_deeptools.txt', r"deeptools (\S+)"],
+    'MultiQC': ['version_multiqc.txt', r"multiqc, version (\S+)"],
 }
@@ -100,6 +101,7 @@ def main():
    results['MEME-ChIP'] = '<span style="color:#999999;\">Not Run</span>'
    results['DiffBind'] = '<span style="color:#999999;\">Not Run</span>'
    results['deepTools'] = '<span style="color:#999999;\">Not Run</span>'
+    results['MultiQC'] = '<span style="color:#999999;\">Not Run</span>'
    # list all files
    files = glob.glob('**/*.txt', recursive=True)

--- a/workflow/tests/test_generate_software_references.py
+++ b/workflow/tests/test_generate_software_references.py
@@ -6,7 +6,7 @@ import utils
 import yaml
 test_output_path = os.path.dirname(os.path.abspath(__file__)) + \
-                '/../output/softwareReport/'
+                '/../output/multiqcReport/'
 @pytest.mark.singleend

--- a/workflow/tests/test_generate_software_versions.py
+++ b/workflow/tests/test_generate_software_versions.py
@@ -6,7 +6,7 @@ import utils
 import yaml
 test_output_path = os.path.dirname(os.path.abspath(__file__)) + \
-                '/../output/softwareReport/'
+                '/../output/multiqcReport/'
 @pytest.mark.singleend