From d907644c4b2519676346818032dcee0a17d4dd61 Mon Sep 17 00:00:00 2001
From: "Gervaise H. Henry" <gervaise.henry@utsouthwestern.edu>
Date: Fri, 14 Aug 2020 11:06:26 -0500
Subject: [PATCH] Add Seurat version to report and fix Seurat iteration

---
 docs/references.md                    |  7 +++-
 workflow/configs/cluster.config       |  6 +--
 workflow/main.nf                      | 59 +++++++++++++++------------
 workflow/scripts/generate_versions.py |  2 +
 workflow/scripts/versions_seurat.sh   |  9 ++++
 5 files changed, 52 insertions(+), 31 deletions(-)
 create mode 100644 workflow/scripts/versions_seurat.sh

diff --git a/docs/references.md b/docs/references.md
index 37f42d8..ec7341a 100644
--- a/docs/references.md
+++ b/docs/references.md
@@ -1,7 +1,7 @@
 ### References
 
 1. **Nextflow**:
-  * Di Tommaso P., Chatzou M., Floden E. W., Barja P. P., Palumbo E., and Notredame C. 2017. Nextflow enables reproducible computational workflows. Nature biotechnology 35(4): 316. doi:[10.1038/nbt.3820](https://doi.org/10.1038/nbt.3820)
+  * Di Tommaso, P., Chatzou, M., Floden, E. W., Barja, P. P., Palumbo, E., & Notredame, C. (2017). Nextflow enables reproducible computational workflows. Nature biotechnology, 35(4), 316-319. doi:[10.1038/nbt.3820](https://doi.org/10.1038/nbt.3820)
 
 2. **cellranger**
   * Cellranger count [https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/using/count](https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/using/count)
@@ -9,5 +9,8 @@
 3. **python**:
   * Anaconda (Anaconda Software Distribution, [https://anaconda.com](https://anaconda.com))
 
-4. **MultiQc**:
+4. **Seurat**:
+  * Stuart, T., Butler, A., Hoffman, P., Hafemeister, C., Papalexi, E., Mauck III, W. M., ... & Satija, R. (2019). Comprehensive integration of single-cell data. Cell, 177(7), 1888-1902. doi:[10.1016/j.cell.2019.05.031](https://doi.org/10.1016/j.cell.2019.05.031)
+
+5. **MultiQC**:
   * Ewels P., Magnusson M., Lundin S. and Käller M. 2016. MultiQC: Summarize analysis results for multiple tools and samples in a single report. Bioinformatics 32(19): 3047–3048. doi:[10.1093/bioinformatics/btw354](https://dx.doi.org/10.1093/bioinformatics/btw354)
diff --git a/workflow/configs/cluster.config b/workflow/configs/cluster.config
index 2043421..fe73734 100644
--- a/workflow/configs/cluster.config
+++ b/workflow/configs/cluster.config
@@ -24,13 +24,13 @@ process {
   withName: count400 {
     queue = '128GB,256GB,256GBv1,384GB'
   }
+  withName: downstreamViz {
+    queue = '32GB'
+  }
   withName: versions {
     executor = 'local'
   }
   withName: multiqc {
     executor = 'local'
   }
-  withName: downstreamViz {
-    executor = '32GB'
-  }
 }
diff --git a/workflow/main.nf b/workflow/main.nf
index b532aa1..b9d4fa0 100755
--- a/workflow/main.nf
+++ b/workflow/main.nf
@@ -88,6 +88,7 @@ filename_checkScript = Channel.fromPath("$baseDir/scripts/filename_check.sh")
 generate_versionsScript = Channel.fromPath("$baseDir/scripts/generate_versions.py")
 generate_referencesScript = Channel.fromPath("$baseDir/scripts/generate_references.py")
 versions_pythonScript = Channel.fromPath("$baseDir/scripts/versions_python.sh")
+versions_seuratScript = Channel.fromPath("$baseDir/scripts/versions_seurat.sh")
 downstream_vizScript = Channel.fromPath("$baseDir/scripts/downstream_viz.r")
 
 // Define report files
@@ -373,6 +374,37 @@ process count400 {
     }
 }
 
+// Collect all outputs regardless of cellranger version
+filteredOut = filteredOut211.mix(filteredOut302, filteredOut310, filteredOut400)
+// Combine all inputs for downstreamViz
+downstreamVizIn = downstream_vizScript.combine(versions_seuratScript).combine(filteredOut)
+
+/*
+ * downstreamViz: create files for downstream use (e.g. R Seurat object) and write the Seurat version to version_seurat.txt
+ */
+process downstreamViz {
+  tag "${sample}"
+  publishDir "${outDir}/${task.process}", mode: 'copy', pattern: "*.rds"
+  module 'seurat/3.0.0'
+
+  input:
+    set file("*"), file("*"), sample, file("filtered/*"), file("clustering/graphclust/*"), file("clustering/kmeans_2_clusters/*"), file("clustering/kmeans_3_clusters/*"), file("clustering/kmeans_4_clusters/*"), file("clustering/kmeans_5_clusters/*"), file("clustering/kmeans_6_clusters/*"), file("clustering/kmeans_7_clusters/*"), file("clustering/kmeans_8_clusters/*"), file("clustering/kmeans_9_clusters/*"), file("clustering/kmeans_10_clusters/*"), file("pca/*"), file("tsne/*"), file("umap/*") from downstreamVizIn
+    // The two leading file("*") entries stage downstream_vizScript and versions_seuratScript,
+    // which downstreamVizIn combines in front of every filteredOut tuple (one task per sample)
+
+  output:
+    file "*.rds" into seuratPaths
+    file "version_seurat.txt" into version_seurat
+
+  script:
+    """
+    hostname
+    ulimit -a
+    seurat-Rscript downstream_viz.r --sample ${sample} --cellrangerVersion ${version}
+    bash versions_seurat.sh > version_seurat.txt
+    """
+}
+
 /*
  * versions: collect all versions into a single yml
  */
@@ -382,6 +414,7 @@ process versions {
 
   input:
     file versions_pythonScript
+    file version_seurat
     file generate_versionsScript
     file generate_referencesScript
 
@@ -428,29 +461,3 @@ process multiqc {
     multiqc -c ${multiqcConf} .
     """
 }
-
-// Collect all outputs reguardless of cellranger version
-filteredOut = filteredOut211.mix(filteredOut302, filteredOut310, filteredOut400)
-
-/*
- * downstreamViz: create files for downstream use (eg. R Seurat object)
- */
-process downstreamViz {
-  tag "${sample}"
-  publishDir "${outDir}/${task.process}", mode: 'copy'
-  module 'seurat/3.0.0'
-
-  input:
-    file downstream_vizScript
-    set sample, file("filtered/*"), file("clustering/graphclust/*"), file("clustering/kmeans_2_clusters/*"), file("clustering/kmeans_3_clusters/*"), file("clustering/kmeans_4_clusters/*"), file("clustering/kmeans_5_clusters/*"), file("clustering/kmeans_6_clusters/*"), file("clustering/kmeans_7_clusters/*"), file("clustering/kmeans_8_clusters/*"), file("clustering/kmeans_9_clusters/*"), file("clustering/kmeans_10_clusters/*"), file("pca/*"), file("tsne/*"), file("umap/*") from filteredOut
-
-  output:
-    file "*.rds" into seuratPaths
-
-  script:
-    """
-    hostname
-    ulimit -a
-    seurat-Rscript downstream_viz.r --sample ${sample} --cellrangerVersion ${version}
-    """
-}
diff --git a/workflow/scripts/generate_versions.py b/workflow/scripts/generate_versions.py
index 978aedc..8faa1fe 100755
--- a/workflow/scripts/generate_versions.py
+++ b/workflow/scripts/generate_versions.py
@@ -28,6 +28,7 @@ SOFTWARE_REGEX = {
     'Nextflow': ['version_nextflow.txt', r"(\S+)"],
     'cellranger count': ['version_cellranger.txt', r"(\S+)"],
     'python': ['version_python.txt', r"(\S+)"],
+    'seurat': ['version_seurat.txt', r"(\S+)"],
 }
 
 
@@ -77,6 +78,7 @@ def main():
     results['Nextflow'] = '<span style="color:#999999;\">N/A</span>'
     results['cellranger count'] = '<span style="color:#999999;\">N/A</span>'
     results['python'] = '<span style="color:#999999;\">N/A</span>'
+    results['seurat'] = '<span style="color:#999999;\">N/A</span>'
 
     # Check for version files:
     check_files(files)
diff --git a/workflow/scripts/versions_seurat.sh b/workflow/scripts/versions_seurat.sh
new file mode 100644
index 0000000..e0a261a
--- /dev/null
+++ b/workflow/scripts/versions_seurat.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+#versions_seurat.sh
+#*
+#* --------------------------------------------------------------------------
+#* Licensed under MIT (https://git.biohpc.swmed.edu/BICF/Astrocyte/cellranger_count/blob/develop/LICENSE)
+#* --------------------------------------------------------------------------
+#*
+# Print only the Seurat version number; match the dotted digits rather than R's quote characters, which are curly only in UTF-8 locales and ASCII otherwise.
+seurat-Rscript -e 'packageVersion("Seurat")' |& grep '\[1\] ' | grep -oE '[0-9]+(\.[0-9]+)+' | head -n 1
-- 
GitLab