From 5677b2e2c3440fc6d05b4c497eb97d5b90b41142 Mon Sep 17 00:00:00 2001
From: "Gervaise H. Henry" <gervaise.henry@utsouthwestern.edu>
Date: Sun, 21 Jun 2020 13:50:56 -0500
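Subject: [PATCH] Major cleanup update

- CI: load nextflow/20.01.0 and disable automatic job retries
- README/docs: simplify the design table header and reorder references
- Config: add standard, ondemand and spot profiles; move the AWS Batch
  queue out of aws.config into the ondemand/spot configs
- main.nf: drop per-process container directives, add pipelineVersion and
  params.dev to the tracking payload
- versions: report pipeline and python versions instead of MultiQC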

---
 .gitlab-ci.yml                        | 18 ++++++-------
 README.md                             |  4 +--
 docs/references.md                    | 12 ++++-----
 workflow/conf/aws.config              |  3 +--
 workflow/conf/ondemand.config         |  3 +++
 workflow/conf/spot.config             |  3 +++
 workflow/main.nf                      | 38 +++++++++++++++++----------
 workflow/nextflow.config              | 19 ++++++++++----
 workflow/scripts/generate_versions.py | 12 +++++----
 workflow/scripts/versions_python.sh   |  9 +++++++
 10 files changed, 78 insertions(+), 43 deletions(-)
 create mode 100644 workflow/conf/ondemand.config
 create mode 100644 workflow/conf/spot.config
 create mode 100644 workflow/scripts/versions_python.sh

diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 19d9073..598e601 100755
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -3,7 +3,7 @@ before_script:
   - module load python/3.6.1-2-anaconda
   - pip install --user pytest-pythonpath==0.7.1 pytest-cov==2.5.1
   - module load singularity/3.0.2
-  - module load nextflow/19.09.0
+  - module load nextflow/20.01.0
   - mkdir -p test_data/hu.v2s1r500
   - mkdir -p test_data/hu.v3s1r500
   - mkdir -p test_data/mu.v3s1r500
@@ -30,7 +30,7 @@ astrocyte_cli:
   artifacts:
     expire_in: 2 days
   retry:
-    max: 1
+    max: 0
     when:
       - always
 
@@ -52,7 +52,7 @@ astrocyte_cli:
       - test/outs/web_summary.html
     expire_in: 2 days
   retry:
-    max: 1
+    max: 0
     when:
       - always
 
@@ -74,7 +74,7 @@ astrocyte_cli:
       - test/outs/web_summary.html
     expire_in: 2 days
   retry:
-    max: 1
+    max: 0
     when:
       - always
 
@@ -96,7 +96,7 @@ astrocyte_cli:
       - test/outs/web_summary.html
     expire_in: 2 days
   retry:
-    max: 1
+    max: 0
     when:
       - always
 
@@ -120,7 +120,7 @@ astrocyte_cli:
       - test/outs/web_summary.html
     expire_in: 2 days
   retry:
-    max: 1
+    max: 0
     when:
       - always
 
@@ -144,7 +144,7 @@ GRCh38-3.0.0:
       - workflow/output/multiqc/run/multiqc_report.html
     expire_in: 2 days
   retry:
-    max: 1
+    max: 0
     when:
       - always
 
@@ -168,7 +168,7 @@ mm10-3.0.0:
       - workflow/output/multiqc/run/multiqc_report.html
     expire_in: 2 days
   retry:
-    max: 1
+    max: 0
     when:
       - always
 
@@ -191,6 +191,6 @@ mm10-3.0.0:
       - workflow/output/multiqc/run/multiqc_report.html
     expire_in: 2 days
   retry:
-    max: 1
+    max: 0
     when:
       - always
diff --git a/README.md b/README.md
index e8fe72a..1edd5a8 100755
--- a/README.md
+++ b/README.md
@@ -111,8 +111,8 @@ To Run:
   ```
 * Design example:
 
-| Sample  | fastq_R1                           | fastq_R2                           |
-|---------|------------------------------------|------------------------------------|
+| Sample | fastq_R1 | fastq_R2 |
+|--------|----------|----------|
 | sample1 | pbmc_1k_v2_S1_L001_R1_001.fastq.gz | pbmc_1k_v2_S1_L001_R2_001.fastq.gz |
 | sample2 | pbmc_1k_v2_S2_L001_R1_001.fastq.gz | pbmc_1k_v2_S2_L001_R2_001.fastq.gz |
 | sample2 | pbmc_1k_v2_S2_L002_R1_001.fastq.gz | pbmc_1k_v2_S2_L002_R2_001.fastq.gz |
diff --git a/docs/references.md b/docs/references.md
index ea483c4..37f42d8 100644
--- a/docs/references.md
+++ b/docs/references.md
@@ -1,13 +1,13 @@
 ### References
 
-1. **python**:
-  * Anaconda (Anaconda Software Distribution, [https://anaconda.com](https://anaconda.com))
+1. **Nextflow**:
+  * Di Tommaso P., Chatzou M., Floden E. W., Barja P. P., Palumbo E., and Notredame C. 2017. Nextflow enables reproducible computational workflows. Nature biotechnology 35(4): 316. doi:[10.1038/nbt.3820](https://doi.org/10.1038/nbt.3820)
 
 2. **cellranger**
   * Cellranger count [https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/using/count](https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/using/count)
 
-3. **MultiQc**:
-  * Ewels P., Magnusson M., Lundin S. and Käller M. 2016. MultiQC: Summarize analysis results for multiple tools and samples in a single report. Bioinformatics 32(19): 3047–3048. doi:[10.1093/bioinformatics/btw354](https://dx.doi.org/10.1093/bioinformatics/btw354)
+3. **python**:
+  * Anaconda (Anaconda Software Distribution, [https://anaconda.com](https://anaconda.com))
 
-4. **Nextflow**:
-  * Di Tommaso P., Chatzou M., Floden E. W., Barja P. P., Palumbo E., and Notredame C. 2017. Nextflow enables reproducible computational workflows. Nature biotechnology 35(4): 316. doi:[10.1038/nbt.3820](https://doi.org/10.1038/nbt.3820)
+4. **MultiQC**:
+  * Ewels P., Magnusson M., Lundin S. and Käller M. 2016. MultiQC: Summarize analysis results for multiple tools and samples in a single report. Bioinformatics 32(19): 3047–3048. doi:[10.1093/bioinformatics/btw354](https://dx.doi.org/10.1093/bioinformatics/btw354)
diff --git a/workflow/conf/aws.config b/workflow/conf/aws.config
index 6caee14..767a7d2 100644
--- a/workflow/conf/aws.config
+++ b/workflow/conf/aws.config
@@ -1,4 +1,4 @@
-workDir = 's3://'
+workDir = 's3://gudmap.rbk/work'
 aws.client.storageEncryption = 'AES256'
 aws {
   region = ''
@@ -9,7 +9,6 @@ aws {
 
 process {
   executor = 'awsbatch'
-  queue = 'default-'
   cpus = 1
   memory = '1 GB'
 
diff --git a/workflow/conf/ondemand.config b/workflow/conf/ondemand.config
new file mode 100644
index 0000000..d89352b
--- /dev/null
+++ b/workflow/conf/ondemand.config
@@ -0,0 +1,3 @@
+process {
+  queue = 'highpriority-3278a8b0-1fc8-11ea-b1ac-021e2396e2cc'
+}
diff --git a/workflow/conf/spot.config b/workflow/conf/spot.config
new file mode 100644
index 0000000..6f1bfe0
--- /dev/null
+++ b/workflow/conf/spot.config
@@ -0,0 +1,3 @@
+process {
+  queue = 'default-3278a8b0-1fc8-11ea-b1ac-021e2396e2cc'
+}
diff --git a/workflow/main.nf b/workflow/main.nf
index b796771..23b126d 100755
--- a/workflow/main.nf
+++ b/workflow/main.nf
@@ -8,6 +8,15 @@ main.nf
 *
 */
 
+//  ########  ####  ######  ######## 
+//  ##     ##  ##  ##    ## ##       
+//  ##     ##  ##  ##       ##       
+//  ########   ##  ##       ######   
+//  ##     ##  ##  ##       ##       
+//  ##     ##  ##  ##    ## ##       
+//  ########  ####  ######  ##       
+
+
 // Define Input variables
 params.name = "run"
 params.fastq = "test_data/mu.v3s1r500/*.fastq.gz"
@@ -55,6 +64,7 @@ if (params.astrocyte) {
 params.genomeLocationFull = params.genomeLocation+params.genome
 
 // Define variables from input
+pipelineVersion = "2.x.x-indev"
 name = params.name
 designLocation = Channel
   .fromPath(params.designFile)
@@ -81,6 +91,7 @@ references = "${baseDir}/../docs/references.md"
  * trackStart: track start of pipeline
  */
 params.ci = false
+params.dev = false
 process trackStart {
   script:
   """
@@ -91,11 +102,13 @@ process trackStart {
   curl -H 'Content-Type: application/json' -X PUT -d '{ \
       "sessionId": "${workflow.sessionId}", \
       "pipeline": "cellranger_count", \
+      "pipelineVersion": "${pipelineVersion}", \
       "start": "${workflow.start}", \
       "astrocyte": ${params.astrocyte}, \
       "status": "started", \
       "nextflowVersion": "${workflow.nextflow.version}",
-      "ci": ${params.ci}}' \
+      "ci": ${params.ci},
+      "dev": ${params.dev}}' \
   "https://xku43pcwnf.execute-api.us-east-1.amazonaws.com/ProdDeploy/pipeline-tracking"
   """
 }
@@ -105,7 +118,6 @@ process trackStart {
  */
 process checkDesignFile {
   tag "${name}"
-  container = 'bicf/python3:2.0.0'
 
   input:
     file designLocation
@@ -164,10 +176,9 @@ chemistryParam310 = chemistryParam
  * count211: run cellranger count version 2.1.1
  */
 process count211 {
-  queue '128GB,256GB,256GBv1,384GB'
   tag "${sample}"
   publishDir "${outDir}/${task.process}", mode: 'copy'
-  container 'bicf/cellranger2.1.1:2.0.0'
+  queue '128GB,256GB,256GBv1,384GB'
 
   input:
     set sample, file("${sample}_S1_L00?_R1_001.fastq.gz"), file("${sample}_S1_L00?_R2_001.fastq.gz") from samples211
@@ -213,7 +224,6 @@ process count220 {
   queue '128GB,256GB,256GBv1,384GB'
   tag "${sample}"
   publishDir "${outDir}/${task.process}", mode: 'copy'
-  container 'bicf/cellranger2.2.0:2.0.0'
 
   input:
     set sample, file("${sample}_S1_L00?_R1_001.fastq.gz"), file("${sample}_S1_L00?_R2_001.fastq.gz") from samples220
@@ -256,10 +266,9 @@ process count220 {
  * count302: run cellranger count version 3.0.2
  */
 process count302 {
-  queue '128GB,256GB,256GBv1,384GB'
   tag "${sample}"
   publishDir "${outDir}/${task.process}", mode: 'copy'
-  container 'bicf/cellranger3.0.2:2.0.0'
+  queue '128GB,256GB,256GBv1,384GB'
 
   input:
     set sample, file("${sample}_S?_L001_R1_001.fastq.gz"), file("${sample}_S?_L001_R2_001.fastq.gz") from samples302
@@ -302,10 +311,9 @@ process count302 {
  * count310: run cellranger count version 3.1.0
  */
 process count310 {
-  queue '128GB,256GB,256GBv1,384GB'
   tag "${sample}"
   publishDir "${outDir}/${task.process}", mode: 'copy'
-  container 'bicf/cellranger3.1.0:2.0.0'
+  queue '128GB,256GB,256GBv1,384GB'
 
   input:
     set sample, file("${sample}_S?_L001_R1_001.fastq.gz"), file("${sample}_S?_L001_R2_001.fastq.gz") from samples310
@@ -345,13 +353,13 @@ process count310 {
 }
 
 /*
- * versions: collect too versions into a single yml
+ * versions: collect all versions into a single yml
  */
 process versions {
   tag "${name}"
-  container 'bicf/python3:2.0.0'
 
   input:
+    file versions_pythonScript from Channel.fromPath("${baseDir}/scripts/versions_python.sh")
 
   output:
     file("*.yaml") into yamlPaths
@@ -359,10 +367,12 @@ process versions {
   script:
     """
     hostname
+    ulimit -u 16384
     ulimit -a
-    echo ${workflow.nextflow.version} > version_nextflow.txt
-    echo ${version} > version_cellranger.txt
-    multiqc --version | tr -d 'multiqc, version ' > version_multiqc.txt
+    echo "${workflow.nextflow.version}" > version_nextflow.txt
+    echo "${pipelineVersion}" > version_pipeline.txt
+    echo "${version}" > version_cellranger.txt
+    bash versions_python.sh > version_python.txt
     python3 "${baseDir}/scripts/generate_versions.py" -f version_*.txt -o versions
     python3 "${baseDir}/scripts/generate_references.py" -r "${references}" -o references
     """
diff --git a/workflow/nextflow.config b/workflow/nextflow.config
index b05454d..4e2de2d 100644
--- a/workflow/nextflow.config
+++ b/workflow/nextflow.config
@@ -1,15 +1,24 @@
 profiles {
+  standard {
+    includeConfig 'conf/biohpc.config'
+  }
   biohpc {
-    includeConfig 'conf/biohpc.config'
+    includeConfig 'conf/biohpc.config'
   }
   local {
-    includeConfig 'conf/local.config'
+    includeConfig 'conf/local.config'
   }
   cluster {
-    includeConfig 'conf/cluster.config'
+    includeConfig 'conf/cluster.config'
   }
   aws {
-    includeConfig 'conf/aws.config'
+    includeConfig 'conf/aws.config'
+  }
+  ondemand {
+    includeConfig 'conf/ondemand.config'
+  }
+  spot {
+    includeConfig 'conf/spot.config'
   }
 }
 
@@ -62,6 +71,6 @@ manifest {
   homePage = 'https://git.biohpc.swmed.edu/BICF/Astrocyte/cellranger_count'
   description = 'This pipeline is a wrapper for the cellranger count tool from 10x Genomics. It takes fastq files from 10x Genomics Single Cell Gene Expression libraries, performs alignment, filtering, barcode counting, and UMI counting. It uses the Chromium cellular barcodes to generate gene-barcode matrices, determine clusters, and perform gene expression analysis.'
   mainScript = 'main.nf'
-  version = 'publish_2.0.4'
+  version = '2.x.x-indev'
   nextflowVersion = '>=0.31.0'
 }
diff --git a/workflow/scripts/generate_versions.py b/workflow/scripts/generate_versions.py
index ddcda53..978aedc 100755
--- a/workflow/scripts/generate_versions.py
+++ b/workflow/scripts/generate_versions.py
@@ -24,9 +24,10 @@ logger.propagate = False
 logger.setLevel(logging.INFO)
 
 SOFTWARE_REGEX = {
+    'Pipeline': ['version_pipeline.txt', r"(\S+)"],
     'Nextflow': ['version_nextflow.txt', r"(\S+)"],
-    'Cellranger Count': ['version_cellranger.txt', r"(\S+)"],
-    'MultiQC': ['version_multiqc.txt', r"(\S+)"],
+    'cellranger count': ['version_cellranger.txt', r"(\S+)"],
+    'python': ['version_python.txt', r"(\S+)"],
 }
 
 
@@ -72,9 +73,10 @@ def main():
     out_filename = output + '_mqc.yaml'
 
     results = OrderedDict()
+    results['Pipeline'] = '<span style="color:#999999;\">N/A</span>'
     results['Nextflow'] = '<span style="color:#999999;\">N/A</span>'
-    results['Cellranger Count'] = '<span style="color:#999999;\">N/A</span>'
-    results['MultiQC'] = '<span style="color:#999999;\">N/A</span>'
+    results['cellranger count'] = '<span style="color:#999999;\">N/A</span>'
+    results['python'] = '<span style="color:#999999;\">N/A</span>'
 
     # Check for version files:
     check_files(files)
@@ -106,4 +108,4 @@ def main():
 
 
 if __name__ == '__main__':
-    main()
\ No newline at end of file
+    main()
diff --git a/workflow/scripts/versions_python.sh b/workflow/scripts/versions_python.sh
new file mode 100644
index 0000000..ff79391
--- /dev/null
+++ b/workflow/scripts/versions_python.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+#versions_python.sh
+#*
+#* --------------------------------------------------------------------------
+#* Licensed under MIT (https://git.biohpc.swmed.edu/BICF/Astrocyte/cellranger_count/blob/develop/LICENSE)
+#* --------------------------------------------------------------------------
+#*
+
+python3 --version |& grep 'Python ' | sed -n -e 's/^Python //p'
-- 
GitLab