From 68ed6324e542bc6ffd1a01534f5e0e167009002d Mon Sep 17 00:00:00 2001
From: Felix Perez <felix.perez@utsouthwestern.edu>
Date: Thu, 4 Apr 2024 08:58:12 -0500
Subject: [PATCH] Replace the shell block with a script block in the runSource
 process.

---
 .gitignore                        |  2 +-
 astrocyte_pkg.yml                 | 16 ++++------------
 docs/index.md                     |  1 -
 workflow/configs/biohpc.config    | 25 +++----------------------
 workflow/main.nf                  | 29 ++++++++++++++---------------
 workflow/scripts/checkJobState.sh |  2 +-
 6 files changed, 23 insertions(+), 52 deletions(-)

diff --git a/.gitignore b/.gitignore
index 0ee8eb3..1066ccd 100644
--- a/.gitignore
+++ b/.gitignore
@@ -101,7 +101,7 @@ ENV/
 # Mac OS
 .DS_Store
 
-# nextflow analysis folders/files
+# Nextflow analysis folders/files
 workflow/dag*.dot*
 workflow/trace*.txt*
 workflow/.nextflow/
diff --git a/astrocyte_pkg.yml b/astrocyte_pkg.yml
index 3413971..89b6ed8 100644
--- a/astrocyte_pkg.yml
+++ b/astrocyte_pkg.yml
@@ -42,11 +42,11 @@ minimum_astrocyte_version: '2.0.1'
 # The Nextflow version that requires to run this workflow. For old pipelines, which do not have this label
 # a default value of 0.31.0 will be assigned automatically. Please make sure the requested nextflow version is available
 # in the module list.
-nextflow_version: '0.31.0'
+nextflow_version: '22.04.5'
 
 # (Optional) The Nextflow config file to use for this workflow. If provided, the file should exist in workflow/configs
 nextflow_config: 'biohpc.config'
 # The container to use for this workflow, none/singularity. If omitted, the default value 'none' will be used.
-container: 'singularity'
+container: 'none'
 # The version of singularity to use. This is required if container == 'singularity'
 singularity_version: '3.9.9'
@@ -73,16 +73,8 @@ documentation_files:
 # A list of cluster environment modules that this workflow requires to run.
 # Specify versioned module names to ensure reproducability.
 workflow_modules:
-  - Test
-
-# A list of container images required to run this workflow.
-# Specify full path and version names to ensure reproducibility.
-# This keyword is required when 'container' is specified in Astrocyte 0.4.1 and above.
-# Singularity supports different registries, please specify the protocol to use.
-# Such as, "docker://", "shub://", "library://", etc. We encourage you to use the GitLab
-# container registry of BioHPC to save and manage your container images.
-#workflow_containers:
-#  - docker://git.biohpc.swmed.edu:5050/s219741/astrocyte-atac-source/atac:0.0.1
+  - 'python/3.8.x-anaconda'
+  - 'openjdk/18'
 
 # A list of parameters used by the workflow, defining how to present them,
 # options etc in the web interface. For each parameter:
diff --git a/docs/index.md b/docs/index.md
index 77b3eef..8946c95 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -8,7 +8,6 @@ This repo is used to wrap the existing ATAC-seq pipeline listed below (Runner),
 - The ATAC-seq Runner workflow, 'astrocyte-atac-runner] (https://git.biohpc.swmed.edu/s219741/astrocyte-atac-runner). This repo contains the original ATAC-seq pipeline developed by the ENCODE team.
 
 ## The ATAC-seq Runner workflow
-<!-- TODO: Fill out intro to ATAC-seq pipeline. Which commands do we use to run this pipeline? What will the NextFlow script use? (AS) (DONE)-->
 This pipeline is designed for automated end-to-end quality control and processing of ATAC-seq. The pipeline can be run end-to-end, starting from raw FASTQ files all the way to peak calling and signal track generation using a single caper submit command.
 One can also start the pipeline from intermediate stages (for example, using alignment files as input). The pipeline supports both single-end and paired-end data as well as replicated or non-replicated datasets. The outputs produced by the pipeline include 1) formatted HTML reports that include quality control measures specifically designed for ATAC-seq and DNase-seq data, 2) analysis of reproducibility, 3) stringent and relaxed thresholding of peaks, 4) fold-enrichment and pvalue signal tracks.
 
diff --git a/workflow/configs/biohpc.config b/workflow/configs/biohpc.config
index a83de14..74db254 100755
--- a/workflow/configs/biohpc.config
+++ b/workflow/configs/biohpc.config
@@ -1,30 +1,11 @@
-/*
-singularity {
-  enabled = true
-  runOptions = ''
-  // Below connects the experimental atac container to BioHPC's Slurm job scheduler.
-  // runOptions = '\
-  //   --bind /cm/shared/apps/slurm/16.05.8,/etc/slurm,/cm/shared/apps/slurm/var/etc/,/usr/lib64/libreadline.so.6 \
-  //   --bind /usr/lib64/libhistory.so.6,/usr/lib64/libtinfo.so.5,/var/run/munge,/usr/lib64/libmunge.so.2 \
-  //   --bind /usr/lib64/libmunge.so.2.0.0,/cm/shared/apps/slurm/16.05.8/lib64/slurm/'
-
-  // Please do NOT use "--disable-cache" in this runOptions.
-  // Starting from version 2.0.0, the astrocyte_cli will clean up the cache automatically.
-  // runOptions = '--bind /vagrant:/vagrant' // Use this one for vagrant development env only
-  cacheDir = "$baseDir/images/singularity" // Singularity images specified in `workflow_containers` of astrocyte_pkg.yml will be saved to
-                                           // this folder automatically, before running the workflow. The images will be renamed as
-                                           // "NAME-TAG.img", e.g. ubuntu-latest.img, centos-centos8.img, r-4.1.1.img, etc.
-}*/
-
 process {
   executor = 'slurm'
   clusterOptions = '--hold --no-kill'
   queue = 'super'
-  beforeScript = 'ulimit -Ss unlimited'
 
   withName:runSource {
-    // Experimental containerized version of the caper software.
-    // container = 'docker://git.biohpc.swmed.edu:5050/s219741/astrocyte-atac-source/atac:0.0.1'
+    module = ['python/3.8.x-anaconda', 'openjdk/18']
     executor = 'local'
   }
-}
+
+}
\ No newline at end of file
diff --git a/workflow/main.nf b/workflow/main.nf
index f2894ee..14468da 100644
--- a/workflow/main.nf
+++ b/workflow/main.nf
@@ -26,15 +26,14 @@ process runSource {
 
   output:
   file '*'
-
-  shell:
-  '''
-  export PATH="/cm/shared/apps/openjdk/18/jdk-18/bin:/cm/shared/apps/python/3.8.x-anaconda/condabin:/cm/shared/apps/python/3.8.x-anaconda/bin:$PATH"
-  python --version > python_version.txt
-  java -version 2> java_version_before.txt
+
+  script:
+  """
+  module load python/3.8.x-anaconda
+  module load openjdk/18
 
   # Enable the use of bash-specific conda commands in this shell.
-  eval "$(conda shell.bash hook)"
+  eval "\$(conda shell.bash hook)"
 
   # Create a temporary conda environment for caper.
   conda create -y -c bioconda -c defaults -c conda-forge --name astrocyte-atac-caper python=3.8.18
@@ -51,20 +50,20 @@ process runSource {
   caper --version > caper_version.txt
 
   # Launch the ATAC-seq leader job.
-  jobsubmit=$(caper hpc submit !{baseDir}/external_repo/astrocyte-atac-runner/atac.wdl -i !{inputJson} --singularity --leader-job-name atac-source)
+  jobsubmit=\$(caper hpc submit $baseDir/external_repo/astrocyte-atac-runner/atac.wdl -i $inputJson --singularity --leader-job-name atac-source)
 
   # Monitor the state of the leader job; if it enters the COMPLETED, FAILED, or CANCELLED state, then finish the workflow process.
-  state=$(bash !{baseDir}/scripts/checkJobState.sh "${jobsubmit}")
-  echo "Lead Job state check $(date) - State: $state" >> lead_job_check.txt
-  while [[ "$state" != *"COMPLETED"* ]] && [[ "$state" != *"FAILED"* ]] && [[ "$state" != *"CANCELLED"* ]]; do
+  state=\$(bash $baseDir/scripts/checkJobState.sh "\$jobsubmit")
+  echo "Lead Job state check \$(date) - State: \$state" >> lead_job_check.txt
+  while [[ "\$state" != *"COMPLETED"* ]] && [[ "\$state" != *"FAILED"* ]] && [[ "\$state" != *"CANCELLED"* ]]; do
     sleep 15
-    state=$(bash !{baseDir}/scripts/checkJobState.sh "${jobsubmit}")
-    echo "Lead Job state check $(date) - State: $state" >> lead_job_check.txt
+    state=\$(bash $baseDir/scripts/checkJobState.sh "\$jobsubmit")
+    echo "Lead Job state check \$(date) - State: \$state" >> lead_job_check.txt
   done
-
+
   # Deactivate the temporary caper conda environment and delete it.
   conda deactivate
   conda remove --name astrocyte-atac-caper --all
   rm -rf ~/.caper/
-  '''
+  """
 }
diff --git a/workflow/scripts/checkJobState.sh b/workflow/scripts/checkJobState.sh
index d3e8d21..2ebde08 100755
--- a/workflow/scripts/checkJobState.sh
+++ b/workflow/scripts/checkJobState.sh
@@ -1,6 +1,6 @@
 #!/bin/bash
 
-# Get the jobID of the caper lead job from the input txt file.
+# Get the jobID of the caper lead job from the given input string.
 read -ra line <<< "$1"
 jobID=${line[3]}
-- 
GitLab
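
A note on the monitoring loop patched above: the checkJobState.sh hunk shows only how the script extracts the Slurm job ID, namely the fourth whitespace-separated token of the `caper hpc submit` output that main.nf captures in `jobsubmit`; the actual state lookup sits outside the patched region. Below is a minimal sketch of how a complete version of the script might resolve that ID to the state string the while loop matches, assuming the leader job runs under Slurm and sacct is available on the submission host. The sacct query is an illustration only, not code from this repository.

    #!/bin/bash
    # Hypothetical complete checkJobState.sh -- only the first two commands
    # below appear in this patch; the sacct lookup is an assumption.
    # $1 is the full output line captured from `caper hpc submit`.
    read -ra line <<< "$1"
    jobID=${line[3]}  # fourth whitespace-separated token = Slurm job ID

    # Ask Slurm accounting for the job's state, e.g. PENDING, RUNNING,
    # COMPLETED, FAILED, or "CANCELLED by <uid>".
    sacct -j "$jobID" --format=State --noheader | head -n 1

Because the while condition in main.nf tests substrings (*"COMPLETED"*, *"FAILED"*, *"CANCELLED"*), state variants such as "CANCELLED by 12345" would still terminate the loop.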