From 68ed6324e542bc6ffd1a01534f5e0e167009002d Mon Sep 17 00:00:00 2001
From: Felix Perez <felix.perez@utsouthwestern.edu>
Date: Thu, 4 Apr 2024 08:58:12 -0500
Subject: [PATCH] Replace the shell block with a script block in the runSource
 process.

---
 .gitignore                        |  2 +-
 astrocyte_pkg.yml                 | 16 ++++------------
 docs/index.md                     |  1 -
 workflow/configs/biohpc.config    | 25 +++----------------------
 workflow/main.nf                  | 29 ++++++++++++++---------------
 workflow/scripts/checkJobState.sh |  2 +-
 6 files changed, 23 insertions(+), 52 deletions(-)

diff --git a/.gitignore b/.gitignore
index 0ee8eb3..1066ccd 100644
--- a/.gitignore
+++ b/.gitignore
@@ -101,7 +101,7 @@ ENV/
 # Mac OS
 .DS_Store
 
-# nextflow analysis folders/files
+# Nextflow analysis folders/files
 workflow/dag*.dot*
 workflow/trace*.txt*
 workflow/.nextflow/
diff --git a/astrocyte_pkg.yml b/astrocyte_pkg.yml
index 3413971..89b6ed8 100644
--- a/astrocyte_pkg.yml
+++ b/astrocyte_pkg.yml
@@ -42,11 +42,11 @@ minimum_astrocyte_version: '2.0.1'
 # The Nextflow version that requires to run this workflow. For old pipelines, which do not have this label
 # a default value of 0.31.0 will be assigned automatically. Please make sure the requested nextflow version is available
 # in the module list.
-nextflow_version: '0.31.0'
+nextflow_version: '22.04.5'
 
 # (Optional) The Nextflow config file to use for this workflow. If provided, the file should exist in workflow/configs
 nextflow_config: 'biohpc.config'
 # The container to use for this workflow, none/singularity. If omitted, the default value 'none' will be used.
-container: 'singularity'
+container: 'none'
 # The version of singularity to use. This is required if container == 'singularity'
 singularity_version: '3.9.9'
@@ -73,16 +73,8 @@ documentation_files:
 # A list of cluster environment modules that this workflow requires to run.
 # Specify versioned module names to ensure reproducability.
 workflow_modules:
-  - Test
-
-# A list of container images required to run this workflow.
-# Specify full path and version names to ensure reproducibility.
-# This keyword is required when 'container' is specified in Astrocyte 0.4.1 and above.
-# Singularity supports different registries, please specify the protocol to use.
-# Such as, "docker://", "shub://", "library://", etc. We encourage you to use the GitLab
-# container registry of BioHPC to save and manage your container images.
-#workflow_containers:
-#  - docker://git.biohpc.swmed.edu:5050/s219741/astrocyte-atac-source/atac:0.0.1
+  - 'python/3.8.x-anaconda'
+  - 'openjdk/18'
 
 # A list of parameters used by the workflow, defining how to present them,
 # options etc in the web interface. For each parameter:
diff --git a/docs/index.md b/docs/index.md
index 77b3eef..8946c95 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -8,7 +8,6 @@ This repo is used to wrap the existing ATAC-seq pipeline listed below (Runner),
 - The ATAC-seq Runner workflow, 'astrocyte-atac-runner] (https://git.biohpc.swmed.edu/s219741/astrocyte-atac-runner). This repo contains the original ATAC-seq pipeline developed by the ENCODE team.
 
 ## The ATAC-seq Runner workflow
-<!-- TODO: Fill out intro to ATAC-seq pipeline. Which commands do we use to run this pipeline? What will the NextFlow script use? (AS) (DONE)-->
 This pipeline is designed for automated end-to-end quality control and processing of ATAC-seq. The pipeline can be run end-to-end, starting from raw FASTQ files all the way to peak calling and signal track generation using a single caper submit command.
 One can also start the pipeline from intermediate stages (for example, using alignment files as input). The pipeline supports both single-end and paired-end data as well as replicated or non-replicated datasets. The outputs produced by the pipeline include 1) formatted HTML reports that include quality control measures specifically designed for ATAC-seq and DNase-seq data, 2) analysis of reproducibility, 3) stringent and relaxed thresholding of peaks, 4) fold-enrichment and pvalue signal tracks.
 
diff --git a/workflow/configs/biohpc.config b/workflow/configs/biohpc.config
index a83de14..74db254 100755
--- a/workflow/configs/biohpc.config
+++ b/workflow/configs/biohpc.config
@@ -1,30 +1,11 @@
-/*
-singularity {
-  enabled = true
-  runOptions = ''
-  // Below connects the experimental atac container to BioHPC's Slurm job scheduler.
-  // runOptions = '\
-  //   --bind /cm/shared/apps/slurm/16.05.8,/etc/slurm,/cm/shared/apps/slurm/var/etc/,/usr/lib64/libreadline.so.6 \
-  //   --bind /usr/lib64/libhistory.so.6,/usr/lib64/libtinfo.so.5,/var/run/munge,/usr/lib64/libmunge.so.2 \
-  //   --bind /usr/lib64/libmunge.so.2.0.0,/cm/shared/apps/slurm/16.05.8/lib64/slurm/'
-
-  // Please do NOT use "--disable-cache" in this runOptions.
-  // Starting from version 2.0.0, the astrocyte_cli will clean up the cache automatically.
-  // runOptions = '--bind /vagrant:/vagrant' // Use this one for vagrant development env only
-  cacheDir = "$baseDir/images/singularity" // Singularity images specified in `workflow_containers` of astrocyte_pkg.yml will be saved to
-                                           // this folder automatically, before running the workflow. The images will be renamed as
-                                           // "NAME-TAG.img", e.g. ubuntu-latest.img, centos-centos8.img, r-4.1.1.img, etc.
-}*/
-
 process {
   executor = 'slurm'
   clusterOptions = '--hold --no-kill'
   queue = 'super'
-  beforeScript = 'ulimit -Ss unlimited'
 
   withName:runSource {
-    // Experimental containerized version of the caper software.
-    // container = 'docker://git.biohpc.swmed.edu:5050/s219741/astrocyte-atac-source/atac:0.0.1'
+    module = ['python/3.8.x-anaconda', 'openjdk/18']
     executor = 'local'
   }
-}
+
+}
\ No newline at end of file
diff --git a/workflow/main.nf b/workflow/main.nf
index f2894ee..14468da 100644
--- a/workflow/main.nf
+++ b/workflow/main.nf
@@ -26,15 +26,14 @@ process runSource {
 
   output:
   file '*'
-
-  shell:
-  '''
-  export PATH="/cm/shared/apps/openjdk/18/jdk-18/bin:/cm/shared/apps/python/3.8.x-anaconda/condabin:/cm/shared/apps/python/3.8.x-anaconda/bin:$PATH"
-  python --version > python_version.txt
-  java -version 2> java_version_before.txt
+
+  script:
+  """
+  module load python/3.8.x-anaconda
+  module load openjdk/18
 
   # Enable the use of bash-specific conda commands in this shell.
-  eval "$(conda shell.bash hook)"
+  eval "\$(conda shell.bash hook)"
 
   # Create a temporary conda environment for caper.
   conda create -y -c bioconda -c defaults -c conda-forge --name astrocyte-atac-caper python=3.8.18
@@ -51,20 +50,20 @@ process runSource {
   caper --version > caper_version.txt
 
   # Launch the ATAC-seq leader job.
-  jobsubmit=$(caper hpc submit !{baseDir}/external_repo/astrocyte-atac-runner/atac.wdl -i !{inputJson} --singularity --leader-job-name atac-source)
+  jobsubmit=\$(caper hpc submit $baseDir/external_repo/astrocyte-atac-runner/atac.wdl -i $inputJson --singularity --leader-job-name atac-source)
 
   # Monitor the state of the leader job; if it enters the COMPLETED, FAILED, or CANCELLED state, then finish the workflow process.
-  state=$(bash !{baseDir}/scripts/checkJobState.sh "${jobsubmit}")
-  echo "Lead Job state check $(date) - State: $state" >> lead_job_check.txt
-  while [[ "$state" != *"COMPLETED"* ]] && [[ "$state" != *"FAILED"* ]] && [[ "$state" != *"CANCELLED"* ]]; do
+  state=\$(bash $baseDir/scripts/checkJobState.sh "\$jobsubmit")
+  echo "Lead Job state check \$(date) - State: \$state" >> lead_job_check.txt
+  while [[ "\$state" != *"COMPLETED"* ]] && [[ "\$state" != *"FAILED"* ]] && [[ "\$state" != *"CANCELLED"* ]]; do
     sleep 15
-    state=$(bash !{baseDir}/scripts/checkJobState.sh "${jobsubmit}")
-    echo "Lead Job state check $(date) - State: $state" >> lead_job_check.txt
+    state=\$(bash $baseDir/scripts/checkJobState.sh "\$jobsubmit")
+    echo "Lead Job state check \$(date) - State: \$state" >> lead_job_check.txt
   done
-
+
   # Deactivate the temporary caper conda environment and delete it.
   conda deactivate
   conda remove --name astrocyte-atac-caper --all
   rm -rf ~/.caper/
-  '''
+  """
 }
diff --git a/workflow/scripts/checkJobState.sh b/workflow/scripts/checkJobState.sh
index d3e8d21..2ebde08 100755
--- a/workflow/scripts/checkJobState.sh
+++ b/workflow/scripts/checkJobState.sh
@@ -1,6 +1,6 @@
 #!/bin/bash
 
-# Get the jobID of the caper lead job from the input txt file.
+# Get the jobID of the caper lead job from the given input string.
 read -ra line <<< "$1"
 jobID=${line[3]}
-- 
GitLab
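
A note on the monitoring loop patched above: the checkJobState.sh hunk shows only how the script extracts the Slurm job ID, namely the fourth whitespace-separated token of the `caper hpc submit` output that main.nf captures in `jobsubmit`; the actual state lookup sits outside the patched region. Below is a minimal sketch of how a complete version of the script might resolve that ID to the state string the while loop matches, assuming the leader job runs under Slurm and sacct is available on the submission host. The sacct query is an illustration only, not code from this repository.

    #!/bin/bash
    # Hypothetical complete checkJobState.sh -- only the first two commands
    # below appear in this patch; the sacct lookup is an assumption.
    # $1 is the full output line captured from `caper hpc submit`.
    read -ra line <<< "$1"
    jobID=${line[3]}  # fourth whitespace-separated token = Slurm job ID

    # Ask Slurm accounting for the job's state, e.g. PENDING, RUNNING,
    # COMPLETED, FAILED, or "CANCELLED by <uid>".
    sacct -j "$jobID" --format=State --noheader | head -n 1

Because the while condition in main.nf tests substrings (*"COMPLETED"*, *"FAILED"*, *"CANCELLED"*), state variants such as "CANCELLED by 12345" would still terminate the loop.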