diff --git a/.gitignore b/.gitignore
index 0ee8eb31fed6a000d253a18e8c9648776c984624..1066ccd6fb3d32c8f6f0705e50074e0d9008bfdb 100644
--- a/.gitignore
+++ b/.gitignore
@@ -101,7 +101,7 @@ ENV/
 # Mac OS
 .DS_Store

-# nextflow analysis folders/files
+# Nextflow analysis folders/files
 workflow/dag*.dot*
 workflow/trace*.txt*
 workflow/.nextflow/
diff --git a/astrocyte_pkg.yml b/astrocyte_pkg.yml
index 34139718b39e4bc571ae992cbc92f232470d1763..89b6ed8e1e650f751bc1742aa913e1d5912c98f5 100644
--- a/astrocyte_pkg.yml
+++ b/astrocyte_pkg.yml
@@ -42,11 +42,11 @@ minimum_astrocyte_version: '2.0.1'
 # The Nextflow version required to run this workflow. For old pipelines, which do not have this label,
 # a default value of 0.31.0 will be assigned automatically. Please make sure the requested nextflow version is available
 # in the module list.
-nextflow_version: '0.31.0'
+nextflow_version: '22.04.5'
 # (Optional) The Nextflow config file to use for this workflow. If provided, the file should exist in workflow/configs
 nextflow_config: 'biohpc.config'
 # The container to use for this workflow, none/singularity. If omitted, the default value 'none' will be used.
-container: 'singularity'
+container: 'none'

 # The version of singularity to use. This is required if container == 'singularity'
 singularity_version: '3.9.9'
@@ -73,16 +73,8 @@ documentation_files:
 # A list of cluster environment modules that this workflow requires to run.
 # Specify versioned module names to ensure reproducibility.
 workflow_modules:
-  - Test
-
-# A list of container images required to run this workflow.
-# Specify full path and version names to ensure reproducibility.
-# This keyword is required when 'container' is specified in Astrocyte 0.4.1 and above.
-# Singularity supports different registries, please specify the protocol to use.
-# Such as, "docker://", "shub://", "library://", etc. We encourage you to use the GitLab
-# container registry of BioHPC to save and manage your container images.
-#workflow_containers:
-#  - docker://git.biohpc.swmed.edu:5050/s219741/astrocyte-atac-source/atac:0.0.1
+  - 'python/3.8.x-anaconda'
+  - 'openjdk/18'

 # A list of parameters used by the workflow, defining how to present them,
 # options etc in the web interface. For each parameter:
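Reviewer note (not part of the patch): with the container setup removed, the workflow now depends entirely on BioHPC's environment-module system via the two workflow_modules entries above. A quick sanity check on a cluster node, assuming the module names are available exactly as declared:

    module load python/3.8.x-anaconda
    module load openjdk/18
    python --version   # expect a Python 3.8.x build
    java -version      # expect an OpenJDK 18 build
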
diff --git a/docs/index.md b/docs/index.md
index 77b3eefda3d5d1c1fbbbda328a2c0d5123c3d9da..8946c95dc5fe1219df31722d8286d9c68303fb17 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -8,7 +8,6 @@ This repo is used to wrap the existing ATAC-seq pipeline listed below (Runner),
 - The ATAC-seq Runner workflow, [astrocyte-atac-runner](https://git.biohpc.swmed.edu/s219741/astrocyte-atac-runner). This repo contains the original ATAC-seq pipeline developed by the ENCODE team.

 ## The ATAC-seq Runner workflow
-<!-- TODO: Fill out intro to ATAC-seq pipeline. Which commands do we use to run this pipeline? What will the NextFlow script use? (AS) (DONE)-->
 This pipeline is designed for automated end-to-end quality control and processing of ATAC-seq data. The pipeline can be run end-to-end, starting from raw FASTQ files all the way to peak calling and signal track generation, using a single caper submit command. One can also start the pipeline from intermediate stages (for example, using alignment files as input). The pipeline supports both single-end and paired-end data as well as replicated or non-replicated datasets.

 The outputs produced by the pipeline include: 1) formatted HTML reports that include quality control measures specifically designed for ATAC-seq and DNase-seq data, 2) analysis of reproducibility, 3) stringent and relaxed thresholding of peaks, and 4) fold-enrichment and p-value signal tracks.
diff --git a/workflow/configs/biohpc.config b/workflow/configs/biohpc.config
index a83de14b7693b52b800bf7fe3d8229fbcd608eec..74db2545c8d67e5f4f92ae45ebd5538712fca0d3 100755
--- a/workflow/configs/biohpc.config
+++ b/workflow/configs/biohpc.config
@@ -1,30 +1,11 @@
-/*
-singularity {
-  enabled = true
-  runOptions = ''
-  // Below connects the experimental atac container to BioHPC's Slurm job scheduler.
-  // runOptions = '\
-  //   --bind /cm/shared/apps/slurm/16.05.8,/etc/slurm,/cm/shared/apps/slurm/var/etc/,/usr/lib64/libreadline.so.6 \
-  //   --bind /usr/lib64/libhistory.so.6,/usr/lib64/libtinfo.so.5,/var/run/munge,/usr/lib64/libmunge.so.2 \
-  //   --bind /usr/lib64/libmunge.so.2.0.0,/cm/shared/apps/slurm/16.05.8/lib64/slurm/'
-
-  // Please do NOT use "--disable-cache" in this runOptions.
-  // Starting from version 2.0.0, the astrocyte_cli will clean up the cache automatically.
-  // runOptions = '--bind /vagrant:/vagrant' // Use this one for vagrant development env only
-  cacheDir = "$baseDir/images/singularity" // Singularity images specified in `workflow_containers` of astrocyte_pkg.yml will be saved to
-                                           // this folder automatically, before running the workflow. The images will be renamed as
-                                           // "NAME-TAG.img", e.g. ubuntu-latest.img, centos-centos8.img, r-4.1.1.img, etc.
-}*/
-
 process {
   executor = 'slurm'
   clusterOptions = '--hold --no-kill'
   queue = 'super'

-  beforeScript = 'ulimit -Ss unlimited'
   withName:runSource {
-    // Experimental containerized version of the caper software.
-    // container = 'docker://git.biohpc.swmed.edu:5050/s219741/astrocyte-atac-source/atac:0.0.1'
+    module = ['python/3.8.x-anaconda', 'openjdk/18']
     executor = 'local'
   }
-}
+
+}
\ No newline at end of file
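Reviewer note on the quoting change in workflow/main.nf below (an illustration, not part of the patch): the process body moves from a shell: block delimited by ''' (where !{var} is Nextflow and $var is bash) to a script: block delimited by """ (where $var is interpolated by Nextflow/Groovy before the task runs, so bash variables must be escaped as \$var). A minimal sketch of the rule, with HOSTNAME standing in for any runtime shell variable:

    echo "$baseDir"     # interpolated by Nextflow when the task script is generated
    echo "\$HOSTNAME"   # the backslash survives to bash, which then expands $HOSTNAME
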
diff --git a/workflow/main.nf b/workflow/main.nf
index f2894ee9bf29b1376c42dc4c5a88f11a2491eda3..14468da223a7a0c6177bd37e996f9a9fb5d417c2 100644
--- a/workflow/main.nf
+++ b/workflow/main.nf
@@ -26,15 +26,14 @@ process runSource {
   output:
   file '*'

-
-  shell:
-  '''
-  export PATH="/cm/shared/apps/openjdk/18/jdk-18/bin:/cm/shared/apps/python/3.8.x-anaconda/condabin:/cm/shared/apps/python/3.8.x-anaconda/bin:$PATH"
-  python --version > python_version.txt
-  java -version 2> java_version_before.txt
+
+  script:
+  """
+  module load python/3.8.x-anaconda
+  module load openjdk/18

   # Enable the use of bash-specific conda commands in this shell.
-  eval "$(conda shell.bash hook)"
+  eval "\$(conda shell.bash hook)"

   # Create a temporary conda environment for caper.
   conda create -y -c bioconda -c defaults -c conda-forge --name astrocyte-atac-caper python=3.8.18
@@ -51,20 +50,20 @@
   caper --version > caper_version.txt

   # Launch the ATAC-seq leader job.
-  jobsubmit=$(caper hpc submit !{baseDir}/external_repo/astrocyte-atac-runner/atac.wdl -i !{inputJson} --singularity --leader-job-name atac-source)
+  jobsubmit=\$(caper hpc submit $baseDir/external_repo/astrocyte-atac-runner/atac.wdl -i $inputJson --singularity --leader-job-name atac-source)

   # Monitor the state of the leader job; if it enters the COMPLETED, FAILED, or CANCELLED state, then finish the workflow process.
-  state=$(bash !{baseDir}/scripts/checkJobState.sh "${jobsubmit}")
-  echo "Lead Job state check $(date) - State: $state" >> lead_job_check.txt
-  while [[ "$state" != *"COMPLETED"* ]] && [[ "$state" != *"FAILED"* ]] && [[ "$state" != *"CANCELLED"* ]]; do
+  state=\$(bash $baseDir/scripts/checkJobState.sh "\$jobsubmit")
+  echo "Lead Job state check \$(date) - State: \$state" >> lead_job_check.txt
+  while [[ "\$state" != *"COMPLETED"* ]] && [[ "\$state" != *"FAILED"* ]] && [[ "\$state" != *"CANCELLED"* ]]; do
     sleep 15
-    state=$(bash !{baseDir}/scripts/checkJobState.sh "${jobsubmit}")
-    echo "Lead Job state check $(date) - State: $state" >> lead_job_check.txt
+    state=\$(bash $baseDir/scripts/checkJobState.sh "\$jobsubmit")
+    echo "Lead Job state check \$(date) - State: \$state" >> lead_job_check.txt
   done
-
+
   # Deactivate the temporary caper conda environment and delete it.
   conda deactivate
   conda remove --name astrocyte-atac-caper --all
   rm -rf ~/.caper/
-  '''
+  """
 }
diff --git a/workflow/scripts/checkJobState.sh b/workflow/scripts/checkJobState.sh
index d3e8d219222acf4679cb5aaa126ace232f91a804..2ebde08e1ecf196112f542466d7eb7dff7d32572 100755
--- a/workflow/scripts/checkJobState.sh
+++ b/workflow/scripts/checkJobState.sh
@@ -1,6 +1,6 @@
 #!/bin/bash

-# Get the jobID of the caper lead job from the input txt file.
+# Get the jobID of the caper lead job from the given input string.
 read -ra line <<< "$1"
 jobID=${line[3]}
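Reviewer note on the token index used above (an illustration, not part of the patch): read -ra line <<< "$1" splits the first argument on whitespace into the array line, so jobID=${line[3]} takes the fourth token. Assuming the captured submit output resembles Slurm's usual confirmation line (caper's exact wording may differ), the extraction behaves like this:

    bash workflow/scripts/checkJobState.sh "Submitted batch job 12345"
    # read -ra line <<< "$1"  ->  line=(Submitted batch job 12345)
    # jobID=${line[3]}        ->  12345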