diff --git a/.gitignore b/.gitignore index 7756350a2c67d0e3d37642c65031fe63689233da..0ee8eb31fed6a000d253a18e8c9648776c984624 100644 --- a/.gitignore +++ b/.gitignore @@ -102,10 +102,13 @@ ENV/ .DS_Store # nextflow analysis folders/files -pipeline_trace*.txt* -.nextflow*.log* -report*.html* -timeline*.html* -/workflow/output/* -/work/* -/.nextflow/* +workflow/dag*.dot* +workflow/trace*.txt* +workflow/.nextflow/ +workflow/.nextflow*.log* +workflow/report*.html* +workflow/timeline*.html* +workflow/output/* +workflow/work/* +workflow/images/ +workflow/cleanup.sh diff --git a/astrocyte_pkg.yml b/astrocyte_pkg.yml index da8cb77fc6ac9791ead218425c51215927c87d46..bc1eb2c358835658a79c5995f07aef775fe8daf1 100644 --- a/astrocyte_pkg.yml +++ b/astrocyte_pkg.yml @@ -7,13 +7,13 @@ # ----------------------------------------------------------------------------- # A unique identifier for the workflow package, text/underscores only -name: 'atac-seq-source' +name: 'atac_seq_source' # Who wrote this? author: 'Achisha Saikia, Felix Perez, Peng Lian' # A contact email address for questions email: 'achisha.saikia@utsouthwestern.edu, felix.perez@utsouthwestern.edu, biohpc-help@utsouthwestern.edu' # A more informative title for the workflow package -title: 'ATAC-seq Source Workflow" +title: 'ATAC-seq Source Workflow' # A summary of the workflow package in plain text description: | # TODO: Please describe the workflow. (AS) @@ -24,19 +24,24 @@ citation: | used, and the overall pipeline doi: 12.3456/zenodo.9876543. # TODO: Review "individual programs". Please cite in publications: - The oringal ATAC-seq pipeline was developed by the ENCODE team, and adapted to the Astrocyte platform by a collaboration between the Lee Lab in Bioinformatics at UT Southwestern Medical Center & BioHPC. - -# The minimum Astrocyte version that requires to run this workflow. 
For old pipelines, which do not have this label + The original ATAC-seq pipeline was developed by the ENCODE team, and + adapted to the Astrocyte Workflow platform by a collaboration + at UT Southwestern Medical Center between + the Lee (Jeon) Lab in Bioinformatics & BioHPC. + +# The minimum Astrocyte version that requires to run this workflow. For old pipelines, which do not have this label # a default value of 0.3.1 will be assigned automatically. A request of minimum version less than 0.4.0 will be ignored. minimum_astrocyte_version: '2.0.1' -# The Nextflow version that requires to run this workflow. For old pipelines, which do not have this label +# The Nextflow version that requires to run this workflow. For old pipelines, which do not have this label # a default value of 0.31.0 will be assigned automatically. Please make sure the requested nextflow version is available # in the module list. -nextflow_version: '22.04.5' +nextflow_version: '0.31.0' # (Optional) The Nextflow config file to use for this workflow. If provided, the file should exist in workflow/configs nextflow_config: 'biohpc.config' # The container to use for this workflow, none/singularity. If omitted, the default value 'none' will be used. -container: 'none' +container: 'singularity' +# The version of singularity to use. This is required if container == 'singularity' +singularity_version: '3.9.9' # ----------------------------------------------------------------------------- @@ -63,6 +68,15 @@ documentation_files: workflow_modules: - Test +# A list of container images required to run this workflow. +# Specify full path and version names to ensure reproducibility. +# This keyword is required when 'container' is specified in Astrocyte 0.4.1 and above. +# Singularity supports different registries, please specify the protocol to use. +# Such as, "docker://", "shub://", "library://", etc. We encourage you to use the GitLab +# container registry of BioHPC to save and manage your container images. 
+workflow_containers: + - docker://git.biohpc.swmed.edu:5050/s219741/astrocyte-atac-source/atac:0.0.1 + # A list of parameters used by the workflow, defining how to present them, # options etc in the web interface. For each parameter: # @@ -88,7 +102,7 @@ workflow_modules: # SELECT TYPE # choices: A set of choices presented to the user for the parameter. # Each choice is a pair of value and description, e.g. -# +# # choices: # - [ 'myval', 'The first option'] # - [ 'myval', 'The second option'] @@ -99,25 +113,11 @@ workflow_modules: workflow_parameters: - id: input_json - # START HERE NEXT TIME - type: string - required: false - default: "A" + type: file + required: true description: | - A parameter that need to be passed to the above command + The input JSON used to supply the list of fastq.gz files for caper to process in the ATAC-seq pipeline. - - id: parameter2 - type: string - required: false - default: "B" - description: | - A parameter that need to be passed to the above command - - - id: parameter_nonopt - type: files - required: false - description: | - A non-optinal parameter that need to be passed to the above command # ----------------------------------------------------------------------------- # SHINY APP CONFIGURATION @@ -128,22 +128,21 @@ workflow_parameters: # Name of the R module that the vizapp will run against # A containerized version is recommanded. 
-vizapp_r_module: 'R/4.1.1-img' +#vizapp_r_module: 'R/4.1.1-img' # List of any CRAN packages, not provided by the modules, that must be made # available to the vizapp -vizapp_cran_packages: - - shiny - - shinyFiles - - plotly +#vizapp_cran_packages: +# - shiny +# - shinyFiles +# - plotly # # List of any Bioconductor packages, not provided by the modules, that must be made # available to the vizapp -vizapp_bioc_packages: - - +#vizapp_bioc_packages: +# - # # List of any packages to install from GitHub using devtools, that must be # made availavle to the vizapp -vizapp_github_packages: - - - +#vizapp_github_packages: +# - diff --git a/workflow/configs/biohpc.config b/workflow/configs/biohpc.config old mode 100644 new mode 100755 index 4f2921ae0e59bf5aa79168ae463e3a6c0227fae5..465d7e7bd09bbd7b6bcf8541c74fc87b271d0b73 --- a/workflow/configs/biohpc.config +++ b/workflow/configs/biohpc.config @@ -1,9 +1,22 @@ +singularity { + enabled = true + runOptions = ' -B /cm/shared/apps/slurm/16.05.8 -B /etc/slurm -B /cm/shared/apps/slurm/var/etc/ -B /usr/lib64/libreadline.so.6 -B /usr/lib64/libhistory.so.6 -B /usr/lib64/libtinfo.so.5 -B /var/run/munge -B /usr/lib64/libmunge.so.2 -B /usr/lib64/libmunge.so.2.0.0 -B /cm/shared/apps/slurm/16.05.8/lib64/slurm/ -B /cm/shared/apps/slurm/16.05.8/lib64' + // Please do NOT use "--disable-cache" in this runOptions. + // Starting from version 2.0.0, the astrocyte_cli will clean up the cache automatically. + // runOptions = '--bind /vagrant:/vagrant' // Use this one for vagrant development env only + cacheDir = "$baseDir/images/singularity" // Singularity images specified in `workflow_containers` of astrocyte_pkg.yml will be saved to + // this folder automatically, before running the workflow. The images will be renamed as + // "NAME-TAG.img", e.g. ubuntu-latest.img, centos-centos8.img, r-4.1.1.img, etc. 
+} + process { - // Uncomment these two options to run the workflow on BioHPC cluster - // You can - // executor = 'slurm' - // queue = 'super' + executor = 'slurm' + clusterOptions = '--hold --no-kill' + queue = 'super' + beforeScript = 'ulimit -Ss unlimited' - // To run the workflow on local computer (for testing) - executor = 'local' + withName:runSource { + container = 'docker://git.biohpc.swmed.edu:5050/s219741/astrocyte-atac-source/atac:0.0.1' + executor = 'local' + } } diff --git a/workflow/main.nf b/workflow/main.nf index 25f81fe3d0b0420c385dbadde44baf0404daba65..734b08d987be1c6728308c943e3c79f21d14b27a 100644 --- a/workflow/main.nf +++ b/workflow/main.nf @@ -1,15 +1,32 @@ /* * Copyright (c) 2024. The University of Texas Southwestern Medical Center * - * TODO: (AC) Brief description of ATAC-seq + * TODO: (AS) Brief description of ATAC-seq * * @authors * Achisha Saikia, Felix Perez * */ - // caper hpc submit atac.wdl -i "${INPUT_JSON}" --singularity --leader-job-name ANY_GOOD_LEADER_JOB_NAME` +// Note - $baseDir is the location of this workflow's main.nf file. +params.input_json = "$baseDir/../test_data/example_input_json/ENCSR356KRQ_subsampled.json" - // The executable script in the Runner repo - //params = "caper hpc submit $baseDir/external_repo/atac.wdl" - //params.input_json = \ No newline at end of file +inputJsonFile = Channel.fromPath( params.input_json ) + +process runSource { + + publishDir "$baseDir/output", mode: 'copy' + + input: + file inputJson from inputJsonFile + + output: + file '*' + + """ + export LD_LIBRARY_PATH=/usr/lib64/:\$LD_LIBRARY_PATH # backslash-escaped so bash, not Nextflow, expands it at run time + sinfo -V > slurm_version.txt + caper --version > caper_version.txt + caper hpc submit $baseDir/external_repo/astrocyte-atac-runner/atac.wdl -i $inputJson --singularity --leader-job-name atac-source 1> batch_job.txt 2>> caper_err.txt + """ +} \ No newline at end of file