diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 66e1fd4a0cf9d2c584c4efaceeeec5cc3f47384a..f69a33854b99af4b0332ebb2885cbe74e502ab52 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -6,14 +6,24 @@ before_script: stages: - unit + - astrocyte - integration user_configuration: stage: unit script: - - pytest -m unit - pytest -m unit --cov=./workflow/scripts + +astrocyte: + stage: astrocyte + script: + - module load astrocyte/0.3.0 + - module unload nextflow + - cd .. + - astrocyte_cli validate atacseq_analysis + + single_end_human: stage: integration only: diff --git a/README.md b/README.md index 42ebb3ae3e154553740da378c43cd9e512f8a3d0..e728031e0d6d298751b0d195eeafe81efad37360 100644 --- a/README.md +++ b/README.md @@ -25,11 +25,11 @@ $ git clone git@git.biohpc.swmed.edu:BICF/Astrocyte/atacseq_analysis.git ``` ## Input files -##### 1) Fastq Files +### Fastq Files + You will need the full path to the files for the Bash Scipt -## Design file +### Design file + The Design file is a tab-delimited file with 4 columns for Single-End and 5 columns for Paired-End. Letter, numbers, and underlines can be used in the names. However, the names must begin with a letter. Columns must be as follows: 1. sample_id - The id of the sample. This will be the header in output files, please make sure it is concise @@ -37,7 +37,7 @@ $ git clone git@git.biohpc.swmed.edu:BICF/Astrocyte/atacseq_analysis.git 3. replicate - Replicate number 4. fastq_read1 - Name of fastq file 1 for SE or PE data 5. fastq_read2 - Name of fastq file 2 for PE data - + + See [HERE](/docs/design_ENCSR451NAE_PE.txt) for an example design file, paired-end + See [HERE](/docs/design_ENCSR265ZXX_SE.txt) for an example design file, single-end @@ -112,9 +112,8 @@ If you find an error, please let the [BICF](mailto:BICF@UTSouthwestern.edu) know ## Citation -Please cite individual programs and versions used [HERE](docs/references.md), and the pipeline doi: coming soon. 
Please cite in publications: Pipeline was developed by BICF from funding provided by Cancer Prevention and Research Institute of Texas (RP150596). +Please cite individual programs and versions of pipeline used [HERE](docs/references.md), and the overall pipeline doi: 10.5281/zenodo.3526149. Please cite in publications: Pipeline was developed by BICF from funding provided by Cancer Prevention and Research Institute of Texas (RP150596). ### Credits This example worklow is derived from original scripts kindly contributed by the Bioinformatic Core Facility ([BICF](https://www.utsouthwestern.edu/labs/bioinformatics/)), in the [Department of Bioinformatics](https://www.utsouthwestern.edu/departments/bioinformatics/). - diff --git a/astrocyte_pkg.yml b/astrocyte_pkg.yml index b031e84e2e43a04f29c6a43d01dd675d22ce85d5..e60e5f07e5d8b78a17e38ceb91710f47f80c35bd 100644 --- a/astrocyte_pkg.yml +++ b/astrocyte_pkg.yml @@ -46,12 +46,12 @@ workflow_modules: - 'bwa/intel/0.7.12' - 'samtools/1.4.1' - 'sambamba/0.6.6' - - 'bedtools/2.26.0' + - 'bedtools/2.25.0' - 'deeptools/2.5.0.1' - 'phantompeakqualtools/1.2' - 'macs/2.1.0-20151222' - 'UCSC_userApps/v317' - - 'singularity/2.6.1' + - 'R/3.3.2-gccmkl' - 'pandoc/2.7' - 'singularity/3.0.2' @@ -95,7 +95,7 @@ workflow_parameters: description: | One or more input FASTQ files from a ATAC-seq expereiment and a design file with the link bewetwen the same file name and sample id - regex: ".*(fastq|fq)*" + regex: ".*(fastq|fq)*gz" - id: pairedEnd type: select @@ -117,7 +117,7 @@ workflow_parameters: - [ 'true', 'True'] - [ 'false', 'False'] description: | - The Blacklisted Regions aim to identify a comprehensive set of regions + The Blacklisted Regions aim to identify a comprehensive set of regions that have anomalous, unstructured, high signal/read counts in next gen sequencing experiments independent of cell line and type of experiment. 
If you would like these regions excluded from replicated peaks, select @@ -134,12 +134,22 @@ workflow_parameters: type: select choices: - [ 'GRCh38', 'Human GRCh38'] - - [ 'GRCh38', 'Mouse GRCh38'] + - [ 'GRCm38', 'Mouse GRCm38'] required: true description: | Reference species and genome used for alignment and subsequent analysis. + - id: astrocyte + type: select + choices: + - [ 'true', 'true' ] + required: true + default: 'true' + description: | + Ensure configuration for astrocyte. + + # ----------------------------------------------------------------------------- # SHINY APP CONFIGURATION # ----------------------------------------------------------------------------- @@ -148,7 +158,7 @@ workflow_parameters: # The workflow must publish all final output into $baseDir # Name of the R module that the vizapp will run against -vizapp_r_module: 'R/3.2.1-intel' +vizapp_r_module: 'R/3.4.1-gccmkl' # List of any CRAN packages, not provided by the modules, that must be made # available to the vizapp @@ -158,8 +168,4 @@ vizapp_cran_packages: # List of any Bioconductor packages, not provided by the modules, # that must be made available to the vizapp -vizapp_bioc_packages: -# - qusage -# - ballgown -vizapp_github_packages: - - js229/Vennerable +vizapp_bioc_packages: [] diff --git a/docs/index.md b/docs/index.md index 42ebb3ae3e154553740da378c43cd9e512f8a3d0..2b881aa227a43faf545a78213a68ec9f55eb338f 100644 --- a/docs/index.md +++ b/docs/index.md @@ -1,10 +1,4 @@ -# Astrocyte ATAC-seq analysis Workflow Package - -[![pipeline Status](https://git.biohpc.swmed.edu/BICF/Astrocyte/atacseq_analysis/badges/master/pipeline.svg)](https://git.biohpc.swmed.edu/BICF/Astrocyte/atacseq_analysis/commits/master) -[![Coverage Report](https://git.biohpc.swmed.edu/BICF/Astrocyte/atacseq_analysis/badges/master/coverage.svg)](https://git.biohpc.swmed.edu/BICF/Astrocyte/atacseq_analysis/commits/master) -[![Nextflow](https://img.shields.io/badge/nextflow-%E2%89%A50.24.0-brightgreen.svg 
-)](https://www.nextflow.io/) -[![Astrocyte](https://img.shields.io/badge/astrocyte-%E2%89%A50.1.0-blue.svg)](https://astrocyte-test.biohpc.swmed.edu/static/docs/index.html) +# BICF ATAC-seq Analysis Workflow ## Introduction @@ -12,22 +6,10 @@ BICF ATAC-seq is a bioinformatics best-practice analysis pipeline used for ATAC- The pipeline uses [Nextflow](https://www.nextflow.io), a bioinformatics workflow tool. It pre-processes raw data from FastQ inputs, aligns the reads and performs extensive quality-control on the results. -This pipeline is primarily used with a SLURM cluster on the [BioHPC Cluster](https://portal.biohpc.swmed.edu/content/). However, the pipeline should be able to run on any system that supports Nextflow. - -Additionally, the pipeline is designed to work with [Astrocyte Workflow System](https://astrocyte.biohpc.swmed.edu/static/docs/index.html) using a simple web interface. - -Current version of the software and issue reports are at -https://git.biohpc.swmed.edu/BICF/Astrocyte/atacseq_analysis - -To download the current (working not tagged) version of the software -```bash -$ git clone git@git.biohpc.swmed.edu:BICF/Astrocyte/atacseq_analysis.git -``` ## Input files ##### 1) Fastq Files - + You will need the full path to the files for the Bash Scipt - + + One or more input FASTQ files from a ATAC-seq experiment ## Design file + The Design file is a tab-delimited file with 4 columns for Single-End and 5 columns for Paired-End. Letter, numbers, and underlines can be used in the names. However, the names must begin with a letter. Columns must be as follows: @@ -37,7 +19,7 @@ $ git clone git@git.biohpc.swmed.edu:BICF/Astrocyte/atacseq_analysis.git 3. replicate - Replicate number 4. fastq_read1 - Name of fastq file 1 for SE or PE data 5. 
fastq_read2 - Name of fastq file 2 for PE data - + + See [HERE](/docs/design_ENCSR451NAE_PE.txt) for an example design file, paired-end + See [HERE](/docs/design_ENCSR265ZXX_SE.txt) for an example design file, single-end @@ -112,9 +94,8 @@ If you find an error, please let the [BICF](mailto:BICF@UTSouthwestern.edu) know ## Citation -Please cite individual programs and versions used [HERE](docs/references.md), and the pipeline doi: coming soon. Please cite in publications: Pipeline was developed by BICF from funding provided by Cancer Prevention and Research Institute of Texas (RP150596). +Please cite individual programs and versions of pipeline used [HERE](docs/references.md), and the overall pipeline doi: 10.5281/zenodo.3526149. Please cite in publications: Pipeline was developed by BICF from funding provided by Cancer Prevention and Research Institute of Texas (RP150596). ### Credits This example worklow is derived from original scripts kindly contributed by the Bioinformatic Core Facility ([BICF](https://www.utsouthwestern.edu/labs/bioinformatics/)), in the [Department of Bioinformatics](https://www.utsouthwestern.edu/departments/bioinformatics/). - diff --git a/docs/references.md b/docs/references.md index 34656dbf5dd5873aec671d436d2eabd5fc236e13..0ef24b27fd70a18cb9b648e575cdc0f28ff663ad 100644 --- a/docs/references.md +++ b/docs/references.md @@ -40,7 +40,11 @@ 13. **MultiQc**: * Ewels P., Magnusson M., Lundin S. and Käller M. 2016. MultiQC: Summarize analysis results for multiple tools and samples in a single report. Bioinformatics 32(19): 3047–3048. doi:[10.1093/bioinformatics/btw354](https://dx.doi.org/10.1093/bioinformatics/btw354) -14. **Nextflow**: + +14. **BICF ATAC-seq Analysis Workflow**: + * Holly Ruess, Spencer D. Barnes and Venkat S. Malladi. 2020. BICF ATAC-seq Analysis Workflow (publish_2.0.0). Zenodo. doi:[10.5281/zenodo.3891417](https://doi.org/10.5281/zenodo.3891417) + +15. **Nextflow**: * Di Tommaso, P., Chatzou, M., Floden, E. W., Barja, P. 
P., Palumbo, E., and Notredame, C. 2017. Nextflow enables reproducible computational workflows. Nature biotechnology, 35(4), 316. Please cite in publications: Pipeline was developed by BICF from funding provided by **Cancer Prevention and Research Institute of Texas (RP150596)**. diff --git a/workflow/conf/biohpc.config b/workflow/conf/biohpc.config index 8472bf6b235c1b27c71473a027cda61dcf4fec10..1a20d57547cef8f65929dccbf3ae51e87187fbf2 100644 --- a/workflow/conf/biohpc.config +++ b/workflow/conf/biohpc.config @@ -50,7 +50,7 @@ process { executor = 'local' } withName: experimentQC { - module = ['python/3.6.1-2-anaconda', 'deeptools/2.5.0.1', 'samtools/1.4.1', 'bedtools/2.25.0', 'singularity/2.6.1'] + module = ['python/3.6.1-2-anaconda', 'deeptools/2.5.0.1', 'samtools/1.4.1', 'bedtools/2.25.0', 'singularity/3.0.2'] queue = '128GB,256GB,256GBv1' } withName: multiqcReport {