diff --git a/astrocyte_pkg.yml b/astrocyte_pkg.yml index bc1eb2c358835658a79c5995f07aef775fe8daf1..cf9d8a32de8c8824f89f4575c5d1a3818d1f0092 100644 --- a/astrocyte_pkg.yml +++ b/astrocyte_pkg.yml @@ -9,12 +9,14 @@ # A unique identifier for the workflow package, text/underscores only name: 'atac_seq_source' # Who wrote this? -author: 'Achisha Saikia, Felix Perez, Peng Lian' +author: 'Felix Perez, Achisha Saikia, Peng Lian' # A contact email address for questions -email: 'achisha.saikia@utsouthwestern.edu, felix.perez@utsouthwestern.edu, biohpc-help@utsouthwestern.edu' +email: 'felix.perez@utsouthwestern.edu, achisha.saikia@utsouthwestern.edu, biohpc-help@utsouthwestern.edu' # A more informative title for the workflow package -title: 'ATAC-seq Source Workflow' -# A summary of the workflow package in plain text +title: 'ATAC-seq Source Workflow' + +This pipeline is designed for automated end-to-end quality control and processing of ATAC-seq data. The pipeline can be run end-to-end, starting from raw FASTQ files all the way to peak calling and signal track generation using a single caper submit command. One can also start the pipeline from intermediate stages (for example, using alignment files as input). The pipeline supports both single-end and paired-end data as well as replicated or non-replicated datasets. The outputs produced by the pipeline include 1) formatted HTML reports that include quality control measures specifically designed for ATAC-seq and DNase-seq data, 2) analysis of reproducibility, 3) stringent and relaxed thresholding of peaks, and 4) fold-enrichment and p-value signal tracks. + description: | # TODO: Please describe the workflow. 
(AS) diff --git a/docs/index.md b/docs/index.md index 81bd63d24292a57ccbced426262c866175fb8c9e..77b3eefda3d5d1c1fbbbda328a2c0d5123c3d9da 100644 --- a/docs/index.md +++ b/docs/index.md @@ -1,6 +1,6 @@ # ATAC-seq Astrocyte Workflow -<!-- TODO: Fill out intro (AS) --> +The ATAC-seq Astrocyte workflow is a best-practice bioinformatics pipeline for analyzing ATAC-seq (Assay for Transposase-Accessible Chromatin using sequencing) data. It is built upon the ENCODE ATAC workflow, which is written in Workflow Description Language (WDL), and uses Nextflow to run on the Astrocyte platform. ## Requirements - The ATAC-seq Source workflow ['astrocyte-atac-source] (https://git.biohpc.swmed.edu/s219741/astrocyte-atac-source). @@ -8,7 +8,9 @@ This repo is used to wrap the existing ATAC-seq pipeline listed below (Runner), - The ATAC-seq Runner workflow, 'astrocyte-atac-runner] (https://git.biohpc.swmed.edu/s219741/astrocyte-atac-runner). This repo contains the original ATAC-seq pipeline developed by the ENCODE team. ## The ATAC-seq Runner workflow -<!-- TODO: Fill out intro to ATAC-seq pipeline. Which commands do we use to run this pipeline? What will the NextFlow script use? (AS) --> +<!-- TODO: Fill out intro to ATAC-seq pipeline. Which commands do we use to run this pipeline? What will the NextFlow script use? (AS) (DONE)--> + +This pipeline is designed for automated end-to-end quality control and processing of ATAC-seq data. The pipeline can be run end-to-end, starting from raw FASTQ files all the way to peak calling and signal track generation using a single caper submit command. One can also start the pipeline from intermediate stages (for example, using alignment files as input). The pipeline supports both single-end and paired-end data as well as replicated or non-replicated datasets. 
The outputs produced by the pipeline include 1) formatted HTML reports that include quality control measures specifically designed for ATAC-seq and DNase-seq data, 2) analysis of reproducibility, 3) stringent and relaxed thresholding of peaks, and 4) fold-enrichment and p-value signal tracks. On HPC, make sure that Caper's configuration file (`~/.caper/default.conf`) is correctly configured to work with HPC. The following command will submit Caper as a leader job to SLURM with Singularity: `caper hpc submit atac.wdl -i "${INPUT_JSON}" --singularity --leader-job-name ANY_GOOD_LEADER_JOB_NAME` diff --git a/workflow/main.nf b/workflow/main.nf index 734b08d987be1c6728308c943e3c79f21d14b27a..adc3f4bb718cd94cdd965e1f80caf985f165f3b2 100644 --- a/workflow/main.nf +++ b/workflow/main.nf @@ -1,10 +1,10 @@ /* * Copyright (c) 2024. The University of Texas Southwestern Medical Center * - * TODO: (AS) Brief description of ATAC-seq - * + * TODO: (AS) Brief description of ATAC-seq (DONE) + * ATAC-seq is a molecular biology technique that assesses chromatin accessibility in a genome. It uses a hyperactive Tn5 transposase to insert sequencing adapters into open chromatin regions, allowing researchers to identify and sequence these accessible genomic regions. ATAC-seq is widely used to study gene regulation, identify enhancers and promoters, and gain insights into chromatin structure. * @authors - * Felix Perez, Achisha Saikia + * Felix Perez, Achisha Saikia * */ @@ -29,4 +29,4 @@ process runSource { caper --version > caper_version.txt caper hpc submit $baseDir/external_repo/astrocyte-atac-runner/atac.wdl -i $inputJson --singularity --leader-job-name atac-source 1> batch_job.txt 2>> caper_err.txt """ -} \ No newline at end of file +}