Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • BICF/Astrocyte/chipseq_analysis
  • s190984/chipseq_analysis
  • astrocyte/workflows/bicf/chipseq_analysis
  • s219741/chipseq-analysis-containerized
Show changes
Commits on Source (149)
Showing
with 220 additions and 97 deletions
......@@ -108,5 +108,4 @@ report*.html*
timeline*.html*
/workflow/output/*
/work/*
/test_data/*
/.nextflow/*
......@@ -10,32 +10,37 @@ stages:
- single
- multiple
- skip
- cleanup
user_configuration:
stage: unit
script:
- pytest -m unit
- pytest -m unit --cov=./workflow/scripts
bash_tests:
stage: unit
script:
- module load singularity/3.0.2
- module load deeptools/2.5.0.1
- singularity run docker://bats/bats:v1.1.0 --tap workflow/tests/plot_profile.bats
astrocyte:
stage: astrocyte
script:
- module load astrocyte/0.1.0
- module load astrocyte/0.3.1
- module unload nextflow
- cd ..
- astrocyte_cli validate chipseq_analysis
artifacts:
expire_in: 2 days
after_script:
- rm -rf work/
single_end_mouse:
stage: single
only:
- master
script:
- nextflow run workflow/main.nf --astrocyte true -resume
- NXF_OPTS="-Dleveldb.mmap=false" nextflow run workflow/main.nf --astrocyte true --ci true --dev true
- pytest -m singleend
artifacts:
expire_in: 2 days
paired_end_human:
stage: single
......@@ -44,39 +49,55 @@ paired_end_human:
except:
- master
script:
- nextflow run workflow/main.nf --designFile "$CI_PROJECT_DIR/test_data/design_ENCSR729LGA_PE.txt" --genome 'GRCh38' --pairedEnd true --astrocyte false -resume
- NXF_OPTS="-Dleveldb.mmap=false" nextflow run workflow/main.nf --designFile "$CI_PROJECT_DIR/test_data/design_ENCSR729LGA_PE.txt" --genome 'GRCh38' --pairedEnd true --astrocyte false --ci true --dev true
- pytest -m pairedend
artifacts:
expire_in: 2 days
single_end_diff:
stage: multiple
single_end_single_control:
stage: single
only:
- branches
except:
- master
script:
- nextflow run workflow/main.nf --designFile "$CI_PROJECT_DIR/test_data/design_diff_SE.txt" --genome 'GRCm38' --astrocyte false -resume
- NXF_OPTS="-Dleveldb.mmap=false" nextflow run workflow/main.nf --designFile "$CI_PROJECT_DIR/test_data/design_single_contol_SE.txt" --genome 'GRCh38' --pairedEnd false --astrocyte false --ci true --dev true
- pytest -m singlecontrol
single_end_diff:
stage: multiple
only:
- master
script:
- NXF_OPTS="-Dleveldb.mmap=false" nextflow run workflow/main.nf --designFile "$CI_PROJECT_DIR/test_data/design_diff_SE.txt" --genome 'GRCm38' --astrocyte false --ci true --dev true
- pytest -m singleend
- pytest -m singlediff
artifacts:
expire_in: 2 days
paired_end_diff:
only:
- branches
except:
- master
stage: multiple
script:
- nextflow run workflow/main.nf --designFile "$CI_PROJECT_DIR/test_data/design_diff_PE.txt" --genome 'GRCh38' --pairedEnd true --astrocyte false -resume
- NXF_OPTS="-Dleveldb.mmap=false" nextflow run workflow/main.nf --designFile "$CI_PROJECT_DIR/test_data/design_diff_PE.txt" --genome 'GRCh38' --pairedEnd true --astrocyte false -with-dag flowchart.pdf --ci true --dev true
- pytest -m pairedend
- pytest -m paireddiff
artifacts:
expire_in: 2 days
name: "$CI_JOB_NAME"
when: always
paths:
- flowchart.pdf
expire_in: 7 days
single_end_skip:
stage: skip
only:
- master
script:
- nextflow run workflow/main.nf --designFile "$CI_PROJECT_DIR/test_data/design_diff_SE.txt" --genome 'GRCm38' --skipDiff true --skipMotif true --skipPlotProfile true --astrocyte false -resume
- NXF_OPTS="-Dleveldb.mmap=false" nextflow run workflow/main.nf --designFile "$CI_PROJECT_DIR/test_data/design_diff_SE.txt" --genome 'GRCm38' --skipDiff true --skipMotif true --skipPlotProfile true --astrocyte false --ci true --dev true
- pytest -m singleskip_true
artifacts:
expire_in: 2 days
# CI job in the `cleanup` stage: deletes the directory named after the current
# pipeline ID from this project's folder under the runner's builds directory.
cleanup_job:
stage: cleanup
script:
# Change into <builds dir>/<runner short token>/<project name>.
- cd $CI_BUILDS_DIR/$CI_RUNNER_SHORT_TOKEN/$CI_PROJECT_NAME
# Remove the per-pipeline directory; -f keeps the job from failing if it is absent.
- rm -fr $CI_PIPELINE_ID/
......@@ -6,6 +6,7 @@ These are the most common things requested on pull requests (PRs).
- [ ] This comment contains a description of changes (with reason)
- [ ] If you've fixed a bug or added code that should be tested, add tests!
- [ ] Documentation in `docs` is updated
- [ ] Replace dag.png with the most recent CI pipeline integrated_pe artifact
- [ ] `CHANGELOG.md` is updated
- [ ] `README.md` is updated
- [ ] `LICENSE.md` is updated with new contributors
......@@ -2,14 +2,35 @@
All notable changes to this project will be documented in this file.
## [Unreleased]
- Fix references.md link in citation of README.md
## [publish_1.1.3 ] - 2020-08-16
### Updated
- Updated astrocyte to 0.3.1
### Fixed
- Fixed missing gene names in annotation
## [publish_1.1.2 ] - 2020-06-22
- Add pipeline tracking
## [publish_1.1.1 ] - 2020-04-23
### Added
- Add Nextflow to references.md
- Fix pool_and_psuedoreplicate.py to run single experiment
- Add test data
- Add test data for test_pool_and_pseudoreplicate
- Add PlotProfile Option
- Add Python version to MultiQC
- Add and Update tests
- Use GTF files instead of TxDb and org libraries in Annotate Peaks
- Make gtf and geneName files as param inputs
- Add test data for single control and single replicate
### Fixed
- Fix references.md link in citation of README.md
- Fix pool_and_psuedoreplicate.py to run single experiment
- Fix xcor to increase file size for --random-source
- Fix skip diff test for paired-end data
- Fix xcor to get lowest non zero value above 50
- Fix references to display in Multiqc report
- Update astrocyte testing to 0.2.0
## [publish_1.0.6 ] - 2019-05-31
### Added
......
# **CHIPseq Manual**
## Version 1.0.6
## May 31, 2019
# **ChIP-seq Manual**
## Version 1.1.2
## June 21, 2020
# BICF ChIP-seq Pipeline
[![Build Status](https://git.biohpc.swmed.edu/BICF/Astrocyte/chipseq_analysis/badges/master/build.svg)](https://git.biohpc.swmed.edu/BICF/Astrocyte/chipseq_analysis/commits/master)
[![Coverage Report](https://git.biohpc.swmed.edu/BICF/Astrocyte/chipseq_analysis/badges/master/coverage.svg)](https://git.biohpc.swmed.edu/BICF/Astrocyte/chipseq_analysis/commits/master)
[![Nextflow](https://img.shields.io/badge/nextflow-%E2%89%A50.24.0-brightgreen.svg
)](https://www.nextflow.io/)
[![Astrocyte](https://img.shields.io/badge/astrocyte-%E2%89%A50.1.0-blue.svg)](https://astrocyte-test.biohpc.swmed.edu/static/docs/index.html)
[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.2648845.svg)](https://doi.org/10.5281/zenodo.2648845)
|*master*|*dev*|
|:-:|:-:|
|[![pipeline status](https://git.biohpc.swmed.edu/BICF/Astrocyte/chipseq_analysis/badges/master/pipeline.svg)](https://git.biohpc.swmed.edu/BICF/Astrocyte/chipseq_analysis/commits/master)|[![pipeline status](https://git.biohpc.swmed.edu/BICF/Astrocyte/chipseq_analysis/badges/dev/pipeline.svg)](https://git.biohpc.swmed.edu/BICF/Astrocyte/chipseq_analysis/commits/dev)|
|[![coverage report](https://git.biohpc.swmed.edu/BICF/Astrocyte/chipseq_analysis/badges/master/coverage.svg)](https://git.biohpc.swmed.edu/BICF/Astrocyte/chipseq_analysis/commits/master)|[![coverage report](https://git.biohpc.swmed.edu/BICF/Astrocyte/chipseq_analysis/badges/dev/coverage.svg)](https://git.biohpc.swmed.edu/BICF/Astrocyte/chipseq_analysis/commits/dev)|
[![Nextflow](https://img.shields.io/badge/nextflow-%E2%89%A50.31.0-brightgreen)](https://www.nextflow.io/)
[![Astrocyte](https://img.shields.io/badge/astrocyte-%E2%89%A50.3.1-blue)](https://astrocyte-test.biohpc.swmed.edu/static/docs/index.html)
[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.2648844.svg)](https://doi.org/10.5281/zenodo.2648844)
## Introduction
BICF ChIPseq is a bioinformatics best-practice analysis pipeline used for ChIP-seq (chromatin immunoprecipitation sequencing) data analysis at [BICF](http://www.utsouthwestern.edu/labs/bioinformatics/) at [UT Southwestern Department of Bioinformatics](http://www.utsouthwestern.edu/departments/bioinformatics/).
BICF ChIP-seq is a bioinformatics best-practice analysis pipeline used for ChIP-seq (chromatin immunoprecipitation sequencing) data analysis at [BICF](http://www.utsouthwestern.edu/labs/bioinformatics/) at [UT Southwestern Department of Bioinformatics](http://www.utsouthwestern.edu/departments/bioinformatics/).
The pipeline uses [Nextflow](https://www.nextflow.io), a bioinformatics workflow tool. It pre-processes raw data from FastQ inputs, aligns the reads and performs extensive quality-control on the results.
......@@ -58,7 +60,10 @@ $ git clone git@git.biohpc.swmed.edu:BICF/Astrocyte/chipseq_analysis.git
- --designFile '/path/to/file/design.txt',
- --genome 'GRCm38', 'GRCh38', or 'GRCh37' (if you need to use another genome contact the [BICF](mailto:BICF@UTSouthwestern.edu))
- --pairedEnd 'true' or 'false' (where 'true' is PE and 'false' is SE; default 'false')
- --outDir (optional) path and folder name of the output data, example: /home2/s000000/Desktop/Chipseq_output (if not specficied will be under workflow/output/)
- --skipDiff 'true' or 'false' (where 'true' is skip differential peak calling and 'false' is do differential peak calling; default 'false')
- --skipMotif 'true' or 'false' (where 'true' is skip motif calling and 'false' is do motif calling; default 'false')
- --skipPlotProfile 'true' or 'false' (where 'true' is skip metageneplot for TSS and 'false' is do metageneplot for TSS; default 'false')
- --outDir (optional) path and folder name of the output data, example: /home2/s000000/Desktop/Chipseq_output (if not specified will be under workflow/output/)
## Pipeline
+ There are 12 steps to the pipeline
......@@ -73,6 +78,7 @@ $ git clone git@git.biohpc.swmed.edu:BICF/Astrocyte/chipseq_analysis.git
9. Annotate all peaks using ChipSeeker
10. Calculate Differential Binding Activity with DiffBind (If more than 1 rep in more than 1 experiment)
11. Use MEME-ChIP to find motifs in original peaks
12. Plot enrichment of signal around TSS
See [FLOWCHART](docs/flowchart.pdf)
......
......@@ -9,7 +9,7 @@
# A unique identifier for the workflow package, text/underscores only
name: 'chipseq_analysis_bicf'
# Who wrote this?
author: 'Holly Ruess, Spencer D. Barnes, Beibei Chen and Venkat Malladi'
author: 'Holly Ruess, Spencer D. Barnes, Jeremy A. Mathews, Beibei Chen and Venkat Malladi'
# A contact email address for questions
email: 'bicf@utsouthwestern.edu'
# A more informative title for the workflow package
......@@ -52,6 +52,7 @@ workflow_modules:
- 'R/3.3.2-gccmkl'
- 'meme/4.11.1-gcc-openmpi'
- 'pandoc/2.7'
- 'singularity/3.0.2'
# A list of parameters used by the workflow, defining how to present them,
......
No preview for this file type
......@@ -20,6 +20,7 @@ Report issues to the Bioinformatic Core Facility [BICF](mailto:BICF@UTSouthweste
9. Annotate all peaks using ChipSeeker
10. Calculate Differential Binding Activity with DiffBind (If more than 1 rep in more than 1 experiment)
11. Use MEME-ChIP to find motifs in original peaks
12. Plot enrichment of signal around TSS
## Workflow Parameters
......
......@@ -52,7 +52,7 @@
* Ewels P., Magnusson M., Lundin S. and Käller M. 2016. MultiQC: Summarize analysis results for multiple tools and samples in a single report. Bioinformatics 32(19): 3047–3048. doi:[10.1093/bioinformatics/btw354](https://dx.doi.org/10.1093/bioinformatics/btw354)
17. **BICF ChIP-seq Analysis Workflow**:
* Spencer D. Barnes, Holly Ruess, Jeremy A. Mathews, Beibei Chen, and Venkat S. Malladi. 2019. BICF ChIP-seq Analysis Workflow (publish_1.0.5). Zenodo. doi:[10.5281/zenodo.2648844](https://doi.org/10.5281/zenodo.2648844)
* Spencer D. Barnes, Holly Ruess, Jeremy A. Mathews, Beibei Chen, and Venkat S. Malladi. 2020. BICF ChIP-seq Analysis Workflow (publish_1.1.3). Zenodo. doi:[10.5281/zenodo.3986942](https://doi.org/10.5281/zenodo.3986942)
18. **Nextflow**:
* Di Tommaso, P., Chatzou, M., Floden, E. W., Barja, P. P., Palumbo, E., and Notredame, C. 2017. Nextflow enables reproducible computational workflows. Nature biotechnology, 35(4), 316.
......
File added
File added
sample_id experiment_id biosample factor treatment replicate control_id fastq_read1
ENCLB497XZB ENCSR000DXB Panc1 H3K4me3 None 1 ENCLB304SBJ ENCFF001GBW.fastq.gz
ENCLB304SBJ ENCSR000DXC Panc1 Control None 1 ENCLB304SBJ ENCFF001HWJ.fastq.gz
......@@ -25,3 +25,9 @@ wget https://www.encodeproject.org/files/ENCFF161HBP/@@download/ENCFF161HBP.fast
wget https://www.encodeproject.org/files/ENCFF776KZU/@@download/ENCFF776KZU.fastq.gz
wget https://www.encodeproject.org/files/ENCFF119KHM/@@download/ENCFF119KHM.fastq.gz
echo "Done with Paired-end"
echo "Downloading Single-end data set Human ENCSR000DXB and ENCSR000DXC"
wget https://www.encodeproject.org/files/ENCFF001GBW/@@download/ENCFF001GBW.fastq.gz
wget https://www.encodeproject.org/files/ENCFF001GBV/@@download/ENCFF001GBV.fastq.gz
wget https://www.encodeproject.org/files/ENCFF001HWJ/@@download/ENCFF001HWJ.fastq.gz
echo "Done with Single-end"
Test.20.tagAlign.gz 18588987 0,20,33 0.211525291335199,0.211232019956852,0.211139666755398 35 0.2123067 1500 0.209429 1.01001 0.7284536 0
......@@ -2,6 +2,7 @@ process {
executor = 'slurm'
queue = 'super'
clusterOptions = '--hold'
beforeScript= 'ulimit -Ss unlimited'
// Process specific configuration
withName: checkDesignFile {
......@@ -65,7 +66,7 @@ process {
cpus = 32
}
withName: multiqcReport {
module = ['python/3.6.1-2-anaconda', 'pandoc/2.7', 'multiqc/1.7']
module = ['python/3.6.1-2-anaconda', 'pandoc/2.7', 'singularity/3.0.2']
executor = 'local'
}
}
......@@ -74,25 +75,28 @@ params {
// Reference file paths on BioHPC
genomes {
'GRCh38' {
bwa = '/project/shared/bicf_workflow_ref/GRCh38'
bwa = '/project/shared/bicf_workflow_ref/human/GRCh38'
genomesize = 'hs'
chromsizes = '/project/shared/bicf_workflow_ref/GRCh38/genomefile.txt'
fasta = '/project/shared/bicf_workflow_ref/GRCh38/genome.fa'
gtf = '/project/shared/bicf_workflow_ref/GRCh38/gencode.gtf'
chromsizes = '/project/shared/bicf_workflow_ref/human/GRCh38/genomefile.txt'
fasta = '/project/shared/bicf_workflow_ref/human/GRCh38/genome.fa'
gtf = '/project/shared/bicf_workflow_ref/human/GRCh38/gencode.v25.chr_patch_hapl_scaff.annotation.gtf'
geneNames = '/project/shared/bicf_workflow_ref/human/GRCh38/genenames.txt'
}
'GRCh37' {
bwa = '/project/shared/bicf_workflow_ref/GRCh37'
bwa = '/project/shared/bicf_workflow_ref/human/GRCh37'
genomesize = 'hs'
chromsizes = '/project/shared/bicf_workflow_ref/GRCh37/genomefile.txt'
fasta = '/project/shared/bicf_workflow_ref/GRCh37/genome.fa'
gtf = '/project/shared/bicf_workflow_ref/GRCh37/gencode.gtf'
chromsizes = '/project/shared/bicf_workflow_ref/human/GRCh37/genomefile.txt'
fasta = '/project/shared/bicf_workflow_ref/human/GRCh37/genome.fa'
gtf = '/project/shared/bicf_workflow_ref/human/GRCh37/gencode.v19.chr_patch_hapl_scaff.annotation.gtf'
geneNames = '/project/shared/bicf_workflow_ref/human/GRCh37/genenames.txt'
}
'GRCm38' {
bwa = '/project/shared/bicf_workflow_ref/GRCm38'
bwa = '/project/shared/bicf_workflow_ref/mouse/GRCm38'
genomesize = 'mm'
chromsizes = '/project/shared/bicf_workflow_ref/GRCm38/genomefile.txt'
fasta = '/project/shared/bicf_workflow_ref/GRCm38/genome.fa'
gtf = '/project/shared/bicf_workflow_ref/GRCm38/gencode.gtf'
chromsizes = '/project/shared/bicf_workflow_ref/mouse/GRCm38/genomefile.txt'
fasta = '/project/shared/bicf_workflow_ref/mouse/GRCm38/genome.fa'
gtf = '/project/shared/bicf_workflow_ref/mouse/GRCm38/gencode.vM20.annotation.gtf'
geneNames = '/project/shared/bicf_workflow_ref/mouse/GRCm38/genenames.txt'
}
}
}
......
......@@ -28,26 +28,43 @@ params.skipMotif = false
params.skipPlotProfile = false
params.references = "$baseDir/../docs/references.md"
params.multiqc = "$baseDir/conf/multiqc_config.yaml"
params.ci = false
params.dev = false
// Assign variables if astrocyte
if (params.astrocyte) {
print("Running under astrocyte")
referenceLocation = "/project/shared/bicf_workflow_ref"
params.bwaIndex = "$referenceLocation/$params.genome"
params.chromSizes = "$referenceLocation/$params.genome/genomefile.txt"
params.fasta = "$referenceLocation/$params.genome/genome.fa"
params.gtf = "$referenceLocation/$params.genome/gencode.gtf"
if (params.genome == 'GRCh37' || params.genome == 'GRCh38') {
if (params.genome == 'GRCh37') {
params.bwaIndex = "$referenceLocation/human/$params.genome"
params.chromSizes = "$referenceLocation/human/$params.genome/genomefile.txt"
params.fasta = "$referenceLocation/human/$params.genome/genome.fa"
params.gtf = "$referenceLocation/human/$params.genome/gencode.v19.chr_patch_hapl_scaff.annotation.gtf"
params.geneNames = "$referenceLocation/human/$params.genome/genenames.txt"
params.genomeSize = 'hs'
} else if (params.genome == 'GRCm38') {
params.bwaIndex = "$referenceLocation/mouse/$params.genome"
params.chromSizes = "$referenceLocation/mouse/$params.genome/genomefile.txt"
params.fasta = "$referenceLocation/mouse/$params.genome/genome.fa"
params.gtf = "$referenceLocation/mouse/$params.genome/gencode.vM20.annotation.gtf"
params.geneNames = "$referenceLocation/mouse/$params.genome/genenames.txt"
params.genomeSize = 'mm'
} else if (params.genome == 'GRCh38') {
params.bwaIndex = "$referenceLocation/human/$params.genome"
params.chromSizes = "$referenceLocation/human/$params.genome/genomefile.txt"
params.fasta = "$referenceLocation/human/$params.genome/genome.fa"
params.gtf = "$referenceLocation/human/$params.genome/gencode.v25.chr_patch_hapl_scaff.annotation.gtf"
params.geneNames = "$referenceLocation/human/$params.genome/genenames.txt"
params.genomeSize = 'hs'
}
} else {
params.bwaIndex = params.genome ? params.genomes[ params.genome ].bwa ?: false : false
params.genomeSize = params.genome ? params.genomes[ params.genome ].genomesize ?: false : false
params.chromSizes = params.genome ? params.genomes[ params.genome ].chromsizes ?: false : false
params.fasta = params.genome ? params.genomes[ params.genome ].fasta ?: false : false
params.gtf = params.genome ? params.genomes[ params.genome ].fasta ?: false : false
params.gtf = params.genome ? params.genomes[ params.genome ].gtf ?: false : false
params.geneNames = params.genome ? params.genomes[ params.genome ].geneNames ?: false : false
}
......@@ -84,7 +101,33 @@ skipMotif = params.skipMotif
skipPlotProfile = params.skipPlotProfile
references = params.references
multiqc = params.multiqc
gtfFile = Channel.fromPath(params.gtf)
gtfFile = params.gtf
geneNames = params.geneNames
/*
* trackStart: track start of pipeline
*/
process trackStart {
// Declares no input or output channels; the script only has side effects.
// It prints the execution host name and the current resource limits, then
// sends an HTTP PUT with a JSON payload to the pipeline-tracking endpoint.
// The payload holds the workflow session ID, the pipeline name, the workflow
// start time, the Nextflow version, a status of "started", and the
// astrocyte / ci / dev parameter values.
// NOTE(review): "pipelineVersion" is a hard-coded literal in the payload;
// confirm it is kept in sync with the version declared in the manifest.
script:
"""
hostname
ulimit -a
curl -H 'Content-Type: application/json' -X PUT -d '{ \
"sessionId": "${workflow.sessionId}", \
"pipeline": "chipseq_analysis", \
"start": "${workflow.start}", \
"astrocyte": ${params.astrocyte}, \
"status": "started", \
"nextflowVersion": "${workflow.nextflow.version}", \
"pipelineVersion": "1.1.2", \
"ci": ${params.ci}, \
"dev": ${params.dev}}' \
"https://xku43pcwnf.execute-api.us-east-1.amazonaws.com/ProdDeploy/pipeline-tracking"
"""
}
// Check design file for errors
process checkDesignFile {
......@@ -468,8 +511,7 @@ process plotProfile {
input:
file ("*.pooled.fc_signal.bw") from bigwigs.collect()
file gtf from gtfFile
file bigWigList from bigwigs.collect()
output:
......@@ -482,7 +524,7 @@ process plotProfile {
script:
"""
module load deeptools/2.5.0.1
bash $baseDir/scripts/plotProfile.sh
bash $baseDir/scripts/plot_profile.sh -g $gtfFile
"""
}
......@@ -534,7 +576,7 @@ process peakAnnotation {
"""
module load R/3.3.2-gccmkl
Rscript $baseDir/scripts/annotate_peaks.R $designAnnotatePeaks $genome
Rscript $baseDir/scripts/annotate_peaks.R $designAnnotatePeaks $gtfFile $geneNames
"""
}
......@@ -637,12 +679,12 @@ process multiqcReport {
"""
module load python/3.6.1-2-anaconda
module load pandoc/2.7
module load multiqc/1.7
module load singularity/3.0.2
echo $workflow.nextflow.version > version_nextflow.txt
multiqc --version > version_multiqc.txt
singularity exec /project/shared/bicf_workflow_ref/singularity_images/bicf-multiqc-2.0.0.img multiqc --version > version_multiqc.txt
python --version &> version_python.txt
python3 $baseDir/scripts/generate_references.py -r $references -o software_references
python3 $baseDir/scripts/generate_versions.py -o software_versions
multiqc -c $multiqc .
singularity exec /project/shared/bicf_workflow_ref/singularity_images/bicf-multiqc-2.0.0.img multiqc -c $multiqc .
"""
}
......@@ -4,11 +4,28 @@ profiles {
}
}
// Enable the execution trace file and restrict it to the listed
// columns (task identity, status, timing, and CPU/memory usage).
trace {
enabled = true
file = 'pipeline_trace.txt'
fields = 'task_id,native_id,process,name,status,exit,submit,start,complete,duration,realtime,%cpu,%mem,rss'
}
// Enable the HTML timeline report, written to timeline.html.
timeline {
enabled = true
file = 'timeline.html'
}
// Enable the HTML execution report, written to report.html.
report {
enabled = true
file = 'report.html'
}
manifest {
name = 'chipseq_analysis'
description = 'BICF ChIP-seq Analysis Workflow.'
homePage = 'https://git.biohpc.swmed.edu/BICF/Astrocyte/chipseq_analysis'
version = '1.0.6'
version = '1.1.2'
mainScript = 'main.nf'
nextflowVersion = '>=0.31.0'
}
......@@ -6,40 +6,27 @@
#* --------------------------------------------------------------------------
#*
#Currently Human or Mouse
# Load libraries
library("ChIPseeker")
# Currently mouse or human
library("TxDb.Hsapiens.UCSC.hg19.knownGene")
library("TxDb.Mmusculus.UCSC.mm10.knownGene")
library("TxDb.Hsapiens.UCSC.hg38.knownGene")
library("org.Hs.eg.db")
library("org.Mm.eg.db")
library(GenomicFeatures)
# Create parser object
args <- commandArgs(trailingOnly=TRUE)
# Check input args
if (length(args) != 2) {
stop("Usage: annotate_peaks.R annotate_design.tsv genome_assembly", call.=FALSE)
if (length(args) != 3) {
stop("Usage: annotate_peaks.R annotate_design.tsv gtf geneNames", call.=FALSE)
}
design_file <- args[1]
genome_assembly <- args[2]
gtf <- args[2]
geneNames <- args[3]
# Load UCSC Known Genes
if(genome_assembly=='GRCh37') {
txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene
annodb <- 'org.Hs.eg.db'
} else if(genome_assembly=='GRCm38') {
txdb <- TxDb.Mmusculus.UCSC.mm10.knownGene
annodb <- 'org.Mm.eg.db'
} else if(genome_assembly=='GRCh38') {
txdb <- TxDb.Hsapiens.UCSC.hg38.knownGene
annodb <- 'org.Hs.eg.db'
}
txdb <- makeTxDbFromGFF(gtf)
sym <- read.table(geneNames, header=T, sep='\t') [,4:5]
# Output version of ChIPseeker
chipseeker_version = packageVersion('ChIPseeker')
......@@ -54,18 +41,19 @@ names(files) <- design$Condition
# Granges of files
peaks <- lapply(files, readPeakFile, as = "GRanges", header = FALSE)
peakAnnoList <- lapply(peaks, annotatePeak, TxDb=txdb, annoDb=annodb, tssRegion=c(-3000, 3000), verbose=FALSE)
peakAnnoList <- lapply(peaks, annotatePeak, TxDb=txdb, tssRegion=c(-3000, 3000), verbose=FALSE)
column_names <- c("chr", "start", "end", "width", "strand_1", "name", "score", "strand", "signalValue",
column_names <- c("geneId","chr", "start", "end", "width", "strand_1", "name", "score", "strand", "signalValue",
"pValue", "qValue", "peak", "annotation", "geneChr", "geneStart", "geneEnd",
"geneLength" ,"geneStrand", "geneId", "transcriptId", "distanceToTSS",
"ENSEMBL", "symbol", "geneName")
"geneLength" ,"geneStrand", "transcriptId", "distanceToTSS", "symbol")
for(index in c(1:length(peakAnnoList))) {
filename <- paste(names(peaks)[index], ".chipseeker_annotation.tsv", sep="")
df <- as.data.frame(peakAnnoList[[index]])
colnames(df) <- column_names
write.table(df[ , !(names(df) %in% c('strand_1'))], filename, sep="\t" ,quote=F, row.names=F)
df$geneId <- sapply(strsplit(as.character(df$geneId), split = "\\."), "[[", 1)
df_final <- merge(df, sym, by.x="geneId", by.y="ensembl", all.x=T)
colnames(df_final) <- column_names
write.table(df_final[ , !(names(df_final) %in% c('strand_1'))], filename, sep="\t" ,quote=F, row.names=F)
# Draw individual plots
......
......@@ -138,8 +138,20 @@ def call_peaks_macs(experiment, xcor, control, prefix, genome_size, chrom_sizes)
with open(xcor, 'r') as xcor_fh:
firstline = xcor_fh.readline()
frag_lengths = firstline.split()[2] # third column
fragment_length = frag_lengths.split(',')[0] # grab first value
logger.info("Fraglen %s", fragment_length)
frag_lengths_array = frag_lengths.split(',')
fragment_length = 0
fragment = False
# Loop through all values of fragment length
for f in frag_lengths.split(','):
fragment_length = f
logger.info("Fraglen %s", fragment_length)
if int(fragment_length) > 50:
fragment = True
break
if fragment == False:
logger.info('Error in cross-correlation analysis: %s', frag_lengths_array)
raise Exception("Error in cross-correlation analysis: %s" % frag_lengths_array)
# Generate narrow peaks and preliminary signal tracks
......
......@@ -46,7 +46,7 @@ SOFTWARE_REGEX = {
'MEME-ChIP': ['motifSearch_vf/version_memechip.txt', r"Version (\S+)"],
'DiffBind': ['diffPeaks_vf/version_DiffBind.txt', r"Version (\S+)\""],
'deepTools': ['experimentQC_vf/version_deeptools.txt', r"deeptools (\S+)"],
'Python': ['version_python.txt', r"python, version (\S+)"],
'Python': ['version_python.txt', r"Python (\S+)"],
'MultiQC': ['version_multiqc.txt', r"multiqc, version (\S+)"],
}
......