Skip to content
Snippets Groups Projects
Commit 3482c761 authored by Venkat Malladi's avatar Venkat Malladi
Browse files

Merge branch '24-astrocyte' into 'master'

Resolve "Test Astrocyte"

Closes #24

See merge request !25
parents a7b7e90c 34b002a0
Branches
Tags
1 merge request!25Resolve "Test Astrocyte"
Pipeline #3579 failed with stages
in 4 minutes and 59 seconds
......@@ -6,6 +6,7 @@ before_script:
stages:
- unit
- astrocyte
- single
- multiple
- skip
......@@ -16,12 +17,22 @@ user_configuration:
- pytest -m unit
- pytest -m unit --cov=./workflow/scripts
# CI job for the `astrocyte` stage: validates the workflow package with the
# Astrocyte command-line tool.
astrocyte:
stage: astrocyte
script:
- module load astrocyte/0.1.0
# The nextflow module is unloaded before validation runs.
- module unload nextflow
# Validation is invoked from the parent directory and addresses the package
# by the name `chipseq_analysis`.
# NOTE(review): presumably this is the name of the checkout directory - confirm.
- cd ..
- astrocyte_cli validate chipseq_analysis
# NOTE(review): no artifact `paths` are visible here; only the expiry is set.
artifacts:
expire_in: 2 days
single_end_mouse:
stage: single
only:
- master
script:
- nextflow run workflow/main.nf -resume
- nextflow run workflow/main.nf --astrocyte 'false' -resume
- pytest -m singleend
artifacts:
expire_in: 2 days
......@@ -33,7 +44,7 @@ paired_end_human:
except:
- master
script:
- nextflow run workflow/main.nf --designFile "$CI_PROJECT_DIR/test_data/design_ENCSR729LGA_PE.txt" --genome 'GRCh38' --pairedEnd true -resume
- nextflow run workflow/main.nf --designFile "$CI_PROJECT_DIR/test_data/design_ENCSR729LGA_PE.txt" --genome 'GRCh38' --pairedEnd true --astrocyte 'false' -resume
- pytest -m pairedend
artifacts:
expire_in: 2 days
......@@ -45,7 +56,7 @@ single_end_diff:
except:
- master
script:
- nextflow run workflow/main.nf --designFile "$CI_PROJECT_DIR/test_data/design_diff_SE.txt" --genome 'GRCm38' -resume
- nextflow run workflow/main.nf --designFile "$CI_PROJECT_DIR/test_data/design_diff_SE.txt" --genome 'GRCm38' --astrocyte 'false' -resume
- pytest -m singlediff
artifacts:
expire_in: 2 days
......@@ -55,7 +66,7 @@ paired_end_diff:
- master
stage: multiple
script:
- nextflow run workflow/main.nf --designFile "$CI_PROJECT_DIR/test_data/design_diff_PE.txt" --genome 'GRCh38' --pairedEnd true -resume
- nextflow run workflow/main.nf --designFile "$CI_PROJECT_DIR/test_data/design_diff_PE.txt" --genome 'GRCh38' --pairedEnd true --astrocyte 'false' -resume
- pytest -m paireddiff
artifacts:
expire_in: 2 days
......@@ -65,7 +76,7 @@ single_end_skip:
only:
- master
script:
- nextflow run workflow/main.nf --designFile "$CI_PROJECT_DIR/test_data/design_diff_SE.txt" --genome 'GRCm38' --skipDiff true --skipMotif true -resume
- nextflow run workflow/main.nf --designFile "$CI_PROJECT_DIR/test_data/design_diff_SE.txt" --genome 'GRCm38' --skipDiff true --skipMotif true --astrocyte 'false' -resume
- pytest -m singleskip_true
artifacts:
expire_in: 2 days
......@@ -11,7 +11,7 @@ name: 'chipseq_analysis_bicf'
# Who wrote this?
author: 'Beibei Chen and Venkat Malladi'
# A contact email address for questions
email: 'biohpc-help@utsouthwestern.edu'
email: 'bicf@utsouthwestern.edu'
# A more informative title for the workflow package
title: 'BICF ChIP-seq Analysis Workflow'
# A summary of the workflow package in plain text
......@@ -27,7 +27,7 @@ description: |
# web interface. These files are in the 'docs' subdirectory. The first file
# listed will be used as a documentation index and is index.md by convention
documentation_files:
- ['index.md', 'chipseq-analysis']
- 'index.md'
# -----------------------------------------------------------------------------
# NEXTFLOW WORKFLOW CONFIGURATION
......@@ -42,15 +42,17 @@ workflow_modules:
- 'python/3.6.1-2-anaconda'
- 'trimgalore/0.4.1'
- 'bwa/intel/0.7.12'
- 'samtools/1.6'
- 'sambamba/0.6.6'
- 'bedtools/2.26.0'
- 'deeptools/2.5.0.1'
- 'phantompeakqualtools/1.2'
- 'macs/2.1.0-20151222'
- 'UCSC_userApps/v317'
- 'R/3.4.1-gccmkl'
- 'R/3.3.2-gccmkl'
- 'meme/4.11.1-gcc-openmpi'
- 'python/2.7.x-anaconda'
- 'pandoc/2.7'
# A list of parameters used by the workflow, defining how to present them,
# options etc in the web interface. For each parameter:
......@@ -92,7 +94,8 @@ workflow_parameters:
description: |
One or more input FASTQ files from a ChIP-seq experiment and a design
file with the link between the same file name and sample id
regex: ".*(fastq|fq)*"
regex: ".*(fastq|fq)*gz"
min: 2
- id: pairedEnd
type: select
......@@ -113,18 +116,27 @@ workflow_parameters:
description: |
A design file listing sample id, fastq files, corresponding control id
and additional information about the sample.
regex: ".*tsv"
regex: ".*txt"
- id: genome
type: select
required: true
choices:
- [ 'GRCh38', 'Human GRCh38']
- [ 'GRCh37', 'Human GRCh37']
- [ 'GRCm38', 'Mouse GRCm38']
required: true
description: |
Reference species and genome used for alignment and subsequent analysis.
- id: astrocyte
type: select
choices:
- [ 'true', 'true' ]
required: true
default: 'true'
description: |
Ensure configuration for astrocyte.
# -----------------------------------------------------------------------------
# SHINY APP CONFIGURATION
......@@ -144,8 +156,4 @@ vizapp_cran_packages:
# List of any Bioconductor packages, not provided by the modules,
# that must be made available to the vizapp
vizapp_bioc_packages:
- qusage
# - ballgown
vizapp_github_packages:
- js229/Vennerable
vizapp_bioc_packages: []
## Create new env in specific folder
```shell
conda create -p /project/shared/bicf_workflow_ref/chipseq_bchen4/ -c r r-essentials
#Add channels
conda config --add channels conda-forge
conda config --add channels r
conda config --add channels bioconda
pip install --user twobitreader
conda install -c r r-xml
```
Install bioconductor in R console:
```R
source("http://bioconductor.org/biocLite.R")
biocLite()
biocLite(c("DiffBind","ChIPseeker"))
```
\ No newline at end of file
......@@ -5,22 +5,39 @@
// Define Input variables
params.reads = "$baseDir/../test_data/*.fastq.gz"
params.pairedEnd = false
params.pairedEnd = 'false'
params.designFile = "$baseDir/../test_data/design_ENCSR238SGC_SE.txt"
params.genome = 'GRCm38'
params.genomes = []
params.bwaIndex = params.genome ? params.genomes[ params.genome ].bwa ?: false : false
params.genomeSize = params.genome ? params.genomes[ params.genome ].genomesize ?: false : false
params.chromSizes = params.genome ? params.genomes[ params.genome ].chromsizes ?: false : false
params.fasta = params.genome ? params.genomes[ params.genome ].fasta ?: false : false
params.cutoffRatio = 1.2
params.outDir= "$baseDir/output"
params.extendReadsLen = 100
params.topPeakCount = 600
params.astrocyte = 'false'
params.skipDiff = false
params.skipMotif = false
params.references = "$baseDir/../docs/references.md"
// Assign the reference-file parameters, depending on whether the workflow is
// running under Astrocyte.
//
// params.astrocyte can be a Boolean or a String (its default above is the
// String 'false'). A non-empty String is truthy in Groovy, so the value is
// normalised through its text form before it is tested; otherwise the default
// 'false' would always select the Astrocyte branch.
if (params.astrocyte.toString().toBoolean()) {
    print("Running under astrocyte")
    referenceLocation = "/project/shared/bicf_workflow_ref"
    // Interpolate params.genome: the plain `genome` variable is only assigned
    // further down the script and is not defined at this point.
    params.bwaIndex = "$referenceLocation/${params.genome}"
    params.chromSizes = "$referenceLocation/${params.genome}/genomefile.txt"
    params.fasta = "$referenceLocation/${params.genome}/genome.fa.txt"
    // Pick the genome-size shorthand from the selected genome build.
    if (params.genome == 'GRCh37' || params.genome == 'GRCh38') {
        params.genomeSize = 'hs'
    } else if (params.genome == 'GRCm38') {
        params.genomeSize = 'mm'
    }
} else {
    // Outside Astrocyte, look every reference up in the params.genomes map;
    // each value falls back to false when the genome or the entry is missing.
    params.bwaIndex = params.genome ? params.genomes[ params.genome ].bwa ?: false : false
    params.genomeSize = params.genome ? params.genomes[ params.genome ].genomesize ?: false : false
    params.chromSizes = params.genome ? params.genomes[ params.genome ].chromsizes ?: false : false
    params.fasta = params.genome ? params.genomes[ params.genome ].fasta ?: false : false
}
// Check inputs
if( params.bwaIndex ){
bwaIndex = Channel
......@@ -38,7 +55,6 @@ readsList = Channel
.collectFile( name: 'fileList.tsv', newLine: true )
// Define regular variables
pairedEnd = params.pairedEnd
designFile = params.designFile
genomeSize = params.genomeSize
genome = params.genome
......@@ -52,6 +68,12 @@ skipDiff = params.skipDiff
skipMotif = params.skipMotif
references = params.references
// Derive the Boolean pairedEnd flag from params.pairedEnd.
// The comparison is made on the text form so that both the String 'false'
// and a Boolean false select single-end mode: in Groovy a Boolean false is
// not equal to the String 'false', so comparing the raw value would treat a
// Boolean false as paired-end. Any other value enables paired-end mode.
pairedEnd = params.pairedEnd.toString() != 'false'
// Check design file for errors
process checkDesignFile {
......@@ -70,11 +92,13 @@ process checkDesignFile {
if (pairedEnd) {
"""
module load python/3.6.1-2-anaconda
python3 $baseDir/scripts/check_design.py -d $designFile -f $readsList -p
"""
}
else {
"""
module load python/3.6.1-2-anaconda
python $baseDir/scripts/check_design.py -d $designFile -f $readsList
"""
}
......@@ -112,11 +136,15 @@ process trimReads {
if (pairedEnd) {
"""
module load python/3.6.1-2-anaconda
module load trimgalore/0.4.1
python3 $baseDir/scripts/trim_reads.py -f ${reads[0]} ${reads[1]} -s $sampleId -p
"""
}
else {
"""
module load python/3.6.1-2-anaconda
module load trimgalore/0.4.1
python3 $baseDir/scripts/trim_reads.py -f ${reads[0]} -s $sampleId
"""
}
......@@ -126,6 +154,7 @@ process trimReads {
// Align trimmed reads using bwa
process alignReads {
queue '128GB,256GB,256GBv1'
tag "$sampleId-$replicate"
publishDir "$outDir/${task.process}/${sampleId}", mode: 'copy'
......@@ -144,11 +173,17 @@ process alignReads {
if (pairedEnd) {
"""
module load python/3.6.1-2-anaconda
module load bwa/intel/0.7.12
module load samtools/1.6
python3 $baseDir/scripts/map_reads.py -f ${reads[0]} ${reads[1]} -r ${index}/genome.fa -s $sampleId -p
"""
}
else {
"""
module load python/3.6.1-2-anaconda
module load bwa/intel/0.7.12
module load samtools/1.6
python3 $baseDir/scripts/map_reads.py -f $reads -r ${index}/genome.fa -s $sampleId
"""
}
......@@ -158,6 +193,7 @@ process alignReads {
// Dedup reads using sambamba
process filterReads {
queue '128GB,256GB,256GBv1'
tag "$sampleId-$replicate"
publishDir "$outDir/${task.process}/${sampleId}", mode: 'copy'
......@@ -178,11 +214,19 @@ process filterReads {
if (pairedEnd) {
"""
module load python/3.6.1-2-anaconda
module load samtools/1.6
module load sambamba/0.6.6
module load bedtools/2.26.0
python3 $baseDir/scripts/map_qc.py -b $mapped -p
"""
}
else {
"""
module load python/3.6.1-2-anaconda
module load samtools/1.6
module load sambamba/0.6.6
module load bedtools/2.26.0
python3 $baseDir/scripts/map_qc.py -b $mapped
"""
}
......@@ -199,6 +243,7 @@ dedupReads
// Quality Metrics using deeptools
process experimentQC {
queue '128GB,256GB,256GBv1'
publishDir "$outDir/${task.process}", mode: 'copy'
input:
......@@ -213,6 +258,8 @@ process experimentQC {
script:
"""
module load python/3.6.1-2-anaconda
module load deeptools/2.5.0.1
python3 $baseDir/scripts/experiment_qc.py -d $dedupDesign -e $extendReadsLen
"""
......@@ -221,6 +268,7 @@ process experimentQC {
// Convert reads to bam
process convertReads {
queue '128GB,256GB,256GBv1'
tag "$sampleId-$replicate"
publishDir "$outDir/${task.process}/${sampleId}", mode: 'copy'
......@@ -237,11 +285,17 @@ process convertReads {
if (pairedEnd) {
"""
module load python/3.6.1-2-anaconda
module load samtools/1.6
module load bedtools/2.26.0
python3 $baseDir/scripts/convert_reads.py -b $deduped -p
"""
}
else {
"""
module load python/3.6.1-2-anaconda
module load samtools/1.6
module load bedtools/2.26.0
python3 $baseDir/scripts/convert_reads.py -b $deduped
"""
}
......@@ -268,6 +322,8 @@ process crossReads {
if (pairedEnd) {
"""
module load python/3.6.1-2-anaconda
module load phantompeakqualtools/1.2
python3 $baseDir/scripts/xcor.py -t $seTagAlign -p
"""
}
......@@ -301,6 +357,7 @@ process defineExpDesignFiles {
script:
"""
module load python/3.6.1-2-anaconda
python3 $baseDir/scripts/experiment_design.py -d $xcorDesign
"""
......@@ -326,11 +383,13 @@ process poolAndPsuedoReads {
if (pairedEnd) {
"""
module load python/3.6.1-2-anaconda
python3 $baseDir/scripts/pool_and_psuedoreplicate.py -d $experimentObjs -c $cutoffRatio -p
"""
}
else {
"""
module load python/3.6.1-2-anaconda
python3 $baseDir/scripts/pool_and_psuedoreplicate.py -d $experimentObjs -c $cutoffRatio
"""
}
......@@ -361,11 +420,21 @@ process callPeaksMACS {
if (pairedEnd) {
"""
module load python/3.6.1-2-anaconda
module load macs/2.1.0-20151222
module load UCSC_userApps/v317
module load bedtools/2.26.0
module load phantompeakqualtools/1.2
python3 $baseDir/scripts/call_peaks_macs.py -t $tagAlign -x $xcor -c $controlTagAlign -s $sampleId -g $genomeSize -z $chromSizes -p
"""
}
else {
"""
module load python/3.6.1-2-anaconda
module load macs/2.1.0-20151222
module load UCSC_userApps/v317
module load bedtools/2.26.0
module load phantompeakqualtools/1.2
python3 $baseDir/scripts/call_peaks_macs.py -t $tagAlign -x $xcor -c $controlTagAlign -s $sampleId -g $genomeSize -z $chromSizes
"""
}
......@@ -401,6 +470,8 @@ process consensusPeaks {
script:
"""
module load python/3.6.1-2-anaconda
module load bedtools/2.26.0
python3 $baseDir/scripts/overlap_peaks.py -d $peaksDesign -f $preDiffDesign
"""
......@@ -423,6 +494,7 @@ process peakAnnotation {
script:
"""
module load R/3.3.2-gccmkl
Rscript $baseDir/scripts/annotate_peaks.R $designAnnotatePeaks $genome
"""
......@@ -444,11 +516,13 @@ process motifSearch {
file('version_*.txt') into motifSearchVersions
when:
!skipMotif
// Runs the Python motif-search script. Load the Python interpreter together
// with the meme and bedtools modules rather than the R module, which this
// command does not invoke.
// NOTE(review): meme/bedtools are presumed to be what motif_search.py shells
// out to - confirm against the script.
script:
"""
module load python/3.6.1-2-anaconda
module load meme/4.11.1-gcc-openmpi
module load bedtools/2.26.0
python3 $baseDir/scripts/motif_search.py -d $designMotifSearch -g $fasta -p $topPeakCount
"""
}
......@@ -476,10 +550,15 @@ process diffPeaks {
file('version_*.txt') into diffPeaksVersions
when:
noUniqueExperiments > 1 && !skipDiff
// Runs the R differential-peak script, so the R module must be loaded for
// Rscript to be available; the python/meme/bedtools modules are not invoked
// by this command.
// NOTE(review): confirm diff_peaks.R does not call out to any of those tools.
script:
"""
module load R/3.3.2-gccmkl
Rscript $baseDir/scripts/diff_peaks.R $designDiffPeaks
"""
}
......@@ -514,6 +593,5 @@ process softwareReport {
echo $workflow.nextflow.version > version_nextflow.txt
python3 $baseDir/scripts/generate_references.py -r $references -o software_references
python3 $baseDir/scripts/generate_versions.py -o software_versions
"""
}
......@@ -3,3 +3,12 @@ profiles {
includeConfig 'conf/biohpc.config'
}
}
// Pipeline metadata: name, description, home page, version, entry script and
// the Nextflow version constraint.
manifest {
name = 'chipseq_analysis'
description = 'BICF ChIP-seq Analysis Workflow.'
// NOTE(review): this URL points at the nf-core/rnaseq repository, not at a
// ChIP-seq project - looks like a copy/paste leftover; confirm the intended
// home page.
homePage = 'https://github.com/nf-core/rnaseq'
version = '1.0.0'
mainScript = 'main.nf'
nextflowVersion = '>=0.31.0'
}
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment