sample_id experiment_id biosample factor treatment replicate control_id fastq_read1
ENCLB144FDT ENCSR238SGC limb H3K4me1 None 1 ENCLB304SBJ ENCFF833BLU.fastq.gz
ENCLB831RUI ENCSR238SGC limb H3K4me1 None 2 ENCLB410VVO ENCFF646LXU.fastq.gz
ENCLB304SBJ ENCSR687ALB limb Control None 1 ENCLB304SBJ ENCFF524CAC.fastq.gz
ENCLB410VVO ENCSR687ALB limb Control None 2 ENCLB410VVO ENCFF163AJI.fastq.gz

sample_id experiment_id biosample factor treatment replicate control_id fastq_read1 fastq_read2
ENCLB637LZP ENCSR729LGA MCF-7 SP1 None 1 ENCLB678IDC ENCFF957SQS.fastq.gz ENCFF582IOZ.fastq.gz
ENCLB568IYX ENCSR729LGA MCF-7 SP1 None 2 ENCLB336TVW ENCFF330MCZ.fastq.gz ENCFF293YFE.fastq.gz
ENCLB678IDC ENCSR217LRF MCF-7 Control None 1 ENCLB678IDC ENCFF002DTU.fastq.gz ENCFF002EFI.fastq.gz
ENCLB336TVW ENCSR217LRF MCF-7 Control None 2 ENCLB336TVW ENCFF002EFG.fastq.gz ENCFF002DTS.fastq.gz

sample_id experiment_id biosample factor treatment replicate control_id fastq_read1 fastq_read2
ENCLB637LZP ENCSR729LGA MCF-7 SP1 None 1 ENCLB678IDC ENCFF957SQS.fastq.gz ENCFF582IOZ.fastq.gz
ENCLB568IYX ENCSR729LGA MCF-7 SP1 None 2 ENCLB336TVW ENCFF330MCZ.fastq.gz ENCFF293YFE.fastq.gz
ENCLB678IDC ENCSR217LRF MCF-7 Control None 1 ENCLB678IDC ENCFF002DTU.fastq.gz ENCFF002EFI.fastq.gz
ENCLB336TVW ENCSR217LRF MCF-7 Control None 2 ENCLB336TVW ENCFF002EFG.fastq.gz ENCFF002DTS.fastq.gz
ENCLB552ACZ ENCSR757EMK MCF-7 SUZ12 None 1 ENCLB678IDC ENCFF833EZX.fastq.gz ENCFF161HBP.fastq.gz
ENCLB872TQR ENCSR757EMK MCF-7 SUZ12 None 2 ENCLB336TVW ENCFF776KZU.fastq.gz ENCFF119KHM.fastq.gz

sample_id experiment_id biosample factor treatment replicate control_id fastq_read1
ENCLB144FDT ENCSR238SGC limb H3K4me1 None 1 ENCLB304SBJ ENCFF833BLU.fastq.gz
ENCLB831RUI ENCSR238SGC limb H3K4me1 None 2 ENCLB410VVO ENCFF646LXU.fastq.gz
ENCLB304SBJ ENCSR687ALB limb Control None 1 ENCLB304SBJ ENCFF524CAC.fastq.gz
ENCLB410VVO ENCSR687ALB limb Control None 2 ENCLB410VVO ENCFF163AJI.fastq.gz
ENCLB140BPV ENCSR272GNQ midbrain H3K4me1 None 1 ENCLB841FLB ENCFF278VQW.fastq.gz
ENCLB785CNN ENCSR272GNQ midbrain H3K4me1 None 2 ENCLB735ZEL ENCFF466CFM.fastq.gz
ENCLB841FLB ENCSR842LMA midbrain Control None 1 ENCLB841FLB ENCFF914QXH.fastq.gz
ENCLB735ZEL ENCSR842LMA midbrain Control None 2 ENCLB735ZEL ENCFF942UQV.fastq.gz

sample_id experiment_id biosample factor treatment replicate control_id fastq_read1
ENCLB497XZB ENCSR000DXB Panc1 H3K4me3 None 1 ENCLB304SBJ ENCFF001GBW.fastq.gz
ENCLB304SBJ ENCSR000DXC Panc1 Control None 1 ENCLB304SBJ ENCFF001HWJ.fastq.gz
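The sheets above are the tab-separated design files consumed by the workflow's checkDesignFile step (scripts/check_design.py does the full validation). A minimal pre-flight sketch, assuming a local copy of the first single-end sheet and FASTQs in the working directory (the file name used here is hypothetical):

# Sketch: confirm every FASTQ listed in a single-end design sheet is present locally
# before launching the workflow; in the SE sheets the FASTQ is the last column.
design=design_ENCSR238SGC_SE.txt   # hypothetical local copy of the first sheet above
awk 'NR > 1 {print $NF}' "$design" | while read -r fq; do
    [ -f "$fq" ] || echo "missing FASTQ: $fq"
done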
echo "Downloading Single-end data set Mouse ENCSR238SGC and ENCSR687ALB"
wget https://www.encodeproject.org/files/ENCFF833BLU/@@download/ENCFF833BLU.fastq.gz
wget https://www.encodeproject.org/files/ENCFF646LXU/@@download/ENCFF646LXU.fastq.gz
wget https://www.encodeproject.org/files/ENCFF524CAC/@@download/ENCFF524CAC.fastq.gz
wget https://www.encodeproject.org/files/ENCFF163AJI/@@download/ENCFF163AJI.fastq.gz
echo "Downloading Single-end data set Mouse ENCSR272GNQ and ENCSR842LMA"
wget https://www.encodeproject.org/files/ENCFF278VQW/@@download/ENCFF278VQW.fastq.gz
wget https://www.encodeproject.org/files/ENCFF466CFM/@@download/ENCFF466CFM.fastq.gz
wget https://www.encodeproject.org/files/ENCFF914QXH/@@download/ENCFF914QXH.fastq.gz
wget https://www.encodeproject.org/files/ENCFF942UQV/@@download/ENCFF942UQV.fastq.gz
echo "Done with Single-end"
echo "Downloading Paired-end data set Human ENCSR729LGA and ENCSR217LRF"
wget https://www.encodeproject.org/files/ENCFF957SQS/@@download/ENCFF957SQS.fastq.gz
wget https://www.encodeproject.org/files/ENCFF582IOZ/@@download/ENCFF582IOZ.fastq.gz
wget https://www.encodeproject.org/files/ENCFF330MCZ/@@download/ENCFF330MCZ.fastq.gz
wget https://www.encodeproject.org/files/ENCFF293YFE/@@download/ENCFF293YFE.fastq.gz
wget https://www.encodeproject.org/files/ENCFF002DTU/@@download/ENCFF002DTU.fastq.gz
wget https://www.encodeproject.org/files/ENCFF002EFI/@@download/ENCFF002EFI.fastq.gz
wget https://www.encodeproject.org/files/ENCFF002EFG/@@download/ENCFF002EFG.fastq.gz
wget https://www.encodeproject.org/files/ENCFF002DTS/@@download/ENCFF002DTS.fastq.gz
echo "Downloading Paired-end data set Human ENCSR757EMK"
wget https://www.encodeproject.org/files/ENCFF833EZX/@@download/ENCFF833EZX.fastq.gz
wget https://www.encodeproject.org/files/ENCFF161HBP/@@download/ENCFF161HBP.fastq.gz
wget https://www.encodeproject.org/files/ENCFF776KZU/@@download/ENCFF776KZU.fastq.gz
wget https://www.encodeproject.org/files/ENCFF119KHM/@@download/ENCFF119KHM.fastq.gz
echo "Done with Paired-end"
echo "Downloading Single-end data set Human ENCSR000DXB and ENCSR000DXC"
wget https://www.encodeproject.org/files/ENCFF001GBW/@@download/ENCFF001GBW.fastq.gz
wget https://www.encodeproject.org/files/ENCFF001GBV/@@download/ENCFF001GBV.fastq.gz
wget https://www.encodeproject.org/files/ENCFF001HWJ/@@download/ENCFF001HWJ.fastq.gz
echo "Done with Single-end"
Test.20.tagAlign.gz 18588987 0,20,33 0.211525291335199,0.211232019956852,0.211139666755398 35 0.2123067 1500 0.209429 1.01001 0.7284536 0
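The single line above appears to be a phantompeakqualtools cross-correlation record, i.e. the *.cc.qc format that the MultiQC configuration below picks up; in that 11-column run_spp.R layout the third field is the list of estimated fragment lengths and the ninth and tenth fields are NSC and RSC. A quick extraction sketch (the file name Test.20.cc.qc is hypothetical):

# Sketch: pull the estimated fragment lengths, NSC, and RSC out of a *.cc.qc record.
awk '{printf "%s estFragLen=%s NSC=%s RSC=%s\n", $1, $3, $9, $10}' Test.20.cc.qc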
workflow/conf/bicf_logo.png (binary image, 24.3 KiB)
process {
executor = 'slurm'
queue = 'super'
clusterOptions = '--hold'
beforeScript = 'ulimit -Ss unlimited'
// Process specific configuration
withName: checkDesignFile {
module = ['python/3.6.1-2-anaconda']
executor = 'local'
}
withName: trimReads {
module = ['python/3.6.1-2-anaconda', 'trimgalore/0.4.1']
cpus = 32
}
withName: alignReads {
module = ['python/3.6.1-2-anaconda', 'bwa/intel/0.7.12', 'samtools/1.6']
queue = '128GB,256GB,256GBv1'
}
withName: filterReads {
module = ['python/3.6.1-2-anaconda', 'samtools/1.6', 'sambamba/0.6.6', 'bedtools/2.26.0']
queue = '128GB,256GB,256GBv1'
}
withName: experimentQC {
module = ['python/3.6.1-2-anaconda', 'deeptools/2.5.0.1']
queue = '128GB,256GB,256GBv1'
}
withName: convertReads {
module = ['python/3.6.1-2-anaconda', 'samtools/1.6', 'bedtools/2.26.0']
queue = '128GB,256GB,256GBv1'
}
withName: crossReads {
module = ['python/3.6.1-2-anaconda', 'phantompeakqualtools/1.2']
cpus = 32
}
withName: defineExpDesignFiles {
module = ['python/3.6.1-2-anaconda']
executor = 'local'
}
withName: poolAndPsuedoReads {
module = ['python/3.6.1-2-anaconda']
executor = 'local'
}
withName: callPeaksMACS {
module = ['python/3.6.1-2-anaconda', 'macs/2.1.0-20151222', 'UCSC_userApps/v317', 'bedtools/2.26.0', 'phantompeakqualtools/1.2']
queue = '128GB,256GB,256GBv1'
}
withName: plotProfile {
module = ['deeptools/2.5.0.1']
cpus = 32
}
withName: consensusPeaks {
module = ['python/3.6.1-2-anaconda', 'bedtools/2.26.0']
executor = 'local'
}
withName: peakAnnotation {
module = ['R/3.3.2-gccmkl']
executor = 'local'
}
withName: diffPeaks {
module = ['R/3.3.2-gccmkl']
cpus = 32
}
withName: motifSearch {
module = ['python/3.6.1-2-anaconda', 'meme/4.11.1-gcc-openmpi', 'bedtools/2.26.0']
cpus = 32
}
withName: multiqcReport {
module = ['python/3.6.1-2-anaconda', 'pandoc/2.7', 'singularity/3.0.2']
executor = 'local'
}
}
params {
// Reference file paths on BioHPC
genomes {
'GRCh38' {
bwa = '/project/shared/bicf_workflow_ref/human/GRCh38'
genomesize = 'hs'
chromsizes = '/project/shared/bicf_workflow_ref/human/GRCh38/genomefile.txt'
fasta = '/project/shared/bicf_workflow_ref/human/GRCh38/genome.fa'
gtf = '/project/shared/bicf_workflow_ref/human/GRCh38/gencode.v25.chr_patch_hapl_scaff.annotation.gtf'
geneNames = '/project/shared/bicf_workflow_ref/human/GRCh38/genenames.txt'
}
'GRCh37' {
bwa = '/project/shared/bicf_workflow_ref/human/GRCh37'
genomesize = 'hs'
chromsizes = '/project/shared/bicf_workflow_ref/human/GRCh37/genomefile.txt'
fasta = '/project/shared/bicf_workflow_ref/human/GRCh37/genome.fa'
gtf = '/project/shared/bicf_workflow_ref/human/GRCh37/gencode.v19.chr_patch_hapl_scaff.annotation.gtf'
geneNames = '/project/shared/bicf_workflow_ref/human/GRCh37/genenames.txt'
}
'GRCm38' {
bwa = '/project/shared/bicf_workflow_ref/mouse/GRCm38'
genomesize = 'mm'
chromsizes = '/project/shared/bicf_workflow_ref/mouse/GRCm38/genomefile.txt'
fasta = '/project/shared/bicf_workflow_ref/mouse/GRCm38/genome.fa'
gtf = '/project/shared/bicf_workflow_ref/mouse/GRCm38/gencode.vM20.annotation.gtf'
geneNames = '/project/shared/bicf_workflow_ref/mouse/GRCm38/genenames.txt'
}
}
}
trace {
enabled = true
file = 'pipeline_trace.txt'
fields = 'task_id,native_id,process,name,status,exit,submit,start,complete,duration,realtime,%cpu,%mem,rss'
}
timeline {
enabled = true
file = 'timeline.html'
}
report {
enabled = true
file = 'report.html'
}
# Custom Logo
custom_logo: 'bicf_logo.png'
custom_logo_url: 'https://www.utsouthwestern.edu/labs/bioinformatics/'
custom_logo_title: 'Bioinformatics Core Facility'

report_header_info:
  - Contact E-mail: 'bicf@utsouthwestern.edu'
  - Application Type: 'ChIP-seq'
  - Department: 'Bioinformatic Core Facility, Department of Bioinformatics'
  - Contributors and Licensing: 'See https://doi.org/10.5281/zenodo.2648844'

# Title to use for the report.
title: BICF ChIP-seq Analysis Report
report_comment: >
  This report has been generated by the <a href="https://git.biohpc.swmed.edu/BICF/Astrocyte/chipseq_analysis/" target="_blank">BICF/chipseq_analysis</a>
  pipeline.

extra_fn_clean_exts:
  - 'pbc.qc'
  - 'cc.qc'

fn_ignore_files:
  - '*dedup.flagstat.qc'

custom_data:
  library_complexity:
    file_format: 'tsv'
    id: 'library_complexity'
    contents: 'TotalReadPairs DistinctReadPairs OneReadPair TwoReadPairs NRF PBC1 PBC2'
    section_name: 'Library complexity'
    plot_type: 'generalstats'
    pconfig:
      TotalReadPairs:
        decimalPlaces: 0
        shared_key: read_count
      DistinctReadPairs:
        decimalPlaces: 0
        shared_key: read_count
      NRF:
        decimalPlaces: 2
      PBC1:
        decimalPlaces: 2
      PBC2:
        decimalPlaces: 2

sp:
  phantompeakqualtools/out:
    fn: '*cc.qc'
  library_complexity:
    fn: '*pbc.qc'
  macs2:
    fn: '*_peaks.xls'

report_section_order:
  cutadapt:
    order: -1000
  Samtools:
    order: -1100
  Software_Versions:
    order: -1200
  Software_References:
    order: -1300

table_columns_placement:
  library_complexity:
    TotalReadPairs: 1100
    DistinctReadPairs: 1200
    NRF: 1300
    PBC1: 1400
    PBC2: 1500
  phantompeakqualtools:
    Estimated_Fragment_Length_bp: 1600
    NSC: 1700
    RSC: 1800

table_columns_visible:
  cutadapt:
    percent_trimmed: False
  library_complexity:
    OneReadPair: False
    TwoReadPairs: False

table_cond_formatting_rules:
  library_complexity_mqc-generalstats-library_complexity-NRF:
    pass:
      - gt: 0.8
    warn:
      - lt: 0.8
    fail:
      - lt: 0.5
  library_complexity_mqc-generalstats-library_complexity-PBC1:
    pass:
      - gt: 0.8
    warn:
      - lt: 0.8
    fail:
      - lt: 0.5
  library_complexity_mqc-generalstats-library_complexity-PBC2:
    pass:
      - gt: 3
    warn:
      - lt: 3
    fail:
      - lt: 1

thousandsSep_format: ''
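This MultiQC configuration is the file that main.nf passes to the multiqcReport process (which runs MultiQC inside a Singularity image with -c pointed at it). The same report can be regenerated from a finished run; a sketch, assuming the config above is saved as workflow/conf/multiqc_config.yaml and the BioHPC image path used in main.nf is available:

# Sketch: rebuild the MultiQC report from pipeline outputs with the same image and config as main.nf.
singularity exec /project/shared/bicf_workflow_ref/singularity_images/bicf-multiqc-2.0.0.img \
    multiqc -c workflow/conf/multiqc_config.yaml output/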
#!/usr/bin/env nextflow
params.design="$baseDir/../test_data/samplesheet.csv"
params.bams = "$baseDir/../test_data/*.bam"
// params.bais = "$baseDir/../test_data/*.bai"
params.peaks = "$baseDir/../test_data/*.broadPeak"
params.genomepath="/project/shared/bicf_workflow_ref/GRCh37"
toppeakcount = 200
design_file = file(params.design)
deeptools_design = Channel.fromPath(params.design)
diffbind_design = Channel.fromPath(params.design)
chipseeker_design = Channel.fromPath(params.design)
meme_design = Channel.fromPath(params.design)
index_bams = Channel.fromPath(params.bams)
deeptools_bams = Channel.fromPath(params.bams)
deeptools_peaks = Channel.fromPath(params.peaks)
chipseeker_peaks = Channel.fromPath(params.peaks)
diffbind_bams = Channel.fromPath(params.bams)
diffbind_peaks = Channel.fromPath(params.peaks)
meme_peaks = Channel.fromPath(params.peaks)
// deeptools_bamindex = Channel.fromPath(params.bais)
// diffbind_bamindex = Channel.fromPath(params.bais)
process bamindex {
publishDir "$baseDir/output/", mode: 'copy'
input:
file index_bam_files from index_bams
output:
file "*bai" into deeptools_bamindex
file "*bai" into diffbind_bamindex
script:
"""
module load python/2.7.x-anaconda
module load R/3.3.2-gccmkl
module load samtools/intel/1.3
samtools index $index_bam_files
"""
}
process run_deeptools {
publishDir "$baseDir/output", mode: 'copy'
input:
file deeptools_design_file from deeptools_design
file deeptools_bam_files from deeptools_bams.toList()
file deeptools_peak_files from deeptools_peaks.toList()
file deeptools_bam_indexes from deeptools_bamindex.toList()
output:
file "*deeptools*" into deeptools_output
script:
"""
module load python/2.7.x-anaconda
module load R/3.3.2-gccmkl
module load deeptools/2.3.5
python $baseDir/scripts/runDeepTools.py -i ${params.design} -g ${params.genomepath}
"""
}
process run_diffbind {
publishDir "$baseDir/output", mode: 'copy'
input:
file diffbind_design_file from diffbind_design
file diffbind_bam_files from diffbind_bams.toList()
file diffbind_peak_files from diffbind_peaks.toList()
file diffbind_bam_indexes from diffbind_bamindex.toList()
output:
file "diffpeak.design" into diffpeaksdesign_chipseeker
file "diffpeak.design" into diffpeaksdesign_meme
file "*_diffbind.bed" into diffpeaks_meme
file "*_diffbind.bed" into diffpeaks_chipseeker
script:
"""
module load python/2.7.x-anaconda
module load R/3.3.2-gccmkl
Rscript $baseDir/scripts/runDiffBind.R $diffbind_design_file
"""
}
process run_chipseeker_diffpeak {
publishDir "$baseDir/output", mode: 'copy'
input:
file diffpeak_design_file from diffpeaksdesign_chipseeker
file diffpeaks from diffpeaks_chipseeker
output:
file "*chipseeker*" into chipseeker_diffpeak_output
script:
"""
module load python/2.7.x-anaconda
module load R/3.3.2-gccmkl
Rscript $baseDir/scripts/runChipseeker.R $diffpeak_design_file ${params.genomepath}
"""
}
process run_chipseeker_originalpeak {
publishDir "$baseDir/output", mode: 'copy'
input:
file design_file from chipseeker_design
file chipseeker_peak_files from chipseeker_peaks.toList()
output:
file "*chipseeker*" into chipseeker_originalpeak_output
script:
"""
module load python/2.7.x-anaconda
module load R/3.3.2-gccmkl
Rscript $baseDir/scripts/runChipseeker.R $design_file ${params.genomepath}
"""
}
process run_meme_original {
publishDir "$baseDir/output", mode: 'copy'
input:
file design_meme from meme_design
file meme_peak_files from meme_peaks.toList()
output:
file "*meme*" into meme_original_output
script:
"""
module load python/2.7.x-anaconda
module load R/3.3.2-gccmkl
module load meme/4.11.1-gcc-openmpi
python $baseDir/scripts/runMemechip.py -i $design_meme -g ${params.genomepath} -l ${toppeakcount}
"""
}
process run_meme_diffpeak {
publishDir "$baseDir/output", mode: 'copy'
input:
file peaks_meme from diffpeaks_meme
file diffpeak_design from diffpeaksdesign_meme
output:
file "*meme*" into meme_diffpeak_output
script:
"""
module load python/2.7.x-anaconda
module load R/3.3.2-gccmkl
module load meme/4.11.1-gcc-openmpi
python $baseDir/scripts/runMemechip.py -i $diffpeak_design -g ${params.genomepath} -l ${toppeakcount}
"""
/*
BICF ChIP-seq Analysis Workflow
#### Homepage / Documentation
https://git.biohpc.swmed.edu/BICF/Astrocyte/chipseq_analysis/
Licensed under MIT (https://git.biohpc.swmed.edu/BICF/Astrocyte/chipseq_analysis/LICENSE.md)
*/
// Path to an input file, or a pattern for multiple inputs
// Note - $baseDir is the location of this workflow file main.nf
// Define Input variables
params.reads = "$baseDir/../test_data/*.fastq.gz"
params.pairedEnd = false
params.designFile = "$baseDir/../test_data/design_ENCSR238SGC_SE.txt"
params.genome = 'GRCm38'
params.cutoffRatio = 1.2
params.outDir= "$baseDir/output"
params.extendReadsLen = 100
params.topPeakCount = 600
params.astrocyte = false
params.skipDiff = false
params.skipMotif = false
params.skipPlotProfile = false
params.references = "$baseDir/../docs/references.md"
params.multiqc = "$baseDir/conf/multiqc_config.yaml"
params.ci = false
params.dev = false
// Assign variables if astrocyte
if (params.astrocyte) {
print("Running under astrocyte")
referenceLocation = "/project/shared/bicf_workflow_ref"
if (params.genome == 'GRCh37') {
params.bwaIndex = "$referenceLocation/human/$params.genome"
params.chromSizes = "$referenceLocation/human/$params.genome/genomefile.txt"
params.fasta = "$referenceLocation/human/$params.genome/genome.fa"
params.gtf = "$referenceLocation/human/$params.genome/gencode.v19.chr_patch_hapl_scaff.annotation.gtf"
params.geneNames = "$referenceLocation/human/$params.genome/genenames.txt"
params.genomeSize = 'hs'
} else if (params.genome == 'GRCm38') {
params.bwaIndex = "$referenceLocation/mouse/$params.genome"
params.chromSizes = "$referenceLocation/mouse/$params.genome/genomefile.txt"
params.fasta = "$referenceLocation/mouse/$params.genome/genome.fa"
params.gtf = "$referenceLocation/mouse/$params.genome/gencode.vM20.annotation.gtf"
params.geneNames = "$referenceLocation/mouse/$params.genome/genenames.txt"
params.genomeSize = 'mm'
} else if (params.genome == 'GRCh38') {
params.bwaIndex = "$referenceLocation/human/$params.genome"
params.chromSizes = "$referenceLocation/human/$params.genome/genomefile.txt"
params.fasta = "$referenceLocation/human/$params.genome/genome.fa"
params.gtf = "$referenceLocation/human/$params.genome/gencode.v25.chr_patch_hapl_scaff.annotation.gtf"
params.geneNames = "$referenceLocation/human/$params.genome/genenames.txt"
params.genomeSize = 'hs'
}
} else {
params.bwaIndex = params.genome ? params.genomes[ params.genome ].bwa ?: false : false
params.genomeSize = params.genome ? params.genomes[ params.genome ].genomesize ?: false : false
params.chromSizes = params.genome ? params.genomes[ params.genome ].chromsizes ?: false : false
params.fasta = params.genome ? params.genomes[ params.genome ].fasta ?: false : false
params.gtf = params.genome ? params.genomes[ params.genome ].gtf ?: false : false
params.geneNames = params.genome ? params.genomes[ params.genome ].geneNames ?: false : false
}
// Check inputs
if( params.bwaIndex ){
bwaIndex = Channel
.fromPath(params.bwaIndex)
.ifEmpty { exit 1, "BWA index not found: ${params.bwaIndex}" }
} else {
exit 1, "No reference genome specified."
}
// Define List of Files
readsList = Channel
.fromPath( params.reads )
.flatten()
.map { file -> [ file.getFileName().toString(), file.toString() ].join("\t")}
.collectFile( name: 'fileList.tsv', newLine: true )
// Define regular variables
pairedEnd = params.pairedEnd
designFile = Channel.fromPath(params.designFile)
genomeSize = params.genomeSize
genome = params.genome
chromSizes = params.chromSizes
fasta = params.fasta
cutoffRatio = params.cutoffRatio
outDir = params.outDir
extendReadsLen = params.extendReadsLen
topPeakCount = params.topPeakCount
skipDiff = params.skipDiff
skipMotif = params.skipMotif
skipPlotProfile = params.skipPlotProfile
references = params.references
multiqc = params.multiqc
gtfFile = params.gtf
geneNames = params.geneNames
/*
* trackStart: track start of pipeline
*/
process trackStart {
script:
"""
hostname
ulimit -a
curl -H 'Content-Type: application/json' -X PUT -d '{ \
"sessionId": "${workflow.sessionId}", \
"pipeline": "chipseq_analysis", \
"start": "${workflow.start}", \
"astrocyte": ${params.astrocyte}, \
"status": "started", \
"nextflowVersion": "${workflow.nextflow.version}", \
"pipelineVersion": "1.1.2", \
"ci": ${params.ci}, \
"dev": ${params.dev}}' \
"https://xku43pcwnf.execute-api.us-east-1.amazonaws.com/ProdDeploy/pipeline-tracking"
"""
}
// Check design file for errors
process checkDesignFile {
publishDir "$outDir/design", mode: 'copy'
input:
file designFile
file readsList
output:
file("design.tsv") into designFilePaths
script:
if (pairedEnd) {
"""
module load python/3.6.1-2-anaconda
python3 $baseDir/scripts/check_design.py -d $designFile -f $readsList -p
"""
}
else {
"""
module load python/3.6.1-2-anaconda
python3 $baseDir/scripts/check_design.py -d $designFile -f $readsList
"""
}
}
// Define channel for raw reads
if (pairedEnd) {
rawReads = designFilePaths
.splitCsv(sep: '\t', header: true)
.map { row -> [ row.sample_id, [row.fastq_read1, row.fastq_read2], row.experiment_id, row.biosample, row.factor, row.treatment, row.replicate, row.control_id ] }
} else {
rawReads = designFilePaths
.splitCsv(sep: '\t', header: true)
.map { row -> [ row.sample_id, [row.fastq_read1], row.experiment_id, row.biosample, row.factor, row.treatment, row.replicate, row.control_id ] }
}
// Trim raw reads using trimgalore
process trimReads {
tag "$sampleId-$replicate"
publishDir "$outDir/${task.process}/${sampleId}", mode: 'copy'
input:
set sampleId, reads, experimentId, biosample, factor, treatment, replicate, controlId from rawReads
output:
set sampleId, file('*.fq.gz'), experimentId, biosample, factor, treatment, replicate, controlId into trimmedReads
file('*trimming_report.txt') into trimgaloreResults
file('version_*.txt') into trimReadsVersions
script:
if (pairedEnd) {
"""
module load python/3.6.1-2-anaconda
module load trimgalore/0.4.1
python3 $baseDir/scripts/trim_reads.py -f ${reads[0]} ${reads[1]} -s $sampleId -p
"""
}
else {
"""
module load python/3.6.1-2-anaconda
module load trimgalore/0.4.1
python3 $baseDir/scripts/trim_reads.py -f ${reads[0]} -s $sampleId
"""
}
}
// Align trimmed reads using bwa
process alignReads {
queue '128GB,256GB,256GBv1'
tag "$sampleId-$replicate"
publishDir "$outDir/${task.process}/${sampleId}", mode: 'copy'
input:
set sampleId, reads, experimentId, biosample, factor, treatment, replicate, controlId from trimmedReads
file index from bwaIndex.first()
output:
set sampleId, file('*.bam'), experimentId, biosample, factor, treatment, replicate, controlId into mappedReads
file '*.flagstat.qc' into mappedReadsStats
file('version_*.txt') into alignReadsVersions
script:
if (pairedEnd) {
"""
module load python/3.6.1-2-anaconda
module load bwa/intel/0.7.12
module load samtools/1.6
python3 $baseDir/scripts/map_reads.py -f ${reads[0]} ${reads[1]} -r ${index}/genome.fa -s $sampleId -p
"""
}
else {
"""
module load python/3.6.1-2-anaconda
module load bwa/intel/0.7.12
module load samtools/1.6
python3 $baseDir/scripts/map_reads.py -f $reads -r ${index}/genome.fa -s $sampleId
"""
}
}
// Dedup reads using sambamba
process filterReads {
queue '128GB,256GB,256GBv1'
tag "$sampleId-$replicate"
publishDir "$outDir/${task.process}/${sampleId}", mode: 'copy'
input:
set sampleId, mapped, experimentId, biosample, factor, treatment, replicate, controlId from mappedReads
output:
set sampleId, file('*.bam'), file('*.bai'), experimentId, biosample, factor, treatment, replicate, controlId into dedupReads
set sampleId, file('*.bam'), experimentId, biosample, factor, treatment, replicate, controlId into convertReads
file '*.flagstat.qc' into dedupReadsStats
file '*.pbc.qc' into dedupReadsComplexity
file '*.dedup.qc' into dupReads
file('version_*.txt') into filterReadsVersions
script:
if (pairedEnd) {
"""
module load python/3.6.1-2-anaconda
module load samtools/1.6
module load sambamba/0.6.6
module load bedtools/2.26.0
python3 $baseDir/scripts/map_qc.py -b $mapped -p
"""
}
else {
"""
module load python/3.6.1-2-anaconda
module load samtools/1.6
module load sambamba/0.6.6
module load bedtools/2.26.0
python3 $baseDir/scripts/map_qc.py -b $mapped
"""
}
}
// Define channel collecting dedup reads into new design file
dedupReads
.map{ sampleId, bam, bai, experimentId, biosample, factor, treatment, replicate, controlId ->
"$sampleId\t$bam\t$bai\t$experimentId\t$biosample\t$factor\t$treatment\t$replicate\t$controlId\n"}
.collectFile(name:'design_dedup.tsv', seed:"sample_id\tbam_reads\tbam_index\texperiment_id\tbiosample\tfactor\ttreatment\treplicate\tcontrol_id\n", storeDir:"$outDir/design")
.into { dedupDesign; preDiffDesign }
// Quality Metrics using deeptools
process experimentQC {
queue '128GB,256GB,256GBv1'
publishDir "$outDir/${task.process}", mode: 'copy'
input:
file dedupDesign
output:
file '*.{pdf,npz}' into experimentQCStats
file('version_*.txt') into experimentQCVersions
script:
"""
module load python/3.6.1-2-anaconda
module load deeptools/2.5.0.1
python3 $baseDir/scripts/experiment_qc.py -d $dedupDesign -e $extendReadsLen
"""
}
// Convert deduplicated BAM reads to tagAlign and BEDPE/BEDSE formats
process convertReads {
queue '128GB,256GB,256GBv1'
tag "$sampleId-$replicate"
publishDir "$outDir/${task.process}/${sampleId}", mode: 'copy'
input:
set sampleId, deduped, experimentId, biosample, factor, treatment, replicate, controlId from convertReads
output:
set sampleId, file('*.tagAlign.gz'), file('*.bed{pe,se}.gz'), experimentId, biosample, factor, treatment, replicate, controlId into tagReads
file('version_*.txt') into convertReadsVersions
script:
if (pairedEnd) {
"""
module load python/3.6.1-2-anaconda
module load samtools/1.6
module load bedtools/2.26.0
python3 $baseDir/scripts/convert_reads.py -b $deduped -p
"""
}
else {
"""
module load python/3.6.1-2-anaconda
module load samtools/1.6
module load bedtools/2.26.0
python3 $baseDir/scripts/convert_reads.py -b $deduped
"""
}
}
// Calculate cross-correlation using phantompeakqualtools
process crossReads {
tag "$sampleId-$replicate"
publishDir "$outDir/${task.process}/${sampleId}", mode: 'copy'
input:
set sampleId, seTagAlign, tagAlign, experimentId, biosample, factor, treatment, replicate, controlId from tagReads
output:
set sampleId, seTagAlign, tagAlign, file('*.cc.qc'), experimentId, biosample, factor, treatment, replicate, controlId into xcorReads
set file('*.cc.qc'), file('*.cc.plot.pdf') into crossReadsStats
file('version_*.txt') into crossReadsVersions
script:
if (pairedEnd) {
"""
module load python/3.6.1-2-anaconda
module load phantompeakqualtools/1.2
python3 $baseDir/scripts/xcor.py -t $seTagAlign -p
"""
}
else {
"""
module load python/3.6.1-2-anaconda
module load phantompeakqualtools/1.2
python3 $baseDir/scripts/xcor.py -t $seTagAlign
"""
}
}
// Define channel collecting tagAlign and xcor into design file
xcorDesign = xcorReads
.map{ sampleId, seTagAlign, tagAlign, xcor, experimentId, biosample, factor, treatment, replicate, controlId ->
"$sampleId\t$seTagAlign\t$tagAlign\t$xcor\t$experimentId\t$biosample\t$factor\t$treatment\t$replicate\t$controlId\n"}
.collectFile(name:'design_xcor.tsv', seed:"sample_id\tse_tag_align\ttag_align\txcor\texperiment_id\tbiosample\tfactor\ttreatment\treplicate\tcontrol_id\n", storeDir:"$outDir/design")
// Make Experiment design files to be read in for downstream analysis
process defineExpDesignFiles {
publishDir "$outDir/design", mode: 'copy'
input:
file xcorDesign
output:
file '*.tsv' into experimentObjs mode flatten
script:
"""
module load python/3.6.1-2-anaconda
python3 $baseDir/scripts/experiment_design.py -d $xcorDesign
"""
}
// Pool tagAlign files and create pseudoreplicates for each experiment
process poolAndPsuedoReads {
tag "${experimentObjs.baseName}"
publishDir "$outDir/design", mode: 'copy'
input:
file experimentObjs
output:
file '*.tsv' into experimentPoolObjs
script:
if (pairedEnd) {
"""
module load python/3.6.1-2-anaconda
python3 $baseDir/scripts/pool_and_psuedoreplicate.py -d $experimentObjs -c $cutoffRatio -p
"""
}
else {
"""
module load python/3.6.1-2-anaconda
python3 $baseDir/scripts/pool_and_psuedoreplicate.py -d $experimentObjs -c $cutoffRatio
"""
}
}
// Collect list of experiment design files into a single channel
experimentRows = experimentPoolObjs
.splitCsv(sep:'\t', header:true)
.map { row -> [ row.sample_id, row.tag_align, row.xcor, row.experiment_id, row.biosample, row.factor, row.treatment, row.replicate, row.control_id, row.control_tag_align] }
// Call Peaks using MACS
process callPeaksMACS {
tag "$sampleId-$replicate"
publishDir "$outDir/${task.process}/${experimentId}/${replicate}", mode: 'copy'
input:
set sampleId, tagAlign, xcor, experimentId, biosample, factor, treatment, replicate, controlId, controlTagAlign from experimentRows
output:
set sampleId, file('*.narrowPeak'), file('*.fc_signal.bw'), file('*.pvalue_signal.bw'), experimentId, biosample, factor, treatment, replicate, controlId into experimentPeaks
file '*.xls' into callPeaksMACSsummit
file('version_*.txt') into callPeaksMACSVersions
file("*.fc_signal.bw") into bigwigs
script:
if (pairedEnd) {
"""
module load python/3.6.1-2-anaconda
module load macs/2.1.0-20151222
module load UCSC_userApps/v317
module load bedtools/2.26.0
module load phantompeakqualtools/1.2
python3 $baseDir/scripts/call_peaks_macs.py -t $tagAlign -x $xcor -c $controlTagAlign -s $sampleId -g $genomeSize -z $chromSizes -p
"""
}
else {
"""
module load python/3.6.1-2-anaconda
module load macs/2.1.0-20151222
module load UCSC_userApps/v317
module load bedtools/2.26.0
module load phantompeakqualtools/1.2
python3 $baseDir/scripts/call_peaks_macs.py -t $tagAlign -x $xcor -c $controlTagAlign -s $sampleId -g $genomeSize -z $chromSizes
"""
}
}
// Define channel collecting peaks into design file
peaksDesign = experimentPeaks
.map{ sampleId, peak, fcSignal, pvalueSignal, experimentId, biosample, factor, treatment, replicate, controlId ->
"$sampleId\t$peak\t$fcSignal\t$pvalueSignal\t$experimentId\t$biosample\t$factor\t$treatment\t$replicate\t$controlId\n"}
.collectFile(name:'design_peak.tsv', seed:"sample_id\tpeaks\tfc_signal\tpvalue_signal\texperiment_id\tbiosample\tfactor\ttreatment\treplicate\tcontrol_id\n", storeDir:"$outDir/design")
// Plot signal profiles over the annotation using deepTools plotProfile
process plotProfile {
publishDir "$outDir/experimentQC", mode: 'copy'
input:
file bigWigList from bigwigs.collect()
output:
file '*.{png,gz}' into plotProfile
when:
!skipPlotProfile
script:
"""
module load deeptools/2.5.0.1
bash $baseDir/scripts/plot_profile.sh -g $gtfFile
"""
}
// Calculate Consensus Peaks
process consensusPeaks {
publishDir "$outDir/${task.process}", mode: 'copy'
publishDir "$outDir/design", mode: 'copy', pattern: '*.{csv|tsv}'
input:
file peaksDesign
file preDiffDesign
output:
file '*.replicated.*' into consensusPeaks
file '*.rejected.*' into rejectedPeaks
file 'design_diffPeaks.csv' into designDiffPeaks
file 'design_annotatePeaks.tsv' into designAnnotatePeaks, designMotifSearch
file 'unique_experiments.csv' into uniqueExperiments
file('version_*.txt') into consensusPeaksVersions
script:
"""
module load python/3.6.1-2-anaconda
module load bedtools/2.26.0
python3 $baseDir/scripts/overlap_peaks.py -d $peaksDesign -f $preDiffDesign
"""
}
// Annotate Peaks
process peakAnnotation {
publishDir "$outDir/${task.process}", mode: 'copy'
input:
file designAnnotatePeaks
output:
file "*chipseeker*" into peakAnnotation
file('version_*.txt') into peakAnnotationVersions
script:
"""
module load R/3.3.2-gccmkl
Rscript $baseDir/scripts/annotate_peaks.R $designAnnotatePeaks $gtfFile $geneNames
"""
}
// Motif Search Peaks
process motifSearch {
publishDir "$outDir/${task.process}", mode: 'copy'
input:
file designMotifSearch
output:
file "*memechip" into motifSearch
file "*narrowPeak" into filteredPeaks
file('version_*.txt') into motifSearchVersions
when:
!skipMotif
script:
"""
module load python/3.6.1-2-anaconda
module load meme/4.11.1-gcc-openmpi
module load bedtools/2.26.0
python3 $baseDir/scripts/motif_search.py -d $designMotifSearch -g $fasta -p $topPeakCount
"""
}
// Define channel to find number of unique experiments
uniqueExperimentsList = uniqueExperiments
.splitCsv(sep: '\t', header: true)
// Calculate Differential Binding Activity
process diffPeaks {
publishDir "$outDir/${task.process}", mode: 'copy'
input:
file designDiffPeaks
val noUniqueExperiments from uniqueExperimentsList.count()
output:
file '*_diffbind.bed' into diffPeaks
file '*_diffbind.csv' into diffPeaksCounts
file '*.pdf' into diffPeaksStats
file 'normcount_peaksets.txt' into normCountPeaks
file('version_*.txt') into diffPeaksVersions
when:
noUniqueExperiments > 1 && !skipDiff
script:
"""
module load python/3.6.1-2-anaconda
module load R/3.3.2-gccmkl
Rscript $baseDir/scripts/diff_peaks.R $designDiffPeaks
"""
}
// Generate MultiQC report, software versions, and references
process multiqcReport {
publishDir "$outDir/${task.process}", mode: 'copy'
input:
file ('trimReads_vf/*') from trimReadsVersions.first()
file ('alignReads_vf/*') from alignReadsVersions.first()
file ('filterReads_vf/*') from filterReadsVersions.first()
file ('convertReads_vf/*') from convertReadsVersions.first()
file ('crossReads_vf/*') from crossReadsVersions.first()
file ('callPeaksMACS_vf/*') from callPeaksMACSVersions.first()
file ('consensusPeaks_vf/*') from consensusPeaksVersions.first()
file ('peakAnnotation_vf/*') from peakAnnotationVersions.first()
file ('motifSearch_vf/*') from motifSearchVersions.first().ifEmpty()
file ('diffPeaks_vf/*') from diffPeaksVersions.first().ifEmpty()
file ('experimentQC_vf/*') from experimentQCVersions.first()
file ('trimReads/*') from trimgaloreResults.collect()
file ('alignReads/*') from mappedReadsStats.collect()
file ('filterReads/*') from dedupReadsComplexity.collect()
file ('crossReads/*') from crossReadsStats.collect()
output:
file('software_versions_mqc.yaml') into softwareVersions
file('software_references_mqc.yaml') into softwareReferences
file "multiqc_report.html" into multiqcReport
file "*_data" into multiqcData
script:
"""
module load python/3.6.1-2-anaconda
module load pandoc/2.7
module load singularity/3.0.2
echo $workflow.nextflow.version > version_nextflow.txt
singularity exec /project/shared/bicf_workflow_ref/singularity_images/bicf-multiqc-2.0.0.img multiqc --version > version_multiqc.txt
python --version &> version_python.txt
python3 $baseDir/scripts/generate_references.py -r $references -o software_references
python3 $baseDir/scripts/generate_versions.py -o software_versions
singularity exec /project/shared/bicf_workflow_ref/singularity_images/bicf-multiqc-2.0.0.img multiqc -c $multiqc .
"""
}
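Each process in main.nf publishes into $outDir/<process name>, with the generated design TSVs collected under $outDir/design, so a finished run can be inspected straight from the output directory. A quick sketch, assuming the default --outDir of workflow/output:

# Sketch: list published results per process after a completed run (default outDir).
ls workflow/output/
ls workflow/output/design/            # design.tsv, design_dedup.tsv, design_xcor.tsv, design_peak.tsv, ...
ls workflow/output/callPeaksMACS/     # per-experiment/replicate narrowPeaks and signal bigWigs
ls workflow/output/multiqcReport/     # multiqc_report.html and *_data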
profiles {
standard {
includeConfig 'conf/biohpc.config'
}
}
trace {
enabled = true
file = 'pipeline_trace.txt'
fields = 'task_id,native_id,process,name,status,exit,submit,start,complete,duration,realtime,%cpu,%mem,rss'
}
timeline {
enabled = true
file = 'timeline.html'
}
report {
enabled = true
file = 'report.html'
}
manifest {
name = 'chipseq_analysis'
description = 'BICF ChIP-seq Analysis Workflow.'
homePage = 'https://git.biohpc.swmed.edu/BICF/Astrocyte/chipseq_analysis'
version = '1.1.2'
mainScript = 'main.nf'
nextflowVersion = '>=0.31.0'
}
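With the trace, timeline, and report scopes above enabled, every run writes pipeline_trace.txt, timeline.html, and report.html into the launch directory. A small sketch for skimming resource usage from the trace file, assuming Nextflow's default tab separator and the field list configured above (process, status, and realtime are fields 3, 5, and 11):

# Sketch: summarize per-task status and runtime from the Nextflow trace file.
column -t -s $'\t' pipeline_trace.txt | less -S
cut -f3,5,11 pipeline_trace.txt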
#!/usr/bin/env Rscript
#*
#* --------------------------------------------------------------------------
#* Licensed under MIT (https://git.biohpc.swmed.edu/BICF/Astrocyte/chipseq_analysis/LICENSE.md)
#* --------------------------------------------------------------------------
#*
#Currently Human or Mouse
# Load libraries
library("ChIPseeker")
library(GenomicFeatures)
# Read command-line arguments
args <- commandArgs(trailingOnly=TRUE)
# Check input args
if (length(args) != 3) {
stop("Usage: annotate_peaks.R annotate_design.tsv gtf geneNames", call.=FALSE)
}
design_file <- args[1]
gtf <- args[2]
geneNames <- args[3]
# Build TxDb annotation from the GTF and load the gene symbol table
txdb <- makeTxDbFromGFF(gtf)
sym <- read.table(geneNames, header=T, sep='\t') [,4:5]
# Output version of ChIPseeker
chipseeker_version = packageVersion('ChIPseeker')
write.table(paste("Version", chipseeker_version), file = "version_ChIPseeker.txt", sep = "\t",
row.names = FALSE, col.names = FALSE)
# Load design file
design <- read.csv(design_file, sep ='\t')
files <- as.list(as.character(design$Peaks))
names(files) <- design$Condition
# Granges of files
peaks <- lapply(files, readPeakFile, as = "GRanges", header = FALSE)
peakAnnoList <- lapply(peaks, annotatePeak, TxDb=txdb, tssRegion=c(-3000, 3000), verbose=FALSE)
column_names <- c("geneId","chr", "start", "end", "width", "strand_1", "name", "score", "strand", "signalValue",
"pValue", "qValue", "peak", "annotation", "geneChr", "geneStart", "geneEnd",
"geneLength" ,"geneStrand", "transcriptId", "distanceToTSS", "symbol")
for(index in c(1:length(peakAnnoList))) {
filename <- paste(names(peaks)[index], ".chipseeker_annotation.tsv", sep="")
df <- as.data.frame(peakAnnoList[[index]])
df$geneId <- sapply(strsplit(as.character(df$geneId), split = "\\."), "[[", 1)
df_final <- merge(df, sym, by.x="geneId", by.y="ensembl", all.x=T)
colnames(df_final) <- column_names
write.table(df_final[ , !(names(df_final) %in% c('strand_1'))], filename, sep="\t" ,quote=F, row.names=F)
# Draw individual plots
# Define names of Plots
pie_name <- paste(names(files)[index],".chipseeker_pie.pdf",sep="")
upsetplot_name <- paste(names(files)[index],".chipseeker_upsetplot.pdf",sep="")
# Pie Plots
pdf(pie_name)
plotAnnoPie(peakAnnoList[[index]])
dev.off()
# Upset Plot
pdf(upsetplot_name, onefile=F)
upsetplot(peakAnnoList[[index]])
dev.off()
}
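annotate_peaks.R takes exactly the three positional arguments named in its usage message, matching how the peakAnnotation process invokes it in main.nf. A standalone invocation sketch, assuming the script lives at workflow/scripts/annotate_peaks.R and using the GRCm38 reference files from conf/biohpc.config:

# Sketch: annotate consensus peaks outside the pipeline with the same three arguments main.nf passes.
module load R/3.3.2-gccmkl
Rscript workflow/scripts/annotate_peaks.R \
    design_annotatePeaks.tsv \
    /project/shared/bicf_workflow_ref/mouse/GRCm38/gencode.vM20.annotation.gtf \
    /project/shared/bicf_workflow_ref/mouse/GRCm38/genenames.txt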