Skip to content
Snippets Groups Projects
Commit acebeb68 authored by Venkat Malladi's avatar Venkat Malladi
Browse files

Revert main

parent 2d887a76
No related merge requests found
#!/usr/bin/env nextflow
params.design="$baseDir/../test_data/samplesheet.csv"
params.bams = "$baseDir/../test_data/*.bam"
params.peaks = "$baseDir/../test_data/*.broadPeak"
params.genomepath="/project/shared/bicf_workflow_ref/GRCh37"
toppeakcount = -1
design_file = file(params.design)
deeptools_design = Channel.fromPath(params.design)
diffbind_design = Channel.fromPath(params.design)
chipseeker_design = Channel.fromPath(params.design)
meme_design = Channel.fromPath(params.design)
index_bams = Channel.fromPath(params.bams)
deeptools_bams = Channel.fromPath(params.bams)
deeptools_peaks = Channel.fromPath(params.peaks)
chipseeker_peaks = Channel.fromPath(params.peaks)
diffbind_bams = Channel.fromPath(params.bams)
diffbind_peaks = Channel.fromPath(params.peaks)
meme_peaks = Channel.fromPath(params.peaks)
process bamindex {
publishDir "$baseDir/output/", mode: 'copy'
input:
file index_bam_files from index_bams
output:
file "*bai" into deeptools_bamindex
file "*bai" into diffbind_bamindex
script:
"""
module load python/2.7.x-anaconda
module load R/3.3.2-gccmkl
module load samtools/intel/1.3
samtools index $index_bam_files
"""
}
process run_deeptools {
publishDir "$baseDir/output", mode: 'copy'
input:
file deeptools_design_file from deeptools_design
file deeptools_bam_files from deeptools_bams.toList()
file deeptools_peak_files from deeptools_peaks.toList()
file deeptools_bam_indexes from deeptools_bamindex.toList()
output:
file "*deeptools*" into deeptools_output
script:
"""
module load python/2.7.x-anaconda
module load R/3.3.2-gccmkl
module load deeptools/2.3.5
python $baseDir/scripts/runDeepTools.py -i ${params.design} -g ${params.genomepath}}
"""
}
process run_diffbind {
publishDir "$baseDir/output", mode: 'copy'
input:
file diffbind_design_file from diffbind_design
file diffbind_bam_files from diffbind_bams.toList()
file diffbind_peak_files from diffbind_peaks.toList()
file diffbind_bam_indexes from diffbind_bamindex.toList()
output:
file "diffpeak.design" into diffpeaksdesign_chipseeker
file "diffpeak.design" into diffpeaksdesign_meme
file "*_diffbind.bed" into diffpeaks_meme
file "*_diffbind.bed" into diffpeaks_chipseeker
script:
"""
module load python/2.7.x-anaconda
module load R/3.3.2-gccmkl
Rscript $baseDir/scripts/runDiffBind.R $diffbind_design_file
"""
}
process run_chipseeker_diffpeak {
publishDir "$baseDir/output", mode: 'copy'
input:
file diffpeak_design_file from diffpeaksdesign_chipseeker
file diffpeaks from diffpeaks_chipseeker
output:
file "*chipseeker*" into chipseeker_diffpeak_output
script:
"""
module load python/2.7.x-anaconda
module load R/3.3.2-gccmkl
Rscript $baseDir/scripts/runChipseeker.R $diffpeak_design_file ${params.genomepath}
"""
}
process run_chipseeker_originalpeak {
publishDir "$baseDir/output", mode: 'copy'
input:
file design_file from chipseeker_design
file chipseeker_peak_files from chipseeker_peaks.toList()
output:
file "*chipseeker*" into chipseeker_originalpeak_output
script:
"""
module load python/2.7.x-anaconda
module load R/3.3.2-gccmkl
Rscript $baseDir/scripts/runChipseeker.R $design_file ${params.genomepath}
"""
}
process run_meme_original {
publishDir "$baseDir/output", mode: 'copy'
input:
file design_meme from meme_design
file meme_peak_files from meme_peaks.toList()
output:
file "*meme*" into meme_original_output
script:
"""
module load python/2.7.x-anaconda
module load R/3.3.2-gccmkl
module add deeptools/2.3.5
module load meme/4.11.1-gcc-openmpi
python $baseDir/scripts/runMemechip.py -i $design_meme -g ${params.genomepath} -l ${toppeakcount}
"""
}
process run_meme_diffpeak {
publishDir "$baseDir/output", mode: 'copy'
input:
file peaks_meme from diffpeaks_meme
file diffpeak_design from diffpeaksdesign_meme
output:
file "*meme*" into meme_diffpeak_output
script:
"""
module load python/2.7.x-anaconda
module load R/3.3.2-gccmkl
module add deeptools/2.3.5
module load meme/4.11.1-gcc-openmpi
python $baseDir/scripts/runMemechip.py -i $diffpeak_design -g ${params.genomepath} -l ${toppeakcount}
"""
// Path to an input file, or a pattern for multiple inputs
// Note - $baseDir is the location of this workflow file main.nf
// Define Input variables
params.reads = "$baseDir/../test_data/*.fastq.gz"
params.pairedEnd = false
params.designFile = "$baseDir/../test_data/design_ENCSR238SGC_SE.txt"
params.genome = 'GRCm38'
params.genomes = []
params.bwaIndex = params.genome ? params.genomes[ params.genome ].bwa ?: false : false
params.genomeSize = params.genome ? params.genomes[ params.genome ].genomesize ?: false : false
params.chromSizes = params.genome ? params.genomes[ params.genome ].chromsizes ?: false : false
params.fasta = params.genome ? params.genomes[ params.genome ].fasta ?: false : false
params.cutoffRatio = 1.2
// Check inputs
if( params.bwaIndex ){
bwaIndex = Channel
.fromPath(params.bwaIndex)
.ifEmpty { exit 1, "BWA index not found: ${params.bwaIndex}" }
} else {
exit 1, "No reference genome specified."
}
// Define List of Files
readsList = Channel
.fromPath( params.reads )
.flatten()
.map { file -> [ file.getFileName().toString(), file.toString() ].join("\t")}
.collectFile( name: 'fileList.tsv', newLine: true )
// Define regular variables
pairedEnd = params.pairedEnd
designFile = params.designFile
genomeSize = params.genomeSize
genome = params.genome
chromSizes = params.chromSizes
fasta = params.fasta
cutoffRatio = params.cutoffRatio
process checkDesignFile {
publishDir "$baseDir/output/design", mode: 'copy'
input:
designFile
file readsList
output:
file("design.tsv") into designFilePaths
script:
if (pairedEnd) {
"""
python3 $baseDir/scripts/check_design.py -d $designFile -f $readsList -p
"""
}
else {
"""
python $baseDir/scripts/check_design.py -d $designFile -f $readsList
"""
}
}
// Define channel for raw reads
if (pairedEnd) {
rawReads = designFilePaths
.splitCsv(sep: '\t', header: true)
.map { row -> [ row.sample_id, [row.fastq_read1, row.fastq_read2], row.experiment_id, row.biosample, row.factor, row.treatment, row.replicate, row.control_id ] }
} else {
rawReads = designFilePaths
.splitCsv(sep: '\t', header: true)
.map { row -> [ row.sample_id, [row.fastq_read1], row.experiment_id, row.biosample, row.factor, row.treatment, row.replicate, row.control_id ] }
}
// Trim raw reads using trimgalore
process trimReads {
tag "$sampleId-$replicate"
publishDir "$baseDir/output/${task.process}", mode: 'copy'
input:
set sampleId, reads, experimentId, biosample, factor, treatment, replicate, controlId from rawReads
output:
set sampleId, file('*.fq.gz'), experimentId, biosample, factor, treatment, replicate, controlId into trimmedReads
file('*trimming_report.txt') into trimgalore_results
script:
if (pairedEnd) {
"""
python3 $baseDir/scripts/trim_reads.py -f ${reads[0]} ${reads[1]} -p
"""
}
else {
"""
python3 $baseDir/scripts/trim_reads.py -f ${reads[0]}
"""
}
}
// Align trimmed reads using bwa
process alignReads {
tag "$sampleId-$replicate"
publishDir "$baseDir/output/${task.process}", mode: 'copy'
input:
set sampleId, reads, experimentId, biosample, factor, treatment, replicate, controlId from trimmedReads
file index from bwaIndex.first()
output:
set sampleId, file('*.bam'), experimentId, biosample, factor, treatment, replicate, controlId into mappedReads
file '*.srt.bam.flagstat.qc' into mappedReadsStats
script:
if (pairedEnd) {
"""
python3 $baseDir/scripts/map_reads.py -f ${reads[0]} ${reads[1]} -r ${index}/genome.fa -p
"""
}
else {
"""
python3 $baseDir/scripts/map_reads.py -f $reads -r ${index}/genome.fa
"""
}
}
// Dedup reads using sambamba
process filterReads {
tag "$sampleId-$replicate"
publishDir "$baseDir/output/${task.process}", mode: 'copy'
input:
set sampleId, mapped, experimentId, biosample, factor, treatment, replicate, controlId from mappedReads
output:
set sampleId, file('*.bam'), file('*.bai'), experimentId, biosample, factor, treatment, replicate, controlId into dedupReads
set sampleId, file('*.bam'), experimentId, biosample, factor, treatment, replicate, controlId into convertReads
file '*flagstat.qc' into dedupReadsStats
file '*pbc.qc' into dedupReadsComplexity
file '*dup.qc' into dupReads
script:
if (pairedEnd) {
"""
python3 $baseDir/scripts/map_qc.py -b $mapped -p
"""
}
else {
"""
python3 $baseDir/scripts/map_qc.py -b $mapped
"""
}
}
// Define channel collecting dedup reads into new design file
dedupReads
.map{ sampleId, bam, bai, experimentId, biosample, factor, treatment, replicate, controlId ->
"$sampleId\t$bam\t$bai\t$experimentId\t$biosample\t$factor\t$treatment\t$replicate\t$controlId\n"}
.collectFile(name:'design_dedup.tsv', seed:"sample_id\tbam_reads\tbam_index\texperiment_id\tbiosample\tfactor\ttreatment\treplicate\tcontrol_id\n", storeDir:"$baseDir/output/design")
.into { dedupDesign; preDiffDesign }
// Quality Metrics using deeptools
process experimentQC {
publishDir "$baseDir/output/${task.process}", mode: 'copy'
input:
file dedupDesign
output:
file '*.{png,npz}' into deepToolsStats
script:
"""
python3 $baseDir/scripts/experiment_qc.py -d $dedupDesign
"""
}
// Convert reads to bam
process convertReads {
tag "$sampleId-$replicate"
publishDir "$baseDir/output/${task.process}", mode: 'copy'
input:
set sampleId, deduped, experimentId, biosample, factor, treatment, replicate, controlId from convertReads
output:
set sampleId, file('*.tagAlign.gz'), file('*.bed{pe,se}.gz'), experimentId, biosample, factor, treatment, replicate, controlId into tagReads
script:
if (pairedEnd) {
"""
python3 $baseDir/scripts/convert_reads.py -b $deduped -p
"""
}
else {
"""
python3 $baseDir/scripts/convert_reads.py -b $deduped
"""
}
}
// Calculate Cross-correlation using phantompeaktools
process crossReads {
tag "$sampleId-$replicate"
publishDir "$baseDir/output/${task.process}", mode: 'copy'
input:
set sampleId, seTagAlign, tagAlign, experimentId, biosample, factor, treatment, replicate, controlId from tagReads
output:
set sampleId, seTagAlign, tagAlign, file('*.cc.qc'), experimentId, biosample, factor, treatment, replicate, controlId into xcorReads
set file('*.cc.qc'), file('*.cc.plot.pdf') into xcorReadsStats
script:
if (pairedEnd) {
"""
python3 $baseDir/scripts/xcor.py -t $seTagAlign -p
"""
}
else {
"""
python3 $baseDir/scripts/xcor.py -t $seTagAlign
"""
}
}
// Define channel collecting tagAlign and xcor into design file
xcorDesign = xcorReads
.map{ sampleId, seTagAlign, tagAlign, xcor, experimentId, biosample, factor, treatment, replicate, controlId ->
"$sampleId\t$seTagAlign\t$tagAlign\t$xcor\t$experimentId\t$biosample\t$factor\t$treatment\t$replicate\t$controlId\n"}
.collectFile(name:'design_xcor.tsv', seed:"sample_id\tse_tag_align\ttag_align\txcor\texperiment_id\tbiosample\tfactor\ttreatment\treplicate\tcontrol_id\n", storeDir:"$baseDir/output/design")
// Make Experiment design files to be read in for downstream analysis
process defineExpDesignFiles {
publishDir "$baseDir/output/design", mode: 'copy'
input:
file xcorDesign
output:
file '*.tsv' into experimentObjs mode flatten
script:
"""
python3 $baseDir/scripts/experiment_design.py -d $xcorDesign
"""
}
// Make Experiment design files to be read in for downstream analysis
process poolAndPsuedoReads {
tag "${experimentObjs.baseName}"
publishDir "$baseDir/output/design", mode: 'copy'
input:
file experimentObjs
output:
file '*.tsv' into experimentPoolObjs
script:
if (pairedEnd) {
"""
python3 $baseDir/scripts/pool_and_psuedoreplicate.py -d $experimentObjs -c $cutoffRatio -p
"""
}
else {
"""
python3 $baseDir/scripts/pool_and_psuedoreplicate.py -d $experimentObjs -c $cutoffRatio
"""
}
}
// Collect list of experiment design files into a single channel
experimentRows = experimentPoolObjs
.splitCsv(sep:'\t', header:true)
.map { row -> [ row.sample_id, row.tag_align, row.xcor, row.experiment_id, row.biosample, row.factor, row.treatment, row.replicate, row.control_id, row.control_tag_align] }
// Call Peaks using MACS
process callPeaksMACS {
tag "$sampleId-$replicate"
publishDir "$baseDir/output/${task.process}", mode: 'copy'
input:
set sampleId, tagAlign, xcor, experimentId, biosample, factor, treatment, replicate, controlId, controlTagAlign from experimentRows
output:
set sampleId, file('*.narrowPeak'), file('*.fc_signal.bw'), file('*.pvalue_signal.bw'), experimentId, biosample, factor, treatment, replicate, controlId into experimentPeaks
script:
if (pairedEnd) {
"""
python3 $baseDir/scripts/call_peaks_macs.py -t $tagAlign -x $xcor -c $controlTagAlign -s $sampleId -g $genomeSize -z $chromSizes -p
"""
}
else {
"""
python3 $baseDir/scripts/call_peaks_macs.py -t $tagAlign -x $xcor -c $controlTagAlign -s $sampleId -g $genomeSize -z $chromSizes
"""
}
}
// Define channel collecting peaks into design file
peaksDesign = experimentPeaks
.map{ sampleId, peak, fcSignal, pvalueSignal, experimentId, biosample, factor, treatment, replicate, controlId ->
"$sampleId\t$peak\t$fcSignal\t$pvalueSignal\t$experimentId\t$biosample\t$factor\t$treatment\t$replicate\t$controlId\n"}
.collectFile(name:'design_peak.tsv', seed:"sample_id\tpeaks\tfc_signal\tpvalue_signal\texperiment_id\tbiosample\tfactor\ttreatment\treplicate\tcontrol_id\n", storeDir:"$baseDir/output/design")
// Calculate Consensus Peaks
process consensusPeaks {
publishDir "$baseDir/output/${task.process}", mode: 'copy'
input:
file peaksDesign
file preDiffDesign
output:
file '*.replicated.*' into consensusPeaks
file '*.rejected.*' into rejectedPeaks
file 'design_diffPeaks.csv' into designDiffPeaks
file 'design_annotatePeaks.tsv' into designAnnotatePeaks, designMotifSearch
file 'unique_experiments.csv' into uniqueExperiments
script:
"""
python3 $baseDir/scripts/overlap_peaks.py -d $peaksDesign -f $preDiffDesign
"""
}
// Define channel to find number of unique experiments
uniqueExperiments.splitCsv(sep: '\t', header: true).toList().map{ it.size().toInteger() }.set { noUniqueExperiments }
// Annotate Peaks
process peakAnnotation {
publishDir "$baseDir/output/${task.process}", mode: 'copy'
input:
file designAnnotatePeaks
output:
file "*chipseeker*" into peakAnnotation
script:
"""
Rscript $baseDir/scripts/annotate_peaks.R $designAnnotatePeaks $genome
"""
}
// Calculate Differential Binding Activity
process diffPeaks {
publishDir "$baseDir/output/${task.process}", mode: 'copy'
input:
file designDiffPeaks
output:
file 'design_diffpeak_annotatePeaks.tsv' into diffPeaksDesignAnnotatePeaks, diffPeaksDesignMeme
file '*_diffbind.bed' into diffPeaks
file '*_diffbind.csv' into diffPeaksCounts
file '*.pdf' into diffPeaksStats
file 'normcount_peaksets.txt' into normCountPeaks
script:
if (noUniqueExperiments <= 1) {
"""
touch design_diffpeak_annotatePeaks.tsv
touch no_diffbind.bed
touch no_diffbind.csv
touch heatmap.pdf
touch pca.pdf
touch normcount_peaksets.txt
"""
}
else {
"""
Rscript $baseDir/scripts/dba.R $designDiffPeaks
"""
}
}
// Motif Search Peaks
process motifSearch {
publishDir "$baseDir/output/${task.process}", mode: 'copy'
input:
file designMotifSearch
output:
file "*meme*" into motifSearch
script:
"""
python2.7 $baseDir/scripts/motif_search.py -i $designMotifSearch -g $fasta
"""
}
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment