Skip to content
Snippets Groups Projects
Commit bb5a6702 authored by Venkat Malladi's avatar Venkat Malladi
Browse files

Add in old main.nf to fix organism in chipseeker.

parent 6a418e0f
Branches
Tags
No related merge requests found
#!/usr/bin/env nextflow
// Path to an input file, or a pattern for multiple inputs
// Note - $baseDir is the location of this workflow file main.nf
// Define Input variables
params.reads = "$baseDir/../test_data/*.fastq.gz"
params.pairedEnd = false
params.designFile = "$baseDir/../test_data/design_ENCSR238SGC_SE.txt"
params.genome = 'GRCm38'
params.genomes = []
params.bwaIndex = params.genome ? params.genomes[ params.genome ].bwa ?: false : false
params.genomeSize = params.genome ? params.genomes[ params.genome ].genomesize ?: false : false
params.chromSizes = params.genome ? params.genomes[ params.genome ].chromsizes ?: false : false
params.cutoffRatio = 1.2
// Check inputs
if( params.bwaIndex ){
bwaIndex = Channel
.fromPath(params.bwaIndex)
.ifEmpty { exit 1, "BWA index not found: ${params.bwaIndex}" }
} else {
exit 1, "No reference genome specified."
}
// Define List of Files
readsList = Channel
.fromPath( params.reads )
.flatten()
.map { file -> [ file.getFileName().toString(), file.toString() ].join("\t")}
.collectFile( name: 'fileList.tsv', newLine: true )
// Define regular variables
pairedEnd = params.pairedEnd
designFile = params.designFile
genomeSize = params.genomeSize
chromSizes = params.chromSizes
cutoffRatio = params.cutoffRatio
process checkDesignFile {
publishDir "$baseDir/output/design", mode: 'copy'
input:
designFile
file readsList
output:
file("design.tsv") into designFilePaths
script:
if (pairedEnd) {
"""
python3 $baseDir/scripts/check_design.py -d $designFile -f $readsList -p
"""
}
else {
"""
python $baseDir/scripts/check_design.py -d $designFile -f $readsList
"""
}
}
// Define channel for raw reads
if (pairedEnd) {
rawReads = designFilePaths
.splitCsv(sep: '\t', header: true)
.map { row -> [ row.sample_id, [row.fastq_read1, row.fastq_read2], row.experiment_id, row.biosample, row.factor, row.treatment, row.replicate, row.control_id ] }
} else {
rawReads = designFilePaths
.splitCsv(sep: '\t', header: true)
.map { row -> [ row.sample_id, [row.fastq_read1], row.experiment_id, row.biosample, row.factor, row.treatment, row.replicate, row.control_id ] }
}
// Trim raw reads using trimgalore
process trimReads {
tag "$sampleId-$replicate"
publishDir "$baseDir/output/${task.process}", mode: 'copy'
input:
set sampleId, reads, experimentId, biosample, factor, treatment, replicate, controlId from rawReads
output:
set sampleId, file('*.fq.gz'), experimentId, biosample, factor, treatment, replicate, controlId into trimmedReads
file('*trimming_report.txt') into trimgalore_results
script:
if (pairedEnd) {
"""
python3 $baseDir/scripts/trim_reads.py -f ${reads[0]} ${reads[1]} -p
"""
}
else {
"""
python3 $baseDir/scripts/trim_reads.py -f ${reads[0]}
"""
}
params.design="$baseDir/../test_data/samplesheet.csv"
params.bams = "$baseDir/../test_data/*.bam"
params.peaks = "$baseDir/../test_data/*.broadPeak"
params.genomepath="/project/shared/bicf_workflow_ref/GRCh37"
toppeakcount = -1
design_file = file(params.design)
deeptools_design = Channel.fromPath(params.design)
diffbind_design = Channel.fromPath(params.design)
chipseeker_design = Channel.fromPath(params.design)
meme_design = Channel.fromPath(params.design)
index_bams = Channel.fromPath(params.bams)
deeptools_bams = Channel.fromPath(params.bams)
deeptools_peaks = Channel.fromPath(params.peaks)
chipseeker_peaks = Channel.fromPath(params.peaks)
diffbind_bams = Channel.fromPath(params.bams)
diffbind_peaks = Channel.fromPath(params.peaks)
meme_peaks = Channel.fromPath(params.peaks)
process bamindex {
publishDir "$baseDir/output/", mode: 'copy'
input:
file index_bam_files from index_bams
output:
file "*bai" into deeptools_bamindex
file "*bai" into diffbind_bamindex
script:
"""
module load python/2.7.x-anaconda
module load R/3.3.2-gccmkl
module load samtools/intel/1.3
samtools index $index_bam_files
"""
}
// Align trimmed reads using bwa
process alignReads {
tag "$sampleId-$replicate"
publishDir "$baseDir/output/${task.process}", mode: 'copy'
input:
set sampleId, reads, experimentId, biosample, factor, treatment, replicate, controlId from trimmedReads
file index from bwaIndex.first()
output:
set sampleId, file('*.bam'), experimentId, biosample, factor, treatment, replicate, controlId into mappedReads
file '*.srt.bam.flagstat.qc' into mappedReadsStats
script:
if (pairedEnd) {
"""
python3 $baseDir/scripts/map_reads.py -f ${reads[0]} ${reads[1]} -r ${index}/genome.fa -p
"""
}
else {
"""
python3 $baseDir/scripts/map_reads.py -f $reads -r ${index}/genome.fa
"""
}
}
// Dedup reads using sambamba
process filterReads {
tag "$sampleId-$replicate"
publishDir "$baseDir/output/${task.process}", mode: 'copy'
input:
set sampleId, mapped, experimentId, biosample, factor, treatment, replicate, controlId from mappedReads
output:
set sampleId, file('*.bam'), file('*.bai'), experimentId, biosample, factor, treatment, replicate, controlId into dedupReads
set sampleId, file('*.bam'), experimentId, biosample, factor, treatment, replicate, controlId into convertReads
file '*flagstat.qc' into dedupReadsStats
file '*pbc.qc' into dedupReadsComplexity
file '*dup.qc' into dupReads
script:
if (pairedEnd) {
"""
python3 $baseDir/scripts/map_qc.py -b $mapped -p
"""
}
else {
"""
python3 $baseDir/scripts/map_qc.py -b $mapped
"""
}
}
// Define channel collecting dedup reads into new design file
dedupReads
.map{ sampleId, bam, bai, experimentId, biosample, factor, treatment, replicate, controlId ->
"$sampleId\t$bam\t$bai\t$experimentId\t$biosample\t$factor\t$treatment\t$replicate\t$controlId\n"}
.collectFile(name:'design_dedup.tsv', seed:"sample_id\tbam_reads\tbam_index\texperiment_id\tbiosample\tfactor\ttreatment\treplicate\tcontrol_id\n", storeDir:"$baseDir/output/design")
.into { dedupDesign; preDiffDesign }
// Quality Metrics using deeptools
process experimentQC {
publishDir "$baseDir/output/${task.process}", mode: 'copy'
input:
file dedupDesign
output:
file '*.{png,npz}' into deepToolsStats
script:
"""
python3 $baseDir/scripts/experiment_qc.py -d $dedupDesign
"""
process run_deeptools {
publishDir "$baseDir/output", mode: 'copy'
input:
file deeptools_design_file from deeptools_design
file deeptools_bam_files from deeptools_bams.toList()
file deeptools_peak_files from deeptools_peaks.toList()
file deeptools_bam_indexes from deeptools_bamindex.toList()
output:
file "*deeptools*" into deeptools_output
script:
"""
module load python/2.7.x-anaconda
module load R/3.3.2-gccmkl
module load deeptools/2.3.5
python $baseDir/scripts/runDeepTools.py -i ${params.design} -g ${params.genomepath}}
"""
}
// Convert reads to bam
process convertReads {
tag "$sampleId-$replicate"
publishDir "$baseDir/output/${task.process}", mode: 'copy'
input:
set sampleId, deduped, experimentId, biosample, factor, treatment, replicate, controlId from convertReads
output:
set sampleId, file('*.tagAlign.gz'), file('*.bed{pe,se}.gz'), experimentId, biosample, factor, treatment, replicate, controlId into tagReads
script:
if (pairedEnd) {
"""
python3 $baseDir/scripts/convert_reads.py -b $deduped -p
"""
}
else {
"""
python3 $baseDir/scripts/convert_reads.py -b $deduped
"""
}
process run_diffbind {
publishDir "$baseDir/output", mode: 'copy'
input:
file diffbind_design_file from diffbind_design
file diffbind_bam_files from diffbind_bams.toList()
file diffbind_peak_files from diffbind_peaks.toList()
file diffbind_bam_indexes from diffbind_bamindex.toList()
output:
file "diffpeak.design" into diffpeaksdesign_chipseeker
file "diffpeak.design" into diffpeaksdesign_meme
file "*_diffbind.bed" into diffpeaks_meme
file "*_diffbind.bed" into diffpeaks_chipseeker
script:
"""
module load python/2.7.x-anaconda
module load R/3.3.2-gccmkl
Rscript $baseDir/scripts/runDiffBind.R $diffbind_design_file
"""
}
// Calculate Cross-correlation using phantompeaktools
process crossReads {
tag "$sampleId-$replicate"
publishDir "$baseDir/output/${task.process}", mode: 'copy'
input:
set sampleId, seTagAlign, tagAlign, experimentId, biosample, factor, treatment, replicate, controlId from tagReads
output:
set sampleId, seTagAlign, tagAlign, file('*.cc.qc'), experimentId, biosample, factor, treatment, replicate, controlId into xcorReads
set file('*.cc.qc'), file('*.cc.plot.pdf') into xcorReadsStats
script:
if (pairedEnd) {
"""
python3 $baseDir/scripts/xcor.py -t $seTagAlign -p
"""
}
else {
"""
python3 $baseDir/scripts/xcor.py -t $seTagAlign
"""
}
process run_chipseeker_diffpeak {
publishDir "$baseDir/output", mode: 'copy'
input:
file diffpeak_design_file from diffpeaksdesign_chipseeker
file diffpeaks from diffpeaks_chipseeker
output:
file "*chipseeker*" into chipseeker_diffpeak_output
script:
"""
module load python/2.7.x-anaconda
module load R/3.3.2-gccmkl
Rscript $baseDir/scripts/runChipseeker.R $diffpeak_design_file ${params.genomepath}
"""
}
// Define channel collecting tagAlign and xcor into design file
xcorDesign = xcorReads
.map{ sampleId, seTagAlign, tagAlign, xcor, experimentId, biosample, factor, treatment, replicate, controlId ->
"$sampleId\t$seTagAlign\t$tagAlign\t$xcor\t$experimentId\t$biosample\t$factor\t$treatment\t$replicate\t$controlId\n"}
.collectFile(name:'design_xcor.tsv', seed:"sample_id\tse_tag_align\ttag_align\txcor\texperiment_id\tbiosample\tfactor\ttreatment\treplicate\tcontrol_id\n", storeDir:"$baseDir/output/design")
// Make Experiment design files to be read in for downstream analysis
process defineExpDesignFiles {
publishDir "$baseDir/output/design", mode: 'copy'
input:
file xcorDesign
output:
file '*.tsv' into experimentObjs mode flatten
script:
"""
python3 $baseDir/scripts/experiment_design.py -d $xcorDesign
"""
process run_chipseeker_originalpeak {
publishDir "$baseDir/output", mode: 'copy'
input:
file design_file from chipseeker_design
file chipseeker_peak_files from chipseeker_peaks.toList()
output:
file "*chipseeker*" into chipseeker_originalpeak_output
script:
"""
module load python/2.7.x-anaconda
module load R/3.3.2-gccmkl
Rscript $baseDir/scripts/runChipseeker.R $design_file ${params.genomepath}
"""
}
// Make Experiment design files to be read in for downstream analysis
process poolAndPsuedoReads {
tag "${experimentObjs.baseName}"
publishDir "$baseDir/output/design", mode: 'copy'
input:
file experimentObjs
output:
file '*.tsv' into experimentPoolObjs
script:
if (pairedEnd) {
"""
python3 $baseDir/scripts/pool_and_psuedoreplicate.py -d $experimentObjs -c $cutoffRatio -p
"""
}
else {
"""
python3 $baseDir/scripts/pool_and_psuedoreplicate.py -d $experimentObjs -c $cutoffRatio
"""
}
process run_meme_original {
publishDir "$baseDir/output", mode: 'copy'
input:
file design_meme from meme_design
file meme_peak_files from meme_peaks.toList()
output:
file "*meme*" into meme_original_output
script:
"""
module load python/2.7.x-anaconda
module load R/3.3.2-gccmkl
module add deeptools/2.3.5
module load meme/4.11.1-gcc-openmpi
python $baseDir/scripts/runMemechip.py -i $design_meme -g ${params.genomepath} -l ${toppeakcount}
"""
}
// Collect list of experiment design files into a single channel
experimentRows = experimentPoolObjs
.splitCsv(sep:'\t', header:true)
.map { row -> [ row.sample_id, row.tag_align, row.xcor, row.experiment_id, row.biosample, row.factor, row.treatment, row.replicate, row.control_id, row.control_tag_align] }
// Call Peaks using MACS
process callPeaksMACS {
tag "$sampleId-$replicate"
publishDir "$baseDir/output/${task.process}", mode: 'copy'
input:
set sampleId, tagAlign, xcor, experimentId, biosample, factor, treatment, replicate, controlId, controlTagAlign from experimentRows
output:
set sampleId, file('*.narrowPeak'), file('*.fc_signal.bw'), file('*.pvalue_signal.bw'), experimentId, biosample, factor, treatment, replicate, controlId into experimentPeaks
script:
if (pairedEnd) {
"""
python3 $baseDir/scripts/call_peaks_macs.py -t $tagAlign -x $xcor -c $controlTagAlign -s $sampleId -g $genomeSize -z $chromSizes -p
"""
}
else {
"""
python3 $baseDir/scripts/call_peaks_macs.py -t $tagAlign -x $xcor -c $controlTagAlign -s $sampleId -g $genomeSize -z $chromSizes
"""
}
process run_meme_diffpeak {
publishDir "$baseDir/output", mode: 'copy'
input:
file peaks_meme from diffpeaks_meme
file diffpeak_design from diffpeaksdesign_meme
output:
file "*meme*" into meme_diffpeak_output
script:
"""
module load python/2.7.x-anaconda
module load R/3.3.2-gccmkl
module add deeptools/2.3.5
module load meme/4.11.1-gcc-openmpi
python $baseDir/scripts/runMemechip.py -i $diffpeak_design -g ${params.genomepath} -l ${toppeakcount}
"""
}
// Define channel collecting peaks into design file
peaksDesign = experimentPeaks
.map{ sampleId, peak, fcSignal, pvalueSignal, experimentId, biosample, factor, treatment, replicate, controlId ->
"$sampleId\t$peak\t$fcSignal\t$pvalueSignal\t$experimentId\t$biosample\t$factor\t$treatment\t$replicate\t$controlId\n"}
.collectFile(name:'design_peak.tsv', seed:"sample_id\tpeaks\tfc_signal\tpvalue_signal\texperiment_id\tbiosample\tfactor\ttreatment\treplicate\tcontrol_id\n", storeDir:"$baseDir/output/design")
// Calculate Consensus Peaks
process consensusPeaks {
publishDir "$baseDir/output/${task.process}", mode: 'copy'
input:
file peaksDesign
file preDiffDesign
output:
file '*.replicated.*' into consensusPeaks
file '*.rejected.*' into rejectedPeaks
file("design_diffPeaks.tsv") into designDiffPeaks
script:
"""
python3 $baseDir/scripts/overlap_peaks.py -d $peaksDesign -f $preDiffDesign
"""
}
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment