Newer
Older
// Path to an input file, or a pattern for multiple inputs
// Note - $baseDir is the location of this workflow file main.nf
// Define Input variables
params.reads = "$baseDir/../test_data/*.fastq.gz"
params.pairedEnd = 'false'
params.designFile = "$baseDir/../test_data/design_ENCSR238SGC_SE.txt"
params.skipDiff = false
params.skipMotif = false
params.references = "$baseDir/../docs/references.md"
// Assign variables if astrocyte
if (params.astrocyte) {
print("Running under astrocyte")
referenceLocation = "/project/shared/bicf_workflow_ref"
params.bwaIndex = "$referenceLocation/$genome"
params.chromSizes = "$referenceLocation/$genome/genomefile.txt"
params.fasta = "$referenceLocation/$genome/genome.fa.txt"
if (params.genome == 'GRCh37' || params.genome == 'GRCh38') {
} else if (params.chromSizes == 'GRCm38') {
} else {
params.bwaIndex = params.genome ? params.genomes[ params.genome ].bwa ?: false : false
params.genomeSize = params.genome ? params.genomes[ params.genome ].genomesize ?: false : false
params.chromSizes = params.genome ? params.genomes[ params.genome ].chromsizes ?: false : false
params.fasta = params.genome ? params.genomes[ params.genome ].fasta ?: false : false
// Check inputs
if( params.bwaIndex ){
bwaIndex = Channel
.fromPath(params.bwaIndex)
.ifEmpty { exit 1, "BWA index not found: ${params.bwaIndex}" }
} else {
exit 1, "No reference genome specified."
}
// Define List of Files
readsList = Channel
.fromPath( params.reads )
.flatten()
.map { file -> [ file.getFileName().toString(), file.toString() ].join("\t")}
.collectFile( name: 'fileList.tsv', newLine: true )
// Define regular variables
designFile = params.designFile
genomeSize = params.genomeSize
genome = params.genome
extendReadsLen = params.extendReadsLen
skipDiff = params.skipDiff
skipMotif = params.skipMotif
if (params.pairedEnd == 'false'){
pairedEnd = false
} else {
pairedEnd = true
}
// Check design file for errors
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
input:
designFile
file readsList
output:
file("design.tsv") into designFilePaths
script:
if (pairedEnd) {
"""
python3 $baseDir/scripts/check_design.py -d $designFile -f $readsList -p
"""
}
else {
"""
python $baseDir/scripts/check_design.py -d $designFile -f $readsList
"""
}
}
// Define channel for raw reads
if (pairedEnd) {
rawReads = designFilePaths
.splitCsv(sep: '\t', header: true)
.map { row -> [ row.sample_id, [row.fastq_read1, row.fastq_read2], row.experiment_id, row.biosample, row.factor, row.treatment, row.replicate, row.control_id ] }
} else {
rawReads = designFilePaths
.splitCsv(sep: '\t', header: true)
.map { row -> [ row.sample_id, [row.fastq_read1], row.experiment_id, row.biosample, row.factor, row.treatment, row.replicate, row.control_id ] }
}
// Trim raw reads using trimgalore
process trimReads {
tag "$sampleId-$replicate"
publishDir "$outDir/${task.process}/${sampleId}", mode: 'copy'
input:
set sampleId, reads, experimentId, biosample, factor, treatment, replicate, controlId from rawReads
output:
set sampleId, file('*.fq.gz'), experimentId, biosample, factor, treatment, replicate, controlId into trimmedReads
file('*trimming_report.txt') into trimgaloreResults
file('version_*.txt') into trimReadsVersions
python3 $baseDir/scripts/trim_reads.py -f ${reads[0]} ${reads[1]} -s $sampleId -p
python3 $baseDir/scripts/trim_reads.py -f ${reads[0]} -s $sampleId
"""
}
}
// Align trimmed reads using bwa
process alignReads {
tag "$sampleId-$replicate"
publishDir "$outDir/${task.process}/${sampleId}", mode: 'copy'
input:
set sampleId, reads, experimentId, biosample, factor, treatment, replicate, controlId from trimmedReads
file index from bwaIndex.first()
output:
set sampleId, file('*.bam'), experimentId, biosample, factor, treatment, replicate, controlId into mappedReads
file '*.flagstat.qc' into mappedReadsStats
file('version_*.txt') into alignReadsVersions
python3 $baseDir/scripts/map_reads.py -f ${reads[0]} ${reads[1]} -r ${index}/genome.fa -s $sampleId -p
python3 $baseDir/scripts/map_reads.py -f $reads -r ${index}/genome.fa -s $sampleId
"""
}
}
// Dedup reads using sambamba
process filterReads {
tag "$sampleId-$replicate"
publishDir "$outDir/${task.process}/${sampleId}", mode: 'copy'
input:
set sampleId, mapped, experimentId, biosample, factor, treatment, replicate, controlId from mappedReads
output:
set sampleId, file('*.bam'), file('*.bai'), experimentId, biosample, factor, treatment, replicate, controlId into dedupReads
set sampleId, file('*.bam'), experimentId, biosample, factor, treatment, replicate, controlId into convertReads
file '*.flagstat.qc' into dedupReadsStats
file '*.pbc.qc' into dedupReadsComplexity
file '*.dedup.qc' into dupReads
file('version_*.txt') into filterReadsVersions
script:
if (pairedEnd) {
"""
python3 $baseDir/scripts/map_qc.py -b $mapped -p
"""
}
else {
"""
python3 $baseDir/scripts/map_qc.py -b $mapped
"""
}
}
// Define channel collecting dedup reads into new design file
dedupReads
.map{ sampleId, bam, bai, experimentId, biosample, factor, treatment, replicate, controlId ->
"$sampleId\t$bam\t$bai\t$experimentId\t$biosample\t$factor\t$treatment\t$replicate\t$controlId\n"}
.collectFile(name:'design_dedup.tsv', seed:"sample_id\tbam_reads\tbam_index\texperiment_id\tbiosample\tfactor\ttreatment\treplicate\tcontrol_id\n", storeDir:"$outDir/design")
.into { dedupDesign; preDiffDesign }
// Quality Metrics using deeptools
process experimentQC {
publishDir "$outDir/${task.process}", mode: 'copy'
file('version_*.txt') into experimentQCVersions
python3 $baseDir/scripts/experiment_qc.py -d $dedupDesign -e $extendReadsLen
// Convert reads to bam
process convertReads {
tag "$sampleId-$replicate"
publishDir "$outDir/${task.process}/${sampleId}", mode: 'copy'
input:
set sampleId, deduped, experimentId, biosample, factor, treatment, replicate, controlId from convertReads
output:
set sampleId, file('*.tagAlign.gz'), file('*.bed{pe,se}.gz'), experimentId, biosample, factor, treatment, replicate, controlId into tagReads
file('version_*.txt') into convertReadsVersions
script:
if (pairedEnd) {
"""
python3 $baseDir/scripts/convert_reads.py -b $deduped -p
"""
}
else {
"""
python3 $baseDir/scripts/convert_reads.py -b $deduped
"""
}
}
// Calculate Cross-correlation using phantompeaktools
process crossReads {
tag "$sampleId-$replicate"
publishDir "$outDir/${task.process}/${sampleId}", mode: 'copy'
input:
set sampleId, seTagAlign, tagAlign, experimentId, biosample, factor, treatment, replicate, controlId from tagReads
output:
set sampleId, seTagAlign, tagAlign, file('*.cc.qc'), experimentId, biosample, factor, treatment, replicate, controlId into xcorReads
set file('*.cc.qc'), file('*.cc.plot.pdf') into crossReadsStats
file('version_*.txt') into crossReadsVersions
script:
if (pairedEnd) {
"""
python3 $baseDir/scripts/xcor.py -t $seTagAlign -p
"""
}
else {
"""
python3 $baseDir/scripts/xcor.py -t $seTagAlign
"""
}
}
// Define channel collecting tagAlign and xcor into design file
xcorDesign = xcorReads
.map{ sampleId, seTagAlign, tagAlign, xcor, experimentId, biosample, factor, treatment, replicate, controlId ->
"$sampleId\t$seTagAlign\t$tagAlign\t$xcor\t$experimentId\t$biosample\t$factor\t$treatment\t$replicate\t$controlId\n"}
.collectFile(name:'design_xcor.tsv', seed:"sample_id\tse_tag_align\ttag_align\txcor\texperiment_id\tbiosample\tfactor\ttreatment\treplicate\tcontrol_id\n", storeDir:"$outDir/design")
// Make Experiment design files to be read in for downstream analysis
process defineExpDesignFiles {
input:
file xcorDesign
output:
file '*.tsv' into experimentObjs mode flatten
script:
"""
python3 $baseDir/scripts/experiment_design.py -d $xcorDesign
"""
}
// Make Experiment design files to be read in for downstream analysis
process poolAndPsuedoReads {
tag "${experimentObjs.baseName}"
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
input:
file experimentObjs
output:
file '*.tsv' into experimentPoolObjs
script:
if (pairedEnd) {
"""
python3 $baseDir/scripts/pool_and_psuedoreplicate.py -d $experimentObjs -c $cutoffRatio -p
"""
}
else {
"""
python3 $baseDir/scripts/pool_and_psuedoreplicate.py -d $experimentObjs -c $cutoffRatio
"""
}
}
// Collect list of experiment design files into a single channel
experimentRows = experimentPoolObjs
.splitCsv(sep:'\t', header:true)
.map { row -> [ row.sample_id, row.tag_align, row.xcor, row.experiment_id, row.biosample, row.factor, row.treatment, row.replicate, row.control_id, row.control_tag_align] }
// Call Peaks using MACS
process callPeaksMACS {
tag "$sampleId-$replicate"
publishDir "$outDir/${task.process}/${experimentId}/${replicate}", mode: 'copy'
input:
set sampleId, tagAlign, xcor, experimentId, biosample, factor, treatment, replicate, controlId, controlTagAlign from experimentRows
output:
set sampleId, file('*.narrowPeak'), file('*.fc_signal.bw'), file('*.pvalue_signal.bw'), experimentId, biosample, factor, treatment, replicate, controlId into experimentPeaks
file('version_*.txt') into callPeaksMACSVersions
script:
if (pairedEnd) {
"""
python3 $baseDir/scripts/call_peaks_macs.py -t $tagAlign -x $xcor -c $controlTagAlign -s $sampleId -g $genomeSize -z $chromSizes -p
"""
}
else {
"""
python3 $baseDir/scripts/call_peaks_macs.py -t $tagAlign -x $xcor -c $controlTagAlign -s $sampleId -g $genomeSize -z $chromSizes
"""
}
}
// Define channel collecting peaks into design file
peaksDesign = experimentPeaks
.map{ sampleId, peak, fcSignal, pvalueSignal, experimentId, biosample, factor, treatment, replicate, controlId ->
"$sampleId\t$peak\t$fcSignal\t$pvalueSignal\t$experimentId\t$biosample\t$factor\t$treatment\t$replicate\t$controlId\n"}
.collectFile(name:'design_peak.tsv', seed:"sample_id\tpeaks\tfc_signal\tpvalue_signal\texperiment_id\tbiosample\tfactor\ttreatment\treplicate\tcontrol_id\n", storeDir:"$outDir/design")
// Calculate Consensus Peaks
process consensusPeaks {
publishDir "$outDir/${task.process}", mode: 'copy'
publishDir "$outDir/design", mode: 'copy', pattern: '*.{csv|tsv}'
input:
file peaksDesign
file preDiffDesign
output:
file '*.replicated.*' into consensusPeaks
file '*.rejected.*' into rejectedPeaks
file 'design_diffPeaks.csv' into designDiffPeaks
file 'design_annotatePeaks.tsv' into designAnnotatePeaks, designMotifSearch
file 'unique_experiments.csv' into uniqueExperiments
file('version_*.txt') into consensusPeaksVersions
script:
"""
python3 $baseDir/scripts/overlap_peaks.py -d $peaksDesign -f $preDiffDesign
"""
}
// Annotate Peaks
process peakAnnotation {
publishDir "$outDir/${task.process}", mode: 'copy'
input:
file designAnnotatePeaks
output:
file "*chipseeker*" into peakAnnotation
file('version_*.txt') into peakAnnotationVersions
script:
"""
Rscript $baseDir/scripts/annotate_peaks.R $designAnnotatePeaks $genome
"""
}
process motifSearch {
publishDir "$outDir/${task.process}", mode: 'copy'
file designMotifSearch
file "*memechip" into motifSearch
file "*narrowPeak" into filteredPeaks
file('version_*.txt') into motifSearchVersions
"""
python3 $baseDir/scripts/motif_search.py -d $designMotifSearch -g $fasta -p $topPeakCount
"""
// Define channel to find number of unique experiments
uniqueExperimentsList = uniqueExperiments
.splitCsv(sep: '\t', header: true)
// Calculate Differential Binding Activity
process diffPeaks {
publishDir "$outDir/${task.process}", mode: 'copy'
file designDiffPeaks
val noUniqueExperiments from uniqueExperimentsList.count()
file '*_diffbind.bed' into diffPeaks
file '*_diffbind.csv' into diffPeaksCounts
file '*.pdf' into diffPeaksStats
file 'normcount_peaksets.txt' into normCountPeaks
file('version_*.txt') into diffPeaksVersions
noUniqueExperiments > 1 && !skipDiff
Rscript $baseDir/scripts/diff_peaks.R $designDiffPeaks
// Collect Software Versions and references
file ('trimReads_vf/*') from trimReadsVersions.first()
file ('alignReads_vf/*') from alignReadsVersions.first()
file ('filterReads_vf/*') from filterReadsVersions.first()
file ('convertReads_vf/*') from convertReadsVersions.first()
file ('crossReads_vf/*') from crossReadsVersions.first()
file ('callPeaksMACS_vf/*') from callPeaksMACSVersions.first()
file ('consensusPeaks_vf/*') from consensusPeaksVersions.first()
file ('peakAnnotation_vf/*') from peakAnnotationVersions.first()
file ('motifSearch_vf/*') from motifSearchVersions.first().ifEmpty()
file ('diffPeaks_vf/*') from diffPeaksVersions.first().ifEmpty()
file ('experimentQC_vf/*') from experimentQCVersions.first()
file('software_versions_mqc.yaml') into softwareVersions
file('software_references_mqc.yaml') into softwareReferences
echo $workflow.nextflow.version > version_nextflow.txt
python3 $baseDir/scripts/generate_references.py -r $references -o software_references
python3 $baseDir/scripts/generate_versions.py -o software_versions