#!/usr/bin/env nextflow

// Path to an input file, or a pattern for multiple inputs
// Note - $baseDir is the location of this workflow file main.nf

// Define Input variables
params.reads = "$baseDir/../test_data/*.fastq.gz"
params.pairedEnd = false
params.chrM = true
params.tn5 = true
params.designFile = "$baseDir/../test_data/design_ENCSR265ZXX_SE.txt"
params.genome = 'GRCh38'
params.genomes = []
params.bwaIndex = params.genome ? params.genomes[ params.genome ].bwa ?: false : false
params.genomeSize = params.genome ? params.genomes[ params.genome ].genomesize ?: false : false
params.chromSizes = params.genome ? params.genomes[ params.genome ].chromsizes ?: false : false
params.tssFile = params.genome ? params.genomes[ params.genome ].tssfile ?: false : false
params.astrocyte = false
params.outDir = "${baseDir}/output"

// Check inputs
if (params.bwaIndex) {
  bwaIndex = Channel
    .fromPath(params.bwaIndex)
    .ifEmpty { exit 1, "BWA index not found: ${params.bwaIndex}" }
} else {
  exit 1, "No reference genome specified."
}

// Define List of Files
readsList = Channel
  .fromPath(params.reads)
  .flatten()
  .map { file -> [file.getFileName().toString(), file.toString()].join("\t") }
  .collectFile(name: 'fileList.tsv', newLine: true)

// Define regular variables
pairedEnd = params.pairedEnd
chrM = params.chrM
tn5 = params.tn5
designFile = params.designFile
genomeSize = params.genomeSize
chromSizes = params.chromSizes
outDir = params.outDir
//tssFile = params.tssFile

process checkDesignFile {

  publishDir "${outDir}/design", mode: 'copy'

  input:

  designFile
  file readsList

  output:

  file("design.tsv") into designFilePaths

  script:

  if (params.astrocyte == true) {
    if (pairedEnd) {
      """
      module load python/3.6.1-2-anaconda
      python3 ${baseDir}/scripts/check_design.py -d ${designFile} -f ${readsList} -p -a
      """
    }
    else {
      """
      module load python/3.6.1-2-anaconda
      python3 ${baseDir}/scripts/check_design.py -d ${designFile} -f ${readsList} -a
      """
    }
  } else {
    if (pairedEnd) {
      """
      python3 ${baseDir}/scripts/check_design.py -d ${designFile} -f ${readsList} -p -a
      """
    }
    else {
      """
      python3 ${baseDir}/scripts/check_design.py -d ${designFile} -f ${readsList} -a
      """
    }
  }

}

// Define channel for raw reads
if (pairedEnd) {
  rawReads = designFilePaths
    .splitCsv(sep: '\t', header: true)
    .map { row -> [ row.sample_id, [row.fastq_read1, row.fastq_read2], row.experiment_id, row.replicate, row.fq_length ] }
} else {
  rawReads = designFilePaths
    .splitCsv(sep: '\t', header: true)
    .map { row -> [ row.sample_id, [row.fastq_read1], row.experiment_id, row.replicate, row.fq_length ] }
}

// Trim raw reads using trimgalore
process trimReads {

  tag "${sampleId}-${replicate}"
  publishDir "${outDir}/${task.process}/${sampleId}", mode: 'copy'

  input:

  set sampleId, reads, experimentId, replicate, fqLength from rawReads

  output:

  set sampleId, file('*.fq.gz'), experimentId, replicate, fqLength into trimmedReads
  file('*trimming_report.txt') into trimgaloreResults
  file('version_*.txt') into trimReadsVersions

  script:

  if (params.astrocyte == true) {
    if (pairedEnd) {
      """
      module load python/3.6.1-2-anaconda
      module load trimgalore/0.4.1
      python3 $baseDir/scripts/trim_reads.py -f ${reads[0]} ${reads[1]} -s ${sampleId} -p
      """
    }
    else {
      """
      module load python/3.6.1-2-anaconda
      module load trimgalore/0.4.1
      python3 ${baseDir}/scripts/trim_reads.py -f ${reads[0]} -s ${sampleId}
      """
    }
  } else {
    if (pairedEnd) {
      """
      python3 ${baseDir}/scripts/trim_reads.py -f ${reads[0]} ${reads[1]} -s ${sampleId} -p
      """
    }
    else {
      """
      python3 ${baseDir}/scripts/trim_reads.py -f ${reads[0]} -s ${sampleId}
      """
    }
  }

}
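// For reference: the rawReads channel above reads design.tsv (produced by
// checkDesignFile) with splitCsv(header: true) and pulls the columns
// sample_id, fastq_read1, fastq_read2 (paired-end only), experiment_id,
// replicate and fq_length. The row below is a hypothetical paired-end example
// for illustration only; check_design.py may emit additional columns.
//
// sample_id  fastq_read1             fastq_read2             experiment_id  replicate  fq_length
// sampleA    /path/sampleA_R1.fq.gz  /path/sampleA_R2.fq.gz  expA           1          75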
// Align trimmed reads using bwa
process alignReads {

  tag "${sampleId}-${replicate}"
  publishDir "${outDir}/${task.process}/${sampleId}", mode: 'copy'

  input:

  set sampleId, reads, experimentId, replicate, fqLength from trimmedReads
  file index from bwaIndex.first()

  output:

  set sampleId, file('*.bam'), experimentId, replicate into mappedReads
  file '*.flagstat.qc' into mappedReadsStats
  file('version_*.txt') into alignReadsVersions

  script:

  if (params.astrocyte == true) {
    if (pairedEnd) {
      """
      module load python/3.6.1-2-anaconda
      module load bwa/intel/0.7.12
      module load samtools/intel/1.3
      module load sambamba/0.6.6
      python3 ${baseDir}/scripts/map_reads.py -f ${reads[0]} ${reads[1]} -r ${index}/genome.fa -s $sampleId -l ${fqLength} -p
      """
    }
    else {
      """
      module load python/3.6.1-2-anaconda
      module load bwa/intel/0.7.12
      module load samtools/intel/1.3
      module load sambamba/0.6.6
      python3 ${baseDir}/scripts/map_reads.py -f ${reads} -r ${index}/genome.fa -s $sampleId -l ${fqLength}
      """
    }
  } else {
    if (pairedEnd) {
      """
      python3 ${baseDir}/scripts/map_reads.py -f ${reads[0]} ${reads[1]} -r ${index}/genome.fa -s $sampleId -l ${fqLength} -p
      """
    }
    else {
      """
      python3 ${baseDir}/scripts/map_reads.py -f ${reads} -r ${index}/genome.fa -s $sampleId -l ${fqLength}
      """
    }
  }

}

// Dedup reads using sambamba
process filterReads {

  tag "${sampleId}-${replicate}"
  publishDir "${outDir}/${task.process}/${sampleId}", mode: 'copy'

  input:

  set sampleId, mapped, experimentId, replicate from mappedReads

  output:

  set sampleId, file('*.bam'), file('*.bai'), experimentId, replicate into dedupReads
  set sampleId, file('*.bam'), file('*.bai'), experimentId, replicate into tssEnrich
  set sampleId, file('*.bam'), file('*.bai'), experimentId, replicate into convertReads
  file '*flagstat.qc' into dedupReadsStats
  file '*pbc.qc' into dedupReadsComplexity
  file '*dup.qc' into dupReads
  file('version_*.txt') into filterReadsVersions

  script:

  if (params.astrocyte == true) {
    if (pairedEnd) {
      """
      module load python/3.6.1-2-anaconda
      module load samtools/1.4.1
      module load sambamba/0.6.6
      module load bedtools/2.26.0
      python3 ${baseDir}/scripts/map_qc.py -b ${mapped} -p
      """
    }
    else {
      """
      module load python/3.6.1-2-anaconda
      module load samtools/1.4.1
      module load sambamba/0.6.6
      module load bedtools/2.26.0
      python3 ${baseDir}/scripts/map_qc.py -b ${mapped}
      """
    }
  } else {
    if (pairedEnd) {
      """
      python3 ${baseDir}/scripts/map_qc.py -b ${mapped} -p
      """
    }
    else {
      """
      python3 ${baseDir}/scripts/map_qc.py -b ${mapped}
      """
    }
  }

}

// Define channel collecting dedup reads into new design file
dedupReads
  .map{ sampleId, bam, bai, experimentId, replicate ->
    "${sampleId}\t${bam}\t${bai}\t${experimentId}\t${replicate}\n" }
  .collectFile(name: 'design_dedup.tsv', seed: "sample_id\tbam_reads\tbam_index\texperiment_id\treplicate\n", storeDir: "${outDir}/design")
  .into { dedupDesign; preDiffDesign }

// Quality Metrics using deeptools
process experimentQC {

  publishDir "${outDir}/${task.process}", mode: 'copy'

  input:

  file dedupDesign

  output:

  file '*.{png,npz}' into deepToolsStats
  file('version_*.txt') into experimentQCVersions

  script:

  if (params.astrocyte == true) {
    """
    module load python/3.6.1-2-anaconda
    module load deeptools/2.5.0.1
    python3 ${baseDir}/scripts/experiment_qc.py -d ${dedupDesign}
    """
  } else {
    """
    python3 ${baseDir}/scripts/experiment_qc.py -d ${dedupDesign}
    """
  }

}
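// Note on the genome lookups near the top of this script: params.bwaIndex,
// params.genomeSize, params.chromSizes and params.tssFile are all pulled from
// params.genomes, which is expected to be populated by a config profile rather
// than in this file. The sketch below is purely illustrative (the paths are
// placeholders); alignReads additionally assumes the bwa directory contains an
// index built for genome.fa, since it passes ${index}/genome.fa to map_reads.py.
//
// params.genomes = [
//   'GRCh38': [
//     bwa        : '/path/to/bwa/index/dir',
//     genomesize : 'hs',
//     chromsizes : '/path/to/GRCh38.chrom.sizes',
//     tssfile    : '/path/to/GRCh38.tss.bed'
//   ]
// ]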
// Convert dedup reads to tagAlign format
process convertReads {

  tag "$sampleId-$replicate"
  publishDir "$baseDir/output/${task.process}", mode: 'copy'

  input:

  set sampleId, deduped, bai, experimentId, replicate from convertReads

  output:

  set sampleId, file('*.tagAlign.gz'), experimentId, replicate into tagReads

  script:

  if (pairedEnd) {
    if (tn5 && chrM) {
      """
      python3 $baseDir/scripts/convert_reads.py -b $deduped -p -m -t
      """
    }
    else if (tn5) {
      """
      python3 $baseDir/scripts/convert_reads.py -b $deduped -p -t
      """
    }
    else if (chrM) {
      """
      python3 $baseDir/scripts/convert_reads.py -b $deduped -p -m
      """
    }
    else {
      """
      python3 $baseDir/scripts/convert_reads.py -b $deduped -p
      """
    }
  } else {
    if (tn5 && chrM) {
      """
      python3 $baseDir/scripts/convert_reads.py -b $deduped -m -t
      """
    }
    else if (tn5) {
      """
      python3 $baseDir/scripts/convert_reads.py -b $deduped -t
      """
    }
    else if (chrM) {
      """
      python3 $baseDir/scripts/convert_reads.py -b $deduped -m
      """
    }
    else {
      """
      python3 $baseDir/scripts/convert_reads.py -b $deduped
      """
    }
  }

}

// Calculate TSS Enrichment
//process tssEnrich {
//
//  tag "$sampleId-$replicate"
//  publishDir "$baseDir/output/${task.process}", mode: 'copy'
//
//  input:
//
//  set sampleId, deduped, bai, experimentId, replicate from tssEnrich
//
//  output:
//
//  file '*.png' into atacQC
//
//  script:
//
//  """
//  singularity run /project/shared/bicf_workflow_ref/singularity_images/bicf-metaseq:0.1.0.simg $baseDir/scripts/atac_qc.py -b $deduped -p $sampleId -t $tssFile -c $chromSizes
//  """
//
//}

// Calculate Cross-correlation using phantompeaktools
process crossReads {

  tag "$sampleId-$replicate"
  publishDir "$baseDir/output/${task.process}", mode: 'copy'

  input:

  set sampleId, tagAlign, experimentId, replicate from tagReads

  output:

  set sampleId, tagAlign, file('*.cc.qc'), experimentId, replicate into xcorReads
  set file('*.cc.qc'), file('*.cc.plot.pdf') into xcorReadsStats

  script:

  if (pairedEnd) {
    """
    python3 $baseDir/scripts/xcor.py -t $tagAlign -p
    """
  }
  else {
    """
    python3 $baseDir/scripts/xcor.py -t $tagAlign
    """
  }

}

// Define channel collecting tagAlign and xcor into design file
xcorDesign = xcorReads
  .map{ sampleId, tagAlign, xcor, experimentId, replicate ->
    "$sampleId\t$tagAlign\t$xcor\t$experimentId\t$replicate\n" }
  .collectFile(name: 'design_xcor.tsv', seed: "sample_id\ttag_align\txcor\texperiment_id\treplicate\n", storeDir: "$baseDir/output/design")

// Make Experiment design files to be read in for downstream analysis
process defineExpDesignFiles {

  publishDir "$baseDir/output/design", mode: 'copy'

  input:

  file xcorDesign

  output:

  file '*.tsv' into experimentObjs mode flatten

  script:

  """
  python3 $baseDir/scripts/experiment_design.py -d $xcorDesign -a
  """

}

// Pool reads and create pseudoreplicates for each experiment
process poolAndPsuedoReads {

  tag "${experimentObjs.baseName}"
  publishDir "$baseDir/output/design", mode: 'copy'

  input:

  file experimentObjs

  output:

  file '*.tsv' into experimentPoolObjs

  script:

  if (pairedEnd) {
    """
    python3 $baseDir/scripts/pool_and_psuedoreplicate.py -d $experimentObjs -p -a
    """
  }
  else {
    """
    python3 $baseDir/scripts/pool_and_psuedoreplicate.py -d $experimentObjs -a
    """
  }

}

// Collect list of experiment design files into a single channel
experimentRows = experimentPoolObjs
  .splitCsv(sep: '\t', header: true)
  .map { row -> [ row.sample_id, row.tag_align, row.xcor, row.experiment_id, row.replicate ] }
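// For reference: each design file emitted by poolAndPsuedoReads is parsed by
// the experimentRows channel above, which expects at least the columns
// sample_id, tag_align, xcor, experiment_id and replicate (the same header
// written into design_xcor.tsv). The row below is a hypothetical illustration
// only; pool_and_psuedoreplicate.py may add pooled and pseudoreplicate entries.
//
// sample_id  tag_align                  xcor                 experiment_id  replicate
// sampleA    /path/sampleA.tagAlign.gz  /path/sampleA.cc.qc  expA           1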
// Call Peaks using MACS
process callPeaksMACS {

  tag "$sampleId-$replicate"
  publishDir "$baseDir/output/${task.process}", mode: 'copy'

  input:

  set sampleId, tagAlign, xcor, experimentId, replicate from experimentRows

  output:

  set sampleId, file('*.narrowPeak'), file('*.fc_signal.bw'), file('*.pvalue_signal.bw'), experimentId, replicate into experimentPeaks

  script:

  if (pairedEnd) {
    """
    python3 $baseDir/scripts/call_peaks_macs.py -t $tagAlign -x $xcor -s $sampleId -g $genomeSize -z $chromSizes -p -a
    """
  }
  else {
    """
    python3 $baseDir/scripts/call_peaks_macs.py -t $tagAlign -x $xcor -s $sampleId -g $genomeSize -z $chromSizes -a
    """
  }

}

// Define channel collecting peaks into design file
peaksDesign = experimentPeaks
  .map{ sampleId, peak, fcSignal, pvalueSignal, experimentId, replicate ->
    "$sampleId\t$peak\t$fcSignal\t$pvalueSignal\t$experimentId\t$replicate\n" }
  .collectFile(name: 'design_peak.tsv', seed: "sample_id\tpeaks\tfc_signal\tpvalue_signal\texperiment_id\treplicate\n", storeDir: "$baseDir/output/design")

// Calculate Consensus Peaks
process consensusPeaks {

  publishDir "$baseDir/output/${task.process}", mode: 'copy'

  input:

  file peaksDesign
  file preDiffDesign

  output:

  file '*.replicated.*' into consensusPeaks
  file '*.rejected.*' into rejectedPeaks
  file("design_diffPeaks.tsv") into designDiffPeaks

  script:

  """
  python3 $baseDir/scripts/overlap_peaks.py -d $peaksDesign -f $preDiffDesign -a
  """

}
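// Hypothetical launch command for this workflow (paths are placeholders),
// assuming params.genomes for the chosen genome is supplied by the active
// configuration. Any of the params declared at the top of this script can be
// overridden on the command line with the usual --<param> syntax, e.g.:
//
// nextflow run main.nf \
//   --reads '/path/to/fastq/*.fastq.gz' \
//   --designFile '/path/to/design.txt' \
//   --genome 'GRCh38' \
//   --pairedEnd true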