Add in old main.nf to fix organism in chipseeker.

bb5a6702 · Venkat Malladi · 6a418e0f · bb5a6702
Commit bb5a6702 authored 6 years ago by Venkat Malladi
--- a/workflow/main.nf
+++ b/workflow/main.nf
 #!/usr/bin/env nextflow
-
-// Path to an input file, or a pattern for multiple inputs
-// Note - $baseDir is the location of this workflow file main.nf
-
-// Define Input variables
-params.reads = "$baseDir/../test_data/*.fastq.gz"
-params.pairedEnd = false
-params.designFile = "$baseDir/../test_data/design_ENCSR238SGC_SE.txt"
-params.genome = 'GRCm38'
-params.genomes = []
-params.bwaIndex = params.genome ? params.genomes[ params.genome ].bwa ?: false : false
-params.genomeSize = params.genome ? params.genomes[ params.genome ].genomesize ?: false : false
-params.chromSizes = params.genome ? params.genomes[ params.genome ].chromsizes ?: false : false
-params.cutoffRatio = 1.2
-
-// Check inputs
-if( params.bwaIndex ){
-  bwaIndex = Channel
-    .fromPath(params.bwaIndex)
-    .ifEmpty { exit 1, "BWA index not found: ${params.bwaIndex}" }
-} else {
-  exit 1, "No reference genome specified."
-}
-
-// Define List of Files
-readsList = Channel
-  .fromPath( params.reads )
-  .flatten()
-  .map { file -> [ file.getFileName().toString(), file.toString() ].join("\t")}
-  .collectFile( name: 'fileList.tsv', newLine: true )
-
-// Define regular variables
-pairedEnd = params.pairedEnd
-designFile = params.designFile
-genomeSize = params.genomeSize
-chromSizes = params.chromSizes
-cutoffRatio = params.cutoffRatio
-
-process checkDesignFile {
-
-  publishDir "$baseDir/output/design", mode: 'copy'
-
-  input:
-
-  designFile
-  file readsList
-
-  output:
-
-  file("design.tsv") into designFilePaths
-
-  script:
-
-  if (pairedEnd) {
-    """
-    python3 $baseDir/scripts/check_design.py -d $designFile -f $readsList -p
-    """
-  }
-  else {
-    """
-    python $baseDir/scripts/check_design.py -d $designFile -f $readsList
-    """
-  }
-
-}
-
-// Define channel for raw reads
-if (pairedEnd) {
-  rawReads = designFilePaths
-    .splitCsv(sep: '\t', header: true)
-    .map { row -> [ row.sample_id, [row.fastq_read1, row.fastq_read2], row.experiment_id, row.biosample, row.factor, row.treatment, row.replicate, row.control_id ] }
-} else {
-rawReads = designFilePaths
-  .splitCsv(sep: '\t', header: true)
-  .map { row -> [ row.sample_id, [row.fastq_read1], row.experiment_id, row.biosample, row.factor, row.treatment, row.replicate, row.control_id ] }
-}
-
-// Trim raw reads using trimgalore
-process trimReads {
-
-  tag "$sampleId-$replicate"
-  publishDir "$baseDir/output/${task.process}", mode: 'copy'
-
-  input:
-
-  set sampleId, reads, experimentId, biosample, factor, treatment, replicate, controlId from rawReads
-
-  output:
-
-  set sampleId, file('*.fq.gz'), experimentId, biosample, factor, treatment, replicate, controlId into trimmedReads
-  file('*trimming_report.txt') into trimgalore_results
-
-  script:
-
-  if (pairedEnd) {
-    """
-    python3 $baseDir/scripts/trim_reads.py -f ${reads[0]} ${reads[1]} -p
-    """
-  }
-  else {
-    """
-    python3 $baseDir/scripts/trim_reads.py -f ${reads[0]}
-    """
-  }
-
+   params.design="$baseDir/../test_data/samplesheet.csv"
+   params.bams = "$baseDir/../test_data/*.bam"
+   params.peaks = "$baseDir/../test_data/*.broadPeak"
+   params.genomepath="/project/shared/bicf_workflow_ref/GRCh37"
+   toppeakcount = -1
+   design_file = file(params.design)
+   deeptools_design = Channel.fromPath(params.design)
+   diffbind_design = Channel.fromPath(params.design)
+   chipseeker_design = Channel.fromPath(params.design)
+   meme_design = Channel.fromPath(params.design)
+   index_bams = Channel.fromPath(params.bams)
+   deeptools_bams = Channel.fromPath(params.bams) 
+   deeptools_peaks = Channel.fromPath(params.peaks) 
+   chipseeker_peaks = Channel.fromPath(params.peaks) 
+   diffbind_bams = Channel.fromPath(params.bams) 
+   diffbind_peaks = Channel.fromPath(params.peaks) 
+   meme_peaks = Channel.fromPath(params.peaks)
+
+process bamindex {
+   publishDir "$baseDir/output/", mode: 'copy'
+   input:
+     file index_bam_files from index_bams
+   output:
+     file "*bai" into deeptools_bamindex
+     file "*bai" into diffbind_bamindex
+
+   script:
+     """
+     module load python/2.7.x-anaconda
+     module load R/3.3.2-gccmkl
+     module load samtools/intel/1.3
+     samtools index $index_bam_files
+     """
 }

-// Align trimmed reads using bwa
-process alignReads {
-
-  tag "$sampleId-$replicate"
-  publishDir "$baseDir/output/${task.process}", mode: 'copy'
-
-  input:
-
-  set sampleId, reads, experimentId, biosample, factor, treatment, replicate, controlId from trimmedReads
-  file index from bwaIndex.first()
-
-  output:
-
-  set sampleId, file('*.bam'), experimentId, biosample, factor, treatment, replicate, controlId into mappedReads
-  file '*.srt.bam.flagstat.qc' into mappedReadsStats
-
-  script:
-
-  if (pairedEnd) {
-    """
-    python3 $baseDir/scripts/map_reads.py -f ${reads[0]} ${reads[1]} -r ${index}/genome.fa -p
-    """
-  }
-  else {
-    """
-    python3 $baseDir/scripts/map_reads.py -f $reads -r ${index}/genome.fa
-    """
-  }
-
-}
-
-// Dedup reads using sambamba
-process filterReads {
-
-  tag "$sampleId-$replicate"
-  publishDir "$baseDir/output/${task.process}", mode: 'copy'
-
-  input:
-
-  set sampleId, mapped, experimentId, biosample, factor, treatment, replicate, controlId from mappedReads
-
-  output:
-
-  set sampleId, file('*.bam'), file('*.bai'), experimentId, biosample, factor, treatment, replicate, controlId into dedupReads
-  set sampleId, file('*.bam'), experimentId, biosample, factor, treatment, replicate, controlId into convertReads
-  file '*flagstat.qc' into dedupReadsStats
-  file '*pbc.qc' into dedupReadsComplexity
-  file '*dup.qc' into dupReads
-
-  script:
-
-  if (pairedEnd) {
-    """
-    python3 $baseDir/scripts/map_qc.py -b $mapped -p
-    """
-  }
-  else {
-    """
-    python3 $baseDir/scripts/map_qc.py -b $mapped
-    """
-  }
-
-}
-
-// Define channel collecting dedup reads into new design file
-dedupReads
-.map{ sampleId, bam, bai, experimentId, biosample, factor, treatment, replicate, controlId ->
-"$sampleId\t$bam\t$bai\t$experimentId\t$biosample\t$factor\t$treatment\t$replicate\t$controlId\n"}
-.collectFile(name:'design_dedup.tsv', seed:"sample_id\tbam_reads\tbam_index\texperiment_id\tbiosample\tfactor\ttreatment\treplicate\tcontrol_id\n", storeDir:"$baseDir/output/design")
-.into { dedupDesign; preDiffDesign }
-
-// Quality Metrics using deeptools
-process experimentQC {
-
-  publishDir "$baseDir/output/${task.process}", mode: 'copy'
-
-  input:
-
-  file dedupDesign
-
-  output:
-
-  file '*.{png,npz}' into deepToolsStats
-
-  script:
-
-  """
-  python3 $baseDir/scripts/experiment_qc.py -d $dedupDesign
-  """
-
+process run_deeptools {
+   publishDir "$baseDir/output", mode: 'copy'
+   input:
+     file deeptools_design_file from deeptools_design
+     file deeptools_bam_files from deeptools_bams.toList()
+     file deeptools_peak_files from deeptools_peaks.toList()
+     file deeptools_bam_indexes from deeptools_bamindex.toList()
+   output:
+     file "*deeptools*" into deeptools_output
+     script:
+     """
+     module load python/2.7.x-anaconda
+     module load R/3.3.2-gccmkl
+     module load deeptools/2.3.5
+     python $baseDir/scripts/runDeepTools.py -i ${params.design} -g ${params.genomepath}}
+"""
 }

-// Convert reads to bam
-process convertReads {
-
-  tag "$sampleId-$replicate"
-  publishDir "$baseDir/output/${task.process}", mode: 'copy'
-
-  input:
-
-  set sampleId, deduped, experimentId, biosample, factor, treatment, replicate, controlId from convertReads
-
-  output:
-
-  set sampleId, file('*.tagAlign.gz'), file('*.bed{pe,se}.gz'), experimentId, biosample, factor, treatment, replicate, controlId into tagReads
-
-  script:
-
-  if (pairedEnd) {
-    """
-    python3 $baseDir/scripts/convert_reads.py -b $deduped -p
-    """
-  }
-  else {
-    """
-    python3 $baseDir/scripts/convert_reads.py -b $deduped
-    """
-  }

+process run_diffbind {
+   publishDir "$baseDir/output", mode: 'copy'
+   input:
+     file diffbind_design_file from diffbind_design
+     file diffbind_bam_files from diffbind_bams.toList()
+     file diffbind_peak_files from diffbind_peaks.toList()
+     file diffbind_bam_indexes from diffbind_bamindex.toList()
+   output:
+     file "diffpeak.design" into diffpeaksdesign_chipseeker
+     file "diffpeak.design" into diffpeaksdesign_meme
+     file "*_diffbind.bed" into diffpeaks_meme
+     file "*_diffbind.bed" into diffpeaks_chipseeker
+   script:
+     """
+     module load python/2.7.x-anaconda
+     module load R/3.3.2-gccmkl
+     Rscript $baseDir/scripts/runDiffBind.R $diffbind_design_file
+"""
 }

-// Calculate Cross-correlation using phantompeaktools
-process crossReads {
-
-  tag "$sampleId-$replicate"
-  publishDir "$baseDir/output/${task.process}", mode: 'copy'
-
-  input:
-
-  set sampleId, seTagAlign, tagAlign, experimentId, biosample, factor, treatment, replicate, controlId from tagReads
-
-  output:
-
-  set sampleId, seTagAlign, tagAlign, file('*.cc.qc'), experimentId, biosample, factor, treatment, replicate, controlId into xcorReads
-  set file('*.cc.qc'), file('*.cc.plot.pdf') into xcorReadsStats
-
-  script:
-
-  if (pairedEnd) {
-    """
-    python3 $baseDir/scripts/xcor.py -t $seTagAlign -p
-    """
-  }
-  else {
-    """
-    python3 $baseDir/scripts/xcor.py -t $seTagAlign
-    """
-  }
-
+process run_chipseeker_diffpeak {
+   publishDir "$baseDir/output", mode: 'copy'
+   input:
+     file diffpeak_design_file from diffpeaksdesign_chipseeker
+     file diffpeaks from diffpeaks_chipseeker
+   output:
+     file "*chipseeker*" into chipseeker_diffpeak_output
+   script:
+     """
+     module load python/2.7.x-anaconda
+     module load R/3.3.2-gccmkl
+     Rscript $baseDir/scripts/runChipseeker.R $diffpeak_design_file ${params.genomepath}
+"""
 }

-// Define channel collecting tagAlign and xcor into design file
-xcorDesign = xcorReads
-              .map{ sampleId, seTagAlign, tagAlign, xcor, experimentId, biosample, factor, treatment, replicate, controlId ->
-              "$sampleId\t$seTagAlign\t$tagAlign\t$xcor\t$experimentId\t$biosample\t$factor\t$treatment\t$replicate\t$controlId\n"}
-              .collectFile(name:'design_xcor.tsv', seed:"sample_id\tse_tag_align\ttag_align\txcor\texperiment_id\tbiosample\tfactor\ttreatment\treplicate\tcontrol_id\n", storeDir:"$baseDir/output/design")
-
-// Make Experiment design files to be read in for downstream analysis
-process defineExpDesignFiles {
-
-  publishDir "$baseDir/output/design", mode: 'copy'
-
-  input:
-
-  file xcorDesign
-
-  output:
-
-  file '*.tsv' into experimentObjs mode flatten
-
-  script:
-
-  """
-  python3 $baseDir/scripts/experiment_design.py -d $xcorDesign
-  """
-
+process run_chipseeker_originalpeak {
+   publishDir "$baseDir/output", mode: 'copy'
+   input:
+     file design_file from chipseeker_design
+     file chipseeker_peak_files from chipseeker_peaks.toList()
+   output:
+     file "*chipseeker*" into chipseeker_originalpeak_output
+   script:
+     """
+     module load python/2.7.x-anaconda
+     module load R/3.3.2-gccmkl
+     Rscript $baseDir/scripts/runChipseeker.R $design_file ${params.genomepath}
+"""
 }

-
-// Make Experiment design files to be read in for downstream analysis
-process poolAndPsuedoReads {
-
-
-  tag "${experimentObjs.baseName}"
-  publishDir "$baseDir/output/design", mode: 'copy'
-
-  input:
-
-  file experimentObjs
-
-  output:
-
-  file '*.tsv' into experimentPoolObjs
-
-  script:
-
-  if (pairedEnd) {
-    """
-    python3 $baseDir/scripts/pool_and_psuedoreplicate.py -d $experimentObjs -c $cutoffRatio -p
-    """
-  }
-  else {
-    """
-    python3 $baseDir/scripts/pool_and_psuedoreplicate.py -d $experimentObjs -c $cutoffRatio
-    """
-  }
-
+process run_meme_original {
+   publishDir "$baseDir/output", mode: 'copy'
+   input:
+     file design_meme from meme_design
+     file meme_peak_files from meme_peaks.toList()
+   output:
+     file "*meme*" into meme_original_output
+   script:
+     """
+     module load python/2.7.x-anaconda
+     module load R/3.3.2-gccmkl
+     module add deeptools/2.3.5
+     module load meme/4.11.1-gcc-openmpi
+     python $baseDir/scripts/runMemechip.py -i $design_meme -g ${params.genomepath} -l ${toppeakcount}
+"""
 }

-// Collect list of experiment design files into a single channel
-experimentRows = experimentPoolObjs
-                .splitCsv(sep:'\t', header:true)
-                .map { row -> [ row.sample_id, row.tag_align, row.xcor, row.experiment_id, row.biosample, row.factor, row.treatment, row.replicate, row.control_id, row.control_tag_align] }
-
-// Call Peaks using MACS
-process callPeaksMACS {
-
-  tag "$sampleId-$replicate"
-  publishDir "$baseDir/output/${task.process}", mode: 'copy'
-
-  input:
-  set sampleId, tagAlign, xcor, experimentId, biosample, factor, treatment, replicate, controlId, controlTagAlign from experimentRows
-
-  output:
-
-  set sampleId, file('*.narrowPeak'), file('*.fc_signal.bw'), file('*.pvalue_signal.bw'), experimentId, biosample, factor, treatment, replicate, controlId into experimentPeaks
-
-  script:
-
-  if (pairedEnd) {
-    """
-    python3 $baseDir/scripts/call_peaks_macs.py -t $tagAlign -x $xcor -c $controlTagAlign -s $sampleId -g $genomeSize -z $chromSizes -p
-    """
-  }
-  else {
-    """
-    python3 $baseDir/scripts/call_peaks_macs.py -t $tagAlign -x $xcor -c $controlTagAlign -s $sampleId -g $genomeSize -z $chromSizes
-    """
-  }
-
+process run_meme_diffpeak {
+   publishDir "$baseDir/output", mode: 'copy'
+   input:
+     file peaks_meme from diffpeaks_meme
+     file diffpeak_design from diffpeaksdesign_meme
+   output:
+     file "*meme*" into meme_diffpeak_output
+   script:
+     """
+     module load python/2.7.x-anaconda
+     module load R/3.3.2-gccmkl
+     module add deeptools/2.3.5
+     module load meme/4.11.1-gcc-openmpi
+     python $baseDir/scripts/runMemechip.py -i $diffpeak_design -g ${params.genomepath} -l ${toppeakcount}
+"""
 }

-// Define channel collecting peaks into design file
-peaksDesign = experimentPeaks
-              .map{ sampleId, peak, fcSignal, pvalueSignal, experimentId, biosample, factor, treatment, replicate, controlId ->
-              "$sampleId\t$peak\t$fcSignal\t$pvalueSignal\t$experimentId\t$biosample\t$factor\t$treatment\t$replicate\t$controlId\n"}
-              .collectFile(name:'design_peak.tsv', seed:"sample_id\tpeaks\tfc_signal\tpvalue_signal\texperiment_id\tbiosample\tfactor\ttreatment\treplicate\tcontrol_id\n", storeDir:"$baseDir/output/design")
-
-// Calculate Consensus Peaks
-process consensusPeaks {
-
-  publishDir "$baseDir/output/${task.process}", mode: 'copy'
-
-  input:
-
-  file peaksDesign
-  file preDiffDesign
-
-  output:
-
-  file '*.replicated.*' into consensusPeaks
-  file '*.rejected.*' into rejectedPeaks
-  file("design_diffPeaks.tsv") into designDiffPeaks
-
-  script:
-
-  """
-  python3 $baseDir/scripts/overlap_peaks.py -d $peaksDesign -f $preDiffDesign
-  """
-
-}