#!/usr/bin/env nextflow

// Path to an input file, or a pattern for multiple inputs
// Note - $baseDir is the location of this workflow file main.nf

// Define Input variables
params.reads = "$baseDir/../test_data/*.fastq.gz"
params.pairedEnd = false
params.chrM = true
params.tn5 = true
params.designFile = "$baseDir/../test_data/design_ENCSR265ZXX_SE.txt"
params.genome = 'GRCh38'
params.genomes = []
params.bwaIndex = params.genome ? params.genomes[ params.genome ].bwa ?: false : false
params.genomeSize = params.genome ? params.genomes[ params.genome ].genomesize ?: false : false
params.chromSizes = params.genome ? params.genomes[ params.genome ].chromsizes ?: false : false
params.tssFile = params.genome ? params.genomes[ params.genome ].tssfile ?: false : false
params.astrocyte = false
params.outDir = "${baseDir}/output"

// Check inputs
if (params.bwaIndex) {
  bwaIndex = Channel
    .fromPath(params.bwaIndex)
    .ifEmpty { exit 1, "BWA index not found: ${params.bwaIndex}" }
} else {
  exit 1, "No reference genome specified."
}

// Define List of Files
readsList = Channel
  .fromPath(params.reads)
  .flatten()
  .map { file -> [file.getFileName().toString(), file.toString()].join("\t") }
  .collectFile(name: 'fileList.tsv', newLine: true)

// Define regular variables
pairedEnd = params.pairedEnd
chrM = params.chrM
tn5 = params.tn5
designFile = params.designFile
genomeSize = params.genomeSize
chromSizes = params.chromSizes
outDir = params.outDir
//tssFile = params.tssFile

process checkDesignFile {

  publishDir "${outDir}/design", mode: 'copy'

  input:

  designFile
  file readsList

  output:

  file("design.tsv") into designFilePaths

  script:

  if (params.astrocyte == true) {
    if (pairedEnd) {
      """
      module load python/3.6.1-2-anaconda
      python3 ${baseDir}/scripts/check_design.py -d ${designFile} -f ${readsList} -p -a
      """
    }
    else {
      """
      module load python/3.6.1-2-anaconda
      python3 ${baseDir}/scripts/check_design.py -d ${designFile} -f ${readsList} -a
      """
    }
  } else {
    if (pairedEnd) {
      """
      python3 ${baseDir}/scripts/check_design.py -d ${designFile} -f ${readsList} -p -a
      """
    }
    else {
      """
      python3 ${baseDir}/scripts/check_design.py -d ${designFile} -f ${readsList} -a
      """
    }
  }

}

// Define channel for raw reads
if (pairedEnd) {
  rawReads = designFilePaths
    .splitCsv(sep: '\t', header: true)
    .map { row -> [ row.sample_id, [row.fastq_read1, row.fastq_read2], row.experiment_id, row.replicate, row.fq_length ] }
} else {
  rawReads = designFilePaths
    .splitCsv(sep: '\t', header: true)
    .map { row -> [ row.sample_id, [row.fastq_read1], row.experiment_id, row.replicate, row.fq_length ] }
}

// Trim raw reads using trimgalore
process trimReads {

  tag "${sampleId}-${replicate}"
  publishDir "${outDir}/${task.process}/${sampleId}", mode: 'copy'

  input:

  set sampleId, reads, experimentId, replicate, fqLength from rawReads

  output:

  set sampleId, file('*.fq.gz'), experimentId, replicate, fqLength into trimmedReads
  file('*trimming_report.txt') into trimgaloreResults
  file('version_*.txt') into trimReadsVersions

  script:

  if (params.astrocyte == true) {
    if (pairedEnd) {
      """
      module load python/3.6.1-2-anaconda
      module load trimgalore/0.4.1
      python3 $baseDir/scripts/trim_reads.py -f ${reads[0]} ${reads[1]} -s ${sampleId} -p
      """
    }
    else {
      """
      module load python/3.6.1-2-anaconda
      module load trimgalore/0.4.1
      python3 ${baseDir}/scripts/trim_reads.py -f ${reads[0]} -s ${sampleId}
      """
    }
  } else {
    if (pairedEnd) {
      """
      python3 ${baseDir}/scripts/trim_reads.py -f ${reads[0]} ${reads[1]} -s ${sampleId} -p
      """
    }
    else {
      """
      python3 ${baseDir}/scripts/trim_reads.py -f ${reads[0]} -s ${sampleId}
      """
    }
  }

}
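// For reference: the rawReads channel above reads design.tsv (produced by
// checkDesignFile) with splitCsv(header: true) and pulls the columns
// sample_id, fastq_read1, fastq_read2 (paired-end only), experiment_id,
// replicate and fq_length. The row below is a hypothetical paired-end example
// for illustration only; check_design.py may emit additional columns.
//
// sample_id  fastq_read1             fastq_read2             experiment_id  replicate  fq_length
// sampleA    /path/sampleA_R1.fq.gz  /path/sampleA_R2.fq.gz  expA           1          75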
// Align trimmed reads using bwa
process alignReads {

  tag "${sampleId}-${replicate}"
  publishDir "${outDir}/${task.process}/${sampleId}", mode: 'copy'

  input:

  set sampleId, reads, experimentId, replicate, fqLength from trimmedReads
  file index from bwaIndex.first()

  output:

  set sampleId, file('*.bam'), experimentId, replicate into mappedReads
  file '*.flagstat.qc' into mappedReadsStats
  file('version_*.txt') into alignReadsVersions

  script:

  if (params.astrocyte == true) {
    if (pairedEnd) {
      """
      module load python/3.6.1-2-anaconda
      module load bwa/intel/0.7.12
      module load samtools/intel/1.3
      module load sambamba/0.6.6
      python3 ${baseDir}/scripts/map_reads.py -f ${reads[0]} ${reads[1]} -r ${index}/genome.fa -s $sampleId -l ${fqLength} -p
      """
    }
    else {
      """
      module load python/3.6.1-2-anaconda
      module load bwa/intel/0.7.12
      module load samtools/intel/1.3
      module load sambamba/0.6.6
      python3 ${baseDir}/scripts/map_reads.py -f ${reads} -r ${index}/genome.fa -s $sampleId -l ${fqLength}
      """
    }
  } else {
    if (pairedEnd) {
      """
      python3 ${baseDir}/scripts/map_reads.py -f ${reads[0]} ${reads[1]} -r ${index}/genome.fa -s $sampleId -l ${fqLength} -p
      """
    }
    else {
      """
      python3 ${baseDir}/scripts/map_reads.py -f ${reads} -r ${index}/genome.fa -s $sampleId -l ${fqLength}
      """
    }
  }

}

// Dedup reads using sambamba
process filterReads {

  tag "${sampleId}-${replicate}"
  publishDir "${outDir}/${task.process}/${sampleId}", mode: 'copy'

  input:

  set sampleId, mapped, experimentId, replicate from mappedReads

  output:

  set sampleId, file('*.bam'), file('*.bai'), experimentId, replicate into dedupReads
  set sampleId, file('*.bam'), file('*.bai'), experimentId, replicate into tssEnrich
  set sampleId, file('*.bam'), file('*.bai'), experimentId, replicate into convertReads
  file '*flagstat.qc' into dedupReadsStats
  file '*pbc.qc' into dedupReadsComplexity
  file '*dup.qc' into dupReads
  file('version_*.txt') into filterReadsVersions

  script:

  if (params.astrocyte == true) {
    if (pairedEnd) {
      """
      module load python/3.6.1-2-anaconda
      module load samtools/1.4.1
      module load sambamba/0.6.6
      module load bedtools/2.26.0
      python3 ${baseDir}/scripts/map_qc.py -b ${mapped} -p
      """
    }
    else {
      """
      module load python/3.6.1-2-anaconda
      module load samtools/1.4.1
      module load sambamba/0.6.6
      module load bedtools/2.26.0
      python3 ${baseDir}/scripts/map_qc.py -b ${mapped}
      """
    }
  } else {
    if (pairedEnd) {
      """
      python3 ${baseDir}/scripts/map_qc.py -b ${mapped} -p
      """
    }
    else {
      """
      python3 ${baseDir}/scripts/map_qc.py -b ${mapped}
      """
    }
  }

}

// Define channel collecting dedup reads into new design file
dedupReads
  .map{ sampleId, bam, bai, experimentId, replicate ->
    "${sampleId}\t${bam}\t${bai}\t${experimentId}\t${replicate}\n" }
  .collectFile(name: 'design_dedup.tsv', seed: "sample_id\tbam_reads\tbam_index\texperiment_id\treplicate\n", storeDir: "${outDir}/design")
  .into { dedupDesign; preDiffDesign }

// Quality Metrics using deeptools
process experimentQC {

  publishDir "${outDir}/${task.process}", mode: 'copy'

  input:

  file dedupDesign

  output:

  file '*.{png,npz}' into deepToolsStats
  file('version_*.txt') into experimentQCVersions

  script:

  if (params.astrocyte == true) {
    """
    module load python/3.6.1-2-anaconda
    module load deeptools/2.5.0.1
    python3 ${baseDir}/scripts/experiment_qc.py -d ${dedupDesign}
    """
  } else {
    """
    python3 ${baseDir}/scripts/experiment_qc.py -d ${dedupDesign}
    """
  }

}
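// Note on the genome lookups near the top of this script: params.bwaIndex,
// params.genomeSize, params.chromSizes and params.tssFile are all pulled from
// params.genomes, which is expected to be populated by a config profile rather
// than in this file. The sketch below is purely illustrative (the paths are
// placeholders); alignReads additionally assumes the bwa directory contains an
// index built for genome.fa, since it passes ${index}/genome.fa to map_reads.py.
//
// params.genomes = [
//   'GRCh38': [
//     bwa        : '/path/to/bwa/index/dir',
//     genomesize : 'hs',
//     chromsizes : '/path/to/GRCh38.chrom.sizes',
//     tssfile    : '/path/to/GRCh38.tss.bed'
//   ]
// ]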
// Convert dedup reads to tagAlign format
process convertReads {

  tag "$sampleId-$replicate"
  publishDir "$baseDir/output/${task.process}", mode: 'copy'

  input:

  set sampleId, deduped, bai, experimentId, replicate from convertReads

  output:

  set sampleId, file('*.tagAlign.gz'), experimentId, replicate into tagReads

  script:

  if (pairedEnd) {
    if (tn5 && chrM) {
      """
      python3 $baseDir/scripts/convert_reads.py -b $deduped -p -m -t
      """
    }
    else if (tn5) {
      """
      python3 $baseDir/scripts/convert_reads.py -b $deduped -p -t
      """
    }
    else if (chrM) {
      """
      python3 $baseDir/scripts/convert_reads.py -b $deduped -p -m
      """
    }
    else {
      """
      python3 $baseDir/scripts/convert_reads.py -b $deduped -p
      """
    }
  } else {
    if (tn5 && chrM) {
      """
      python3 $baseDir/scripts/convert_reads.py -b $deduped -m -t
      """
    }
    else if (tn5) {
      """
      python3 $baseDir/scripts/convert_reads.py -b $deduped -t
      """
    }
    else if (chrM) {
      """
      python3 $baseDir/scripts/convert_reads.py -b $deduped -m
      """
    }
    else {
      """
      python3 $baseDir/scripts/convert_reads.py -b $deduped
      """
    }
  }

}

// Calculate TSS Enrichment
//process tssEnrich {
//
//  tag "$sampleId-$replicate"
//  publishDir "$baseDir/output/${task.process}", mode: 'copy'
//
//  input:
//
//  set sampleId, deduped, bai, experimentId, replicate from tssEnrich
//
//  output:
//
//  file '*.png' into atacQC
//
//  script:
//
//  """
//  singularity run /project/shared/bicf_workflow_ref/singularity_images/bicf-metaseq:0.1.0.simg $baseDir/scripts/atac_qc.py -b $deduped -p $sampleId -t $tssFile -c $chromSizes
//  """
//
//}

// Calculate Cross-correlation using phantompeaktools
process crossReads {

  tag "$sampleId-$replicate"
  publishDir "$baseDir/output/${task.process}", mode: 'copy'

  input:

  set sampleId, tagAlign, experimentId, replicate from tagReads

  output:

  set sampleId, tagAlign, file('*.cc.qc'), experimentId, replicate into xcorReads
  set file('*.cc.qc'), file('*.cc.plot.pdf') into xcorReadsStats

  script:

  if (pairedEnd) {
    """
    python3 $baseDir/scripts/xcor.py -t $tagAlign -p
    """
  }
  else {
    """
    python3 $baseDir/scripts/xcor.py -t $tagAlign
    """
  }

}

// Define channel collecting tagAlign and xcor into design file
xcorDesign = xcorReads
  .map{ sampleId, tagAlign, xcor, experimentId, replicate ->
    "$sampleId\t$tagAlign\t$xcor\t$experimentId\t$replicate\n" }
  .collectFile(name: 'design_xcor.tsv', seed: "sample_id\ttag_align\txcor\texperiment_id\treplicate\n", storeDir: "$baseDir/output/design")

// Make Experiment design files to be read in for downstream analysis
process defineExpDesignFiles {

  publishDir "$baseDir/output/design", mode: 'copy'

  input:

  file xcorDesign

  output:

  file '*.tsv' into experimentObjs mode flatten

  script:

  """
  python3 $baseDir/scripts/experiment_design.py -d $xcorDesign -a
  """

}

// Pool reads and create pseudoreplicates for each experiment
process poolAndPsuedoReads {

  tag "${experimentObjs.baseName}"
  publishDir "$baseDir/output/design", mode: 'copy'

  input:

  file experimentObjs

  output:

  file '*.tsv' into experimentPoolObjs

  script:

  if (pairedEnd) {
    """
    python3 $baseDir/scripts/pool_and_psuedoreplicate.py -d $experimentObjs -p -a
    """
  }
  else {
    """
    python3 $baseDir/scripts/pool_and_psuedoreplicate.py -d $experimentObjs -a
    """
  }

}

// Collect list of experiment design files into a single channel
experimentRows = experimentPoolObjs
  .splitCsv(sep: '\t', header: true)
  .map { row -> [ row.sample_id, row.tag_align, row.xcor, row.experiment_id, row.replicate ] }
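// For reference: each design file emitted by poolAndPsuedoReads is parsed by
// the experimentRows channel above, which expects at least the columns
// sample_id, tag_align, xcor, experiment_id and replicate (the same header
// written into design_xcor.tsv). The row below is a hypothetical illustration
// only; pool_and_psuedoreplicate.py may add pooled and pseudoreplicate entries.
//
// sample_id  tag_align                  xcor                 experiment_id  replicate
// sampleA    /path/sampleA.tagAlign.gz  /path/sampleA.cc.qc  expA           1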
// Call Peaks using MACS
process callPeaksMACS {

  tag "$sampleId-$replicate"
  publishDir "$baseDir/output/${task.process}", mode: 'copy'

  input:

  set sampleId, tagAlign, xcor, experimentId, replicate from experimentRows

  output:

  set sampleId, file('*.narrowPeak'), file('*.fc_signal.bw'), file('*.pvalue_signal.bw'), experimentId, replicate into experimentPeaks

  script:

  if (pairedEnd) {
    """
    python3 $baseDir/scripts/call_peaks_macs.py -t $tagAlign -x $xcor -s $sampleId -g $genomeSize -z $chromSizes -p -a
    """
  }
  else {
    """
    python3 $baseDir/scripts/call_peaks_macs.py -t $tagAlign -x $xcor -s $sampleId -g $genomeSize -z $chromSizes -a
    """
  }

}

// Define channel collecting peaks into design file
peaksDesign = experimentPeaks
  .map{ sampleId, peak, fcSignal, pvalueSignal, experimentId, replicate ->
    "$sampleId\t$peak\t$fcSignal\t$pvalueSignal\t$experimentId\t$replicate\n" }
  .collectFile(name: 'design_peak.tsv', seed: "sample_id\tpeaks\tfc_signal\tpvalue_signal\texperiment_id\treplicate\n", storeDir: "$baseDir/output/design")

// Calculate Consensus Peaks
process consensusPeaks {

  publishDir "$baseDir/output/${task.process}", mode: 'copy'

  input:

  file peaksDesign
  file preDiffDesign

  output:

  file '*.replicated.*' into consensusPeaks
  file '*.rejected.*' into rejectedPeaks
  file("design_diffPeaks.tsv") into designDiffPeaks

  script:

  """
  python3 $baseDir/scripts/overlap_peaks.py -d $peaksDesign -f $preDiffDesign -a
  """

}
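// Hypothetical launch command for this workflow (paths are placeholders),
// assuming params.genomes for the chosen genome is supplied by the active
// configuration. Any of the params declared at the top of this script can be
// overridden on the command line with the usual --<param> syntax, e.g.:
//
// nextflow run main.nf \
//   --reads '/path/to/fastq/*.fastq.gz' \
//   --designFile '/path/to/design.txt' \
//   --genome 'GRCh38' \
//   --pairedEnd true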