From 96e6d832d428c027f6e00635d19aef9894bd6c05 Mon Sep 17 00:00:00 2001
From: Venkat Malladi <venkat.malladi@utsouthwestern.edu>
Date: Tue, 14 Nov 2017 19:58:42 -0600
Subject: [PATCH] Add in call peaks step and configurations.

---
 astrocyte_pkg.yml           |  3 +++
 workflow/conf/biohpc.config | 28 ++++++++++++++++++++++------
 workflow/main.nf            | 36 ++++++++++++++++++++++++++++++++++++
 3 files changed, 61 insertions(+), 6 deletions(-)

diff --git a/astrocyte_pkg.yml b/astrocyte_pkg.yml
index 62c6297..7db5da7 100644
--- a/astrocyte_pkg.yml
+++ b/astrocyte_pkg.yml
@@ -48,6 +48,9 @@ workflow_modules:
   - 'sambamba/0.6.6'
   - 'bedtools/2.26.0'
   - 'deeptools/2.5.0.1'
+  - 'phantompeakqualtools/1.2'  # this and the next two modules are loaded for the MACS peak-calling process (see workflow/conf/biohpc.config)
+  - 'macs/2.1.0-20151222'
+  - 'UCSC_userApps/v317'
 
 # A list of parameters used by the workflow, defining how to present them,
 # options etc in the web interface. For each parameter:
diff --git a/workflow/conf/biohpc.config b/workflow/conf/biohpc.config
index 74795c4..d986fe2 100644
--- a/workflow/conf/biohpc.config
+++ b/workflow/conf/biohpc.config
@@ -39,19 +39,35 @@ process {
     module = ['python/3.6.1-2-anaconda']
     cpus = 32
   }
+  $callPeaksMacs {  // selector must match the process name in workflow/main.nf exactly (names are case-sensitive)
+    module = ['python/3.6.1-2-anaconda', 'phantompeakqualtools/1.2', 'macs/2.1.0-20151222', 'UCSC_userApps/v317']
+    cpus = 32
+  }
 }
 
 params {
   // Reference file paths on BioHPC
   genomes {
-    'GRCh38' { bwa = '/project/shared/bicf_workflow_ref/GRCh38' }
-    'GRCh37' { bwa = '/project/shared/bicf_workflow_ref/GRCh37' }
-    'GRCm38' { bwa = '/project/shared/bicf_workflow_ref/GRCm38' }
+    'GRCh38' {
+      bwa = '/project/shared/bicf_workflow_ref/GRCh38'  // read in workflow/main.nf as params.genomes[ params.genome ].bwa
+      genomesize = 'hs'  // read in workflow/main.nf as params.genomes[ params.genome ].genomesize
+      chromsizes = '/project/shared/bicf_workflow_ref/GRCh38/chrom.sizes'  // read in workflow/main.nf as params.genomes[ params.genome ].chromsizes
+    }
+    'GRCh37' {
+      bwa = '/project/shared/bicf_workflow_ref/GRCh37'
+      genomesize = 'hs'
+      chromsizes = '/project/shared/bicf_workflow_ref/GRCh37/chrom.sizes'
+    }
+    'GRCm38' {
+      bwa = '/project/shared/bicf_workflow_ref/GRCm38'
+      genomesize = 'mm'
+      chromsizes = '/project/shared/bicf_workflow_ref/GRCm38/chrom.sizes'
+    }
   }
 }
 
 trace {
-    enabled = true
-    file = 'pipeline_trace.txt'
-    fields = 'task_id,native_id,process,name,status,exit,submit,start,complete,duration,realtime,%cpu,%mem,rss'
+  enabled = true
+  file = 'pipeline_trace.txt'  // name of the trace report file
+  fields = 'task_id,native_id,process,name,status,exit,submit,start,complete,duration,realtime,%cpu,%mem,rss'  // columns written to the trace report
 }
diff --git a/workflow/main.nf b/workflow/main.nf
index 87dc34b..2245648 100644
--- a/workflow/main.nf
+++ b/workflow/main.nf
@@ -10,6 +10,8 @@ params.designFile = "$baseDir/../test_data/design_ENCSR238SGC_SE.txt"
 params.genome = 'GRCm38'
 params.genomes = []
 params.bwaIndex = params.genome ? params.genomes[ params.genome ].bwa ?: false : false
+params.genomeSize = params.genome ? params.genomes[ params.genome ].genomesize ?: false : false  // genomesize entry of the selected genome; false when no genome or no entry
+params.chromSizes = params.genome ? params.genomes[ params.genome ].chromsizes ?: false : false  // chromsizes entry of the selected genome; false when no genome or no entry
 params.cutoffRatio = 1.2
 
 // Check inputs
@@ -31,6 +33,8 @@ readsList = Channel
 // Define regular variables
 pairedEnd = params.pairedEnd
 designFile = params.designFile
+genomeSize = params.genomeSize  // passed to call_peaks_macs.py as -g
+chromSizes = params.chromSizes  // passed to call_peaks_macs.py as -z
 cutoffRatio = params.cutoffRatio
 
 process checkDesignFile {
@@ -307,3 +311,35 @@ process poolAndPsuedoReads {
   }
 
 }
+
+// Build experimentRows: gather everything emitted by experimentPoolObjs, then parse it as tab-separated text with a header row
+experimentRows = experimentPoolObjs.collect()  // NOTE(review): collect() emits a single list of all items; confirm splitCsv accepts that form
+            .splitCsv(sep:'\t', header: true)  // NOTE(review): header: true presumably yields rows keyed by column name, while callPeaksMacs unpacks rows positionally; confirm
+
+// Call Peaks using MACS
+process callPeaksMacs {
+  // Runs scripts/call_peaks_macs.py once per item of experimentRows; results are copied under output/<process name>.
+  tag "$sampleId-$replicate"
+  publishDir "$baseDir/output/${task.process}", mode: 'copy'
+
+  input:
+  set sampleId, tagAlign, xcor, experimentId, biosample, factor, treatment, replicate, controlId, controlTagAlign from experimentRows
+
+  output:
+  // Outputs are declared with 'into' and a new channel; 'from' is only valid for inputs.
+  set sampleId, file('*.narrowPeak'), file('*.fc_signal.bw'), file('*.pvalue_signal.bw'), experimentId, biosample, factor, treatment, replicate, controlId into experimentPeaks
+
+  script:
+  // Only the paired-end branch passes -p. NOTE(review): assumes -p is the script's paired-end switch; confirm against call_peaks_macs.py.
+  if (pairedEnd) {
+    """
+    python3 $baseDir/scripts/call_peaks_macs.py -t $tagAlign -x $xcor -c $controlTagAlign -s $sampleId -g $genomeSize -z $chromSizes -p
+    """
+  }
+  else {
+    """
+    python3 $baseDir/scripts/call_peaks_macs.py -t $tagAlign -x $xcor -c $controlTagAlign -s $sampleId -g $genomeSize -z $chromSizes
+    """
+  }
+
+}
-- 
GitLab