From 7b67dda5ddacfa7f7a447fa744779bae988d11a3 Mon Sep 17 00:00:00 2001
From: Venkat Malladi <venkat.malladi@utsouthwestern.edu>
Date: Tue, 24 Jul 2018 23:07:18 -0500
Subject: [PATCH] Add in differntial binding peaks.

---
 workflow/conf/biohpc.config               |  6 ++-
 workflow/main.nf                          | 60 +++++++++++++++++++----
 workflow/scripts/{runDiffBind.R => dba.R} | 47 +++++++++++-------
 workflow/scripts/overlap_peaks.py         |  6 ++-
 4 files changed, 91 insertions(+), 28 deletions(-)
 rename workflow/scripts/{runDiffBind.R => dba.R} (57%)

diff --git a/workflow/conf/biohpc.config b/workflow/conf/biohpc.config
index 032ea21..5a4b092 100644
--- a/workflow/conf/biohpc.config
+++ b/workflow/conf/biohpc.config
@@ -47,10 +47,14 @@ process {
     module = ['python/3.6.1-2-anaconda', 'bedtools/2.26.0']
     executor = 'local'
   }
-  $consensusPeaks {
+  $peakAnnotation {
     module = ['R/3.4.1-gccmkl']
     executor = 'local'
   }
+  $diffPeaks {
+    module = ['R/3.4.1-gccmkl']
+    cpus = 32
+  }
 
 }
 
diff --git a/workflow/main.nf b/workflow/main.nf
index 36a1bf9..40a9990 100644
--- a/workflow/main.nf
+++ b/workflow/main.nf
@@ -369,7 +369,9 @@ process consensusPeaks {
 
   file '*.replicated.*' into consensusPeaks
   file '*.rejected.*' into rejectedPeaks
-  file("design_diffPeaks.tsv") into designDiffPeaks
+  file("design_diffPeaks.csv") into designDiffPeaks
+  file("design_annotatePeaks.tsv") into designAnnotatePeaks
+  file("unqiue_experiments.csv") into uniqueExperiments
 
   script:
 
@@ -386,16 +388,56 @@ process peakAnnotation {
 
   input:
 
-  file consensusPeaks
+  file designAnnotatePeaks
 
- output:
+  output:
 
   file "*chipseeker*" into peakAnnotation
 
- script:
-   """
-   module load python/2.7.x-anaconda
-   module load R/3.3.2-gccmkl
-   Rscript $baseDir/scripts/annotate_peaks.R $design_file $genome
-   """
+  script:
+
+  """
+  Rscript $baseDir/scripts/annotate_peaks.R $designAnnotatePeaks $genome
+  """
+}
+
+// Define channel to find number of unique experiments
+peaksDesign = uniqueExperiments
+              .readLines()
+              .count()
+
+// Calculate Differential Binding Activity
+process diffPeaks {
+
+  publishDir "$baseDir/output/${task.process}", mode: 'copy'
+
+  input:
+
+  file designDiffPeaks
+
+  output:
+
+  file "design_diffpeak_annotatePeaks.csv" into diffPeaksDesignAnnotatePeaks
+  file "design_diffpeak_annotatePeaks.csv" into diffPeaksDesignMeme
+  file "*_diffbind.bed" into diffpeaks_meme
+  file "*_diffbind.bed" into diffpeaks_chipseeker
+  file '*.pdf' into diffPeaksStats
+  file 'normcount_peaksets.txt' into normCountPeaks
+
+  script:
+  if (peaksDesign == 1) {
+    """
+    touch design_diffpeak_annotatePeaks.tsv
+    touch no_diffbind.bed
+    touch heatmap.pdf
+    touch pca.pdf
+    touch normcount_peaksets.txt
+    """
+  }
+  else {
+    """
+    Rscript $baseDir/scripts/dba.R $designDiffPeaks
+    """
+  }
+
 }
diff --git a/workflow/scripts/runDiffBind.R b/workflow/scripts/dba.R
similarity index 57%
rename from workflow/scripts/runDiffBind.R
rename to workflow/scripts/dba.R
index f4c7a6b..3081db2 100644
--- a/workflow/scripts/runDiffBind.R
+++ b/workflow/scripts/dba.R
@@ -1,31 +1,44 @@
-library("DiffBind")
+#!/bin/Rscript
 
-#build dba object from sample sheet and do analysis
-args <- commandArgs(TRUE)
+# Load libraries
+source("http://bioconductor.org/biocLite.R")
+if(!require("DiffBind")){
+    biocLite("DiffBind")
+    library("DiffBind")
+}
+
+# Create parser object
+args <- commandArgs(trailingOnly=TRUE)
+
+# Check input args
+if (length(args) != 1) {
+  stop("Usage: dba.r [ annotate_design.tsv ] ", call.=FALSE)
+}
+
+# Build DBA object from design file
 data <- dba(sampleSheet=args[1])
 data <- dba.count(data)
 data <- dba.contrast(data, minMembers = 2, categories=DBA_CONDITION)
 data <- dba.analyze(data)
 
-#Draw figures
-pdf("diffbind.samples.heatmap.pdf")
+# Draw figures
+pdf("heatmap.pdf")
 plot(data)
 dev.off()
 
-pdf("diffbind.samples.pca.pdf")
+pdf("pca.pdf")
 dba.plotPCA(data, DBA_TISSUE, label=DBA_CONDITION)
 dev.off()
 
-#Save peak reads count
+# Save peak reads count
 normcount <- dba.peakset(data, bRetrieve=T)
-write.table(as.data.frame(normcount),"diffbind.normcount.txt",sep="\t",quote=F,row.names=F)
+write.table(as.data.frame(normcount),"normcount_peaksets.txt",sep="\t",quote=F,row.names=F)
 
-#Retriving the differentially bound sites
-#make new design file for chipseeker at the same time
+# Reteriving the differentially bound sites
+# Make new design file for peakAnnotation at the same time
 new_SampleID = c()
 new_Peaks = c()
-for (i in c(1:length(data$contrasts)))
-{
+for (i in c(1:length(data$contrasts))) {
  contrast_bed_name = paste(data$contrasts[[i]]$name1,"vs",
                       data$contrasts[[i]]$name2,"diffbind.bed",sep="_")
  contrast_name = paste(data$contrasts[[i]]$name1,"vs",
@@ -34,12 +47,12 @@ for (i in c(1:length(data$contrasts)))
  new_Peaks = c(new_Peaks, contrast_bed_name)
  report <- dba.report(data, contrast=i, th=1, bCount=TRUE)
  report <- as.data.frame(report)
- print(head(report))
  colnames(report)[1:5]<-c("chrom","peak_start","peak_stop","peak_width","peak_strand")
- #print(head(report))
+
  write.table(report,contrast_name,sep="\t",quote=F,row.names=F)
  write.table(report[,c(1:3)],contrast_bed_name,sep="\t",quote=F,row.names=F, col.names=F)
 }
-#Write new design file
-newdesign = data.frame(SampleID=new_SampleID, Peaks=new_Peaks)
-write.csv(newdesign,"diffpeak.design",row.names=F,quote=F)
+
+# Write new design file
+newdesign = data.frame(Condition=new_SampleID, Peaks=new_Peaks)
+write.csv(newdesign,"design_diffpeak_annotatePeaks.csv",row.names=F, quote=F)
diff --git a/workflow/scripts/overlap_peaks.py b/workflow/scripts/overlap_peaks.py
index a5ca5c2..bb7b53d 100644
--- a/workflow/scripts/overlap_peaks.py
+++ b/workflow/scripts/overlap_peaks.py
@@ -208,9 +208,13 @@ def main():
                             'Peaks',
                             'PeakCaller']
 
-    design_diff.to_csv("design_diffPeaks.tsv", header=True, sep='\t', index=False)
+    design_diff.to_csv("design_diffPeaks.csv", header=True, sep=',', index=False)
     design_anno.to_csv("design_annotatePeaks.tsv", header=True, sep='\t', index=False)
 
+    # Write the unique conditions
+    unique_experiments = pd.Series(design_diff['Condition'].unique)
+    unique_experiments.to_csv('unqiue_experiments.csv')
+
 
 if __name__ == '__main__':
     main()
-- 
GitLab