Add in differntial binding peaks.

7b67dda5 · Venkat Malladi · 7deea97e · 7b67dda5 · 7b67dda5 · 7b67dda5
Commit 7b67dda5 authored 6 years ago by Venkat Malladi
--- a/workflow/conf/biohpc.config
+++ b/workflow/conf/biohpc.config
@@ -47,10 +47,14 @@ process {
    module = ['python/3.6.1-2-anaconda', 'bedtools/2.26.0']
    executor = 'local'
  }
-  $consensusPeaks {
+  $peakAnnotation {
    module = ['R/3.4.1-gccmkl']
    executor = 'local'
  }
+  $diffPeaks {
+    module = ['R/3.4.1-gccmkl']
+    cpus = 32
+  }

 }


--- a/workflow/main.nf
+++ b/workflow/main.nf
@@ -369,7 +369,9 @@ process consensusPeaks {

  file '*.replicated.*' into consensusPeaks
  file '*.rejected.*' into rejectedPeaks
-  file("design_diffPeaks.tsv") into designDiffPeaks
+  file("design_diffPeaks.csv") into designDiffPeaks
+  file("design_annotatePeaks.tsv") into designAnnotatePeaks
+  file("unqiue_experiments.csv") into uniqueExperiments

  script:

@@ -386,16 +388,56 @@ process peakAnnotation {

  input:

-  file consensusPeaks
+  file designAnnotatePeaks

- output:
+  output:

  file "*chipseeker*" into peakAnnotation

- script:
-   """
-   module load python/2.7.x-anaconda
-   module load R/3.3.2-gccmkl
-   Rscript $baseDir/scripts/annotate_peaks.R $design_file $genome
-   """
+  script:
+
+  """
+  Rscript $baseDir/scripts/annotate_peaks.R $designAnnotatePeaks $genome
+  """
+}
+
+// Define channel to find number of unique experiments
+peaksDesign = uniqueExperiments
+              .readLines()
+              .count()
+
+// Calculate Differential Binding Activity
+process diffPeaks {
+
+  publishDir "$baseDir/output/${task.process}", mode: 'copy'
+
+  input:
+
+  file designDiffPeaks
+
+  output:
+
+  file "design_diffpeak_annotatePeaks.csv" into diffPeaksDesignAnnotatePeaks
+  file "design_diffpeak_annotatePeaks.csv" into diffPeaksDesignMeme
+  file "*_diffbind.bed" into diffpeaks_meme
+  file "*_diffbind.bed" into diffpeaks_chipseeker
+  file '*.pdf' into diffPeaksStats
+  file 'normcount_peaksets.txt' into normCountPeaks
+
+  script:
+  if (peaksDesign == 1) {
+    """
+    touch design_diffpeak_annotatePeaks.tsv
+    touch no_diffbind.bed
+    touch heatmap.pdf
+    touch pca.pdf
+    touch normcount_peaksets.txt
+    """
+  }
+  else {
+    """
+    Rscript $baseDir/scripts/dba.R $designDiffPeaks
+    """
+  }
+
 }
--- a/workflow/scripts/runDiffBind.R
+++ b/workflow/scripts/runDiffBind.R
-library("DiffBind")
+#!/bin/Rscript

-#build dba object from sample sheet and do analysis
-args <- commandArgs(TRUE)
+# Load libraries
+source("http://bioconductor.org/biocLite.R")
+if(!require("DiffBind")){
+    biocLite("DiffBind")
+    library("DiffBind")
+}
+
+# Create parser object
+args <- commandArgs(trailingOnly=TRUE)
+
+# Check input args
+if (length(args) != 1) {
+  stop("Usage: dba.r [ annotate_design.tsv ] ", call.=FALSE)
+}
+
+# Build DBA object from design file
 data <- dba(sampleSheet=args[1])
 data <- dba.count(data)
 data <- dba.contrast(data, minMembers = 2, categories=DBA_CONDITION)
 data <- dba.analyze(data)

-#Draw figures
-pdf("diffbind.samples.heatmap.pdf")
+# Draw figures
+pdf("heatmap.pdf")
 plot(data)
 dev.off()

-pdf("diffbind.samples.pca.pdf")
+pdf("pca.pdf")
 dba.plotPCA(data, DBA_TISSUE, label=DBA_CONDITION)
 dev.off()

-#Save peak reads count
+# Save peak reads count
 normcount <- dba.peakset(data, bRetrieve=T)
-write.table(as.data.frame(normcount),"diffbind.normcount.txt",sep="\t",quote=F,row.names=F)
+write.table(as.data.frame(normcount),"normcount_peaksets.txt",sep="\t",quote=F,row.names=F)

-#Retriving the differentially bound sites
-#make new design file for chipseeker at the same time
+# Reteriving the differentially bound sites
+# Make new design file for peakAnnotation at the same time
 new_SampleID = c()
 new_Peaks = c()
-for (i in c(1:length(data$contrasts)))
-{
+for (i in c(1:length(data$contrasts))) {
 contrast_bed_name = paste(data$contrasts[[i]]$name1,"vs",
                      data$contrasts[[i]]$name2,"diffbind.bed",sep="_")
 contrast_name = paste(data$contrasts[[i]]$name1,"vs",
@@ -34,12 +47,12 @@ for (i in c(1:length(data$contrasts)))
 new_Peaks = c(new_Peaks, contrast_bed_name)
 report <- dba.report(data, contrast=i, th=1, bCount=TRUE)
 report <- as.data.frame(report)
- print(head(report))
 colnames(report)[1:5]<-c("chrom","peak_start","peak_stop","peak_width","peak_strand")
- #print(head(report))
+
 write.table(report,contrast_name,sep="\t",quote=F,row.names=F)
 write.table(report[,c(1:3)],contrast_bed_name,sep="\t",quote=F,row.names=F, col.names=F)
 }
-#Write new design file
-newdesign = data.frame(SampleID=new_SampleID, Peaks=new_Peaks)
-write.csv(newdesign,"diffpeak.design",row.names=F,quote=F)
+
+# Write new design file
+newdesign = data.frame(Condition=new_SampleID, Peaks=new_Peaks)
+write.csv(newdesign,"design_diffpeak_annotatePeaks.csv",row.names=F, quote=F)
--- a/workflow/scripts/overlap_peaks.py
+++ b/workflow/scripts/overlap_peaks.py
@@ -208,9 +208,13 @@ def main():
                            'Peaks',
                            'PeakCaller']

-    design_diff.to_csv("design_diffPeaks.tsv", header=True, sep='\t', index=False)
+    design_diff.to_csv("design_diffPeaks.csv", header=True, sep=',', index=False)
    design_anno.to_csv("design_annotatePeaks.tsv", header=True, sep='\t', index=False)

+    # Write the unique conditions
+    unique_experiments = pd.Series(design_diff['Condition'].unique)
+    unique_experiments.to_csv('unqiue_experiments.csv')
+

 if __name__ == '__main__':
    main()