From 7b67dda5ddacfa7f7a447fa744779bae988d11a3 Mon Sep 17 00:00:00 2001 From: Venkat Malladi <venkat.malladi@utsouthwestern.edu> Date: Tue, 24 Jul 2018 23:07:18 -0500 Subject: [PATCH] Add in differntial binding peaks. --- workflow/conf/biohpc.config | 6 ++- workflow/main.nf | 60 +++++++++++++++++++---- workflow/scripts/{runDiffBind.R => dba.R} | 47 +++++++++++------- workflow/scripts/overlap_peaks.py | 6 ++- 4 files changed, 91 insertions(+), 28 deletions(-) rename workflow/scripts/{runDiffBind.R => dba.R} (57%) diff --git a/workflow/conf/biohpc.config b/workflow/conf/biohpc.config index 032ea21..5a4b092 100644 --- a/workflow/conf/biohpc.config +++ b/workflow/conf/biohpc.config @@ -47,10 +47,14 @@ process { module = ['python/3.6.1-2-anaconda', 'bedtools/2.26.0'] executor = 'local' } - $consensusPeaks { + $peakAnnotation { module = ['R/3.4.1-gccmkl'] executor = 'local' } + $diffPeaks { + module = ['R/3.4.1-gccmkl'] + cpus = 32 + } } diff --git a/workflow/main.nf b/workflow/main.nf index 36a1bf9..40a9990 100644 --- a/workflow/main.nf +++ b/workflow/main.nf @@ -369,7 +369,9 @@ process consensusPeaks { file '*.replicated.*' into consensusPeaks file '*.rejected.*' into rejectedPeaks - file("design_diffPeaks.tsv") into designDiffPeaks + file("design_diffPeaks.csv") into designDiffPeaks + file("design_annotatePeaks.tsv") into designAnnotatePeaks + file("unqiue_experiments.csv") into uniqueExperiments script: @@ -386,16 +388,56 @@ process peakAnnotation { input: - file consensusPeaks + file designAnnotatePeaks - output: + output: file "*chipseeker*" into peakAnnotation - script: - """ - module load python/2.7.x-anaconda - module load R/3.3.2-gccmkl - Rscript $baseDir/scripts/annotate_peaks.R $design_file $genome - """ + script: + + """ + Rscript $baseDir/scripts/annotate_peaks.R $designAnnotatePeaks $genome + """ +} + +// Define channel to find number of unique experiments +peaksDesign = uniqueExperiments + .readLines() + .count() + +// Calculate Differential Binding Activity +process diffPeaks { + + publishDir "$baseDir/output/${task.process}", mode: 'copy' + + input: + + file designDiffPeaks + + output: + + file "design_diffpeak_annotatePeaks.csv" into diffPeaksDesignAnnotatePeaks + file "design_diffpeak_annotatePeaks.csv" into diffPeaksDesignMeme + file "*_diffbind.bed" into diffpeaks_meme + file "*_diffbind.bed" into diffpeaks_chipseeker + file '*.pdf' into diffPeaksStats + file 'normcount_peaksets.txt' into normCountPeaks + + script: + if (peaksDesign == 1) { + """ + touch design_diffpeak_annotatePeaks.tsv + touch no_diffbind.bed + touch heatmap.pdf + touch pca.pdf + touch normcount_peaksets.txt + """ + } + else { + """ + Rscript $baseDir/scripts/dba.R $designDiffPeaks + """ + } + } diff --git a/workflow/scripts/runDiffBind.R b/workflow/scripts/dba.R similarity index 57% rename from workflow/scripts/runDiffBind.R rename to workflow/scripts/dba.R index f4c7a6b..3081db2 100644 --- a/workflow/scripts/runDiffBind.R +++ b/workflow/scripts/dba.R @@ -1,31 +1,44 @@ -library("DiffBind") +#!/bin/Rscript -#build dba object from sample sheet and do analysis -args <- commandArgs(TRUE) +# Load libraries +source("http://bioconductor.org/biocLite.R") +if(!require("DiffBind")){ + biocLite("DiffBind") + library("DiffBind") +} + +# Create parser object +args <- commandArgs(trailingOnly=TRUE) + +# Check input args +if (length(args) != 1) { + stop("Usage: dba.r [ annotate_design.tsv ] ", call.=FALSE) +} + +# Build DBA object from design file data <- dba(sampleSheet=args[1]) data <- dba.count(data) data <- dba.contrast(data, minMembers = 2, categories=DBA_CONDITION) data <- dba.analyze(data) -#Draw figures -pdf("diffbind.samples.heatmap.pdf") +# Draw figures +pdf("heatmap.pdf") plot(data) dev.off() -pdf("diffbind.samples.pca.pdf") +pdf("pca.pdf") dba.plotPCA(data, DBA_TISSUE, label=DBA_CONDITION) dev.off() -#Save peak reads count +# Save peak reads count normcount <- dba.peakset(data, bRetrieve=T) -write.table(as.data.frame(normcount),"diffbind.normcount.txt",sep="\t",quote=F,row.names=F) +write.table(as.data.frame(normcount),"normcount_peaksets.txt",sep="\t",quote=F,row.names=F) -#Retriving the differentially bound sites -#make new design file for chipseeker at the same time +# Reteriving the differentially bound sites +# Make new design file for peakAnnotation at the same time new_SampleID = c() new_Peaks = c() -for (i in c(1:length(data$contrasts))) -{ +for (i in c(1:length(data$contrasts))) { contrast_bed_name = paste(data$contrasts[[i]]$name1,"vs", data$contrasts[[i]]$name2,"diffbind.bed",sep="_") contrast_name = paste(data$contrasts[[i]]$name1,"vs", @@ -34,12 +47,12 @@ for (i in c(1:length(data$contrasts))) new_Peaks = c(new_Peaks, contrast_bed_name) report <- dba.report(data, contrast=i, th=1, bCount=TRUE) report <- as.data.frame(report) - print(head(report)) colnames(report)[1:5]<-c("chrom","peak_start","peak_stop","peak_width","peak_strand") - #print(head(report)) + write.table(report,contrast_name,sep="\t",quote=F,row.names=F) write.table(report[,c(1:3)],contrast_bed_name,sep="\t",quote=F,row.names=F, col.names=F) } -#Write new design file -newdesign = data.frame(SampleID=new_SampleID, Peaks=new_Peaks) -write.csv(newdesign,"diffpeak.design",row.names=F,quote=F) + +# Write new design file +newdesign = data.frame(Condition=new_SampleID, Peaks=new_Peaks) +write.csv(newdesign,"design_diffpeak_annotatePeaks.csv",row.names=F, quote=F) diff --git a/workflow/scripts/overlap_peaks.py b/workflow/scripts/overlap_peaks.py index a5ca5c2..bb7b53d 100644 --- a/workflow/scripts/overlap_peaks.py +++ b/workflow/scripts/overlap_peaks.py @@ -208,9 +208,13 @@ def main(): 'Peaks', 'PeakCaller'] - design_diff.to_csv("design_diffPeaks.tsv", header=True, sep='\t', index=False) + design_diff.to_csv("design_diffPeaks.csv", header=True, sep=',', index=False) design_anno.to_csv("design_annotatePeaks.tsv", header=True, sep='\t', index=False) + # Write the unique conditions + unique_experiments = pd.Series(design_diff['Condition'].unique) + unique_experiments.to_csv('unqiue_experiments.csv') + if __name__ == '__main__': main() -- GitLab