Skip to content
Snippets Groups Projects
Commit 7b67dda5 authored by Venkat Malladi's avatar Venkat Malladi
Browse files

Add in differntial binding peaks.

parent 7deea97e
Branches
Tags
No related merge requests found
......@@ -47,10 +47,14 @@ process {
module = ['python/3.6.1-2-anaconda', 'bedtools/2.26.0']
executor = 'local'
}
$consensusPeaks {
$peakAnnotation {
module = ['R/3.4.1-gccmkl']
executor = 'local'
}
$diffPeaks {
module = ['R/3.4.1-gccmkl']
cpus = 32
}
}
......
......@@ -369,7 +369,9 @@ process consensusPeaks {
file '*.replicated.*' into consensusPeaks
file '*.rejected.*' into rejectedPeaks
file("design_diffPeaks.tsv") into designDiffPeaks
file("design_diffPeaks.csv") into designDiffPeaks
file("design_annotatePeaks.tsv") into designAnnotatePeaks
file("unqiue_experiments.csv") into uniqueExperiments
script:
......@@ -386,16 +388,56 @@ process peakAnnotation {
input:
file consensusPeaks
file designAnnotatePeaks
output:
output:
file "*chipseeker*" into peakAnnotation
script:
"""
module load python/2.7.x-anaconda
module load R/3.3.2-gccmkl
Rscript $baseDir/scripts/annotate_peaks.R $design_file $genome
"""
script:
"""
Rscript $baseDir/scripts/annotate_peaks.R $designAnnotatePeaks $genome
"""
}
// Define channel to find number of unique experiments
peaksDesign = uniqueExperiments
.readLines()
.count()
// Calculate Differential Binding Activity
process diffPeaks {
publishDir "$baseDir/output/${task.process}", mode: 'copy'
input:
file designDiffPeaks
output:
file "design_diffpeak_annotatePeaks.csv" into diffPeaksDesignAnnotatePeaks
file "design_diffpeak_annotatePeaks.csv" into diffPeaksDesignMeme
file "*_diffbind.bed" into diffpeaks_meme
file "*_diffbind.bed" into diffpeaks_chipseeker
file '*.pdf' into diffPeaksStats
file 'normcount_peaksets.txt' into normCountPeaks
script:
if (peaksDesign == 1) {
"""
touch design_diffpeak_annotatePeaks.tsv
touch no_diffbind.bed
touch heatmap.pdf
touch pca.pdf
touch normcount_peaksets.txt
"""
}
else {
"""
Rscript $baseDir/scripts/dba.R $designDiffPeaks
"""
}
}
library("DiffBind")
#!/bin/Rscript
#build dba object from sample sheet and do analysis
args <- commandArgs(TRUE)
# Load libraries
source("http://bioconductor.org/biocLite.R")
if(!require("DiffBind")){
biocLite("DiffBind")
library("DiffBind")
}
# Create parser object
args <- commandArgs(trailingOnly=TRUE)
# Check input args
if (length(args) != 1) {
stop("Usage: dba.r [ annotate_design.tsv ] ", call.=FALSE)
}
# Build DBA object from design file
data <- dba(sampleSheet=args[1])
data <- dba.count(data)
data <- dba.contrast(data, minMembers = 2, categories=DBA_CONDITION)
data <- dba.analyze(data)
#Draw figures
pdf("diffbind.samples.heatmap.pdf")
# Draw figures
pdf("heatmap.pdf")
plot(data)
dev.off()
pdf("diffbind.samples.pca.pdf")
pdf("pca.pdf")
dba.plotPCA(data, DBA_TISSUE, label=DBA_CONDITION)
dev.off()
#Save peak reads count
# Save peak reads count
normcount <- dba.peakset(data, bRetrieve=T)
write.table(as.data.frame(normcount),"diffbind.normcount.txt",sep="\t",quote=F,row.names=F)
write.table(as.data.frame(normcount),"normcount_peaksets.txt",sep="\t",quote=F,row.names=F)
#Retriving the differentially bound sites
#make new design file for chipseeker at the same time
# Reteriving the differentially bound sites
# Make new design file for peakAnnotation at the same time
new_SampleID = c()
new_Peaks = c()
for (i in c(1:length(data$contrasts)))
{
for (i in c(1:length(data$contrasts))) {
contrast_bed_name = paste(data$contrasts[[i]]$name1,"vs",
data$contrasts[[i]]$name2,"diffbind.bed",sep="_")
contrast_name = paste(data$contrasts[[i]]$name1,"vs",
......@@ -34,12 +47,12 @@ for (i in c(1:length(data$contrasts)))
new_Peaks = c(new_Peaks, contrast_bed_name)
report <- dba.report(data, contrast=i, th=1, bCount=TRUE)
report <- as.data.frame(report)
print(head(report))
colnames(report)[1:5]<-c("chrom","peak_start","peak_stop","peak_width","peak_strand")
#print(head(report))
write.table(report,contrast_name,sep="\t",quote=F,row.names=F)
write.table(report[,c(1:3)],contrast_bed_name,sep="\t",quote=F,row.names=F, col.names=F)
}
#Write new design file
newdesign = data.frame(SampleID=new_SampleID, Peaks=new_Peaks)
write.csv(newdesign,"diffpeak.design",row.names=F,quote=F)
# Write new design file
newdesign = data.frame(Condition=new_SampleID, Peaks=new_Peaks)
write.csv(newdesign,"design_diffpeak_annotatePeaks.csv",row.names=F, quote=F)
......@@ -208,9 +208,13 @@ def main():
'Peaks',
'PeakCaller']
design_diff.to_csv("design_diffPeaks.tsv", header=True, sep='\t', index=False)
design_diff.to_csv("design_diffPeaks.csv", header=True, sep=',', index=False)
design_anno.to_csv("design_annotatePeaks.tsv", header=True, sep='\t', index=False)
# Write the unique conditions
unique_experiments = pd.Series(design_diff['Condition'].unique)
unique_experiments.to_csv('unqiue_experiments.csv')
if __name__ == '__main__':
main()
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment