Skip to content
Snippets Groups Projects
Commit a858f1c3 authored by Venkat Malladi's avatar Venkat Malladi
Browse files

First start at adding annotations.

parent 55c0d935
Branches
Tags
No related merge requests found
...@@ -34,6 +34,7 @@ readsList = Channel ...@@ -34,6 +34,7 @@ readsList = Channel
pairedEnd = params.pairedEnd pairedEnd = params.pairedEnd
designFile = params.designFile designFile = params.designFile
genomeSize = params.genomeSize genomeSize = params.genomeSize
genome = params.genome
chromSizes = params.chromSizes chromSizes = params.chromSizes
cutoffRatio = params.cutoffRatio cutoffRatio = params.cutoffRatio
...@@ -377,3 +378,22 @@ process consensusPeaks { ...@@ -377,3 +378,22 @@ process consensusPeaks {
""" """
} }
// Annotate Peaks
process peakAnnotation {
publishDir "$baseDir/output/${task.process}", mode: 'copy'
input:
file consensusPeaks
output:
file "*chipseeker*" into chipseeker_originalpeak_output
script:
"""
module load python/2.7.x-anaconda
module load R/3.3.2-gccmkl
Rscript $baseDir/scripts/runChipseeker.R $design_file ${params.genomepath}
"""
}
...@@ -182,9 +182,12 @@ def main(): ...@@ -182,9 +182,12 @@ def main():
design_peaks_df = pd.read_csv(design, sep='\t') design_peaks_df = pd.read_csv(design, sep='\t')
design_files_df = pd.read_csv(files, sep='\t') design_files_df = pd.read_csv(files, sep='\t')
# Make a design file for # Make a design file for differential binding
design_diff = update_design(design_files_df) design_diff = update_design(design_files_df)
# Make a design file for annotating Peaks
design_anno = pd.DataFrame()
# Find consenus overlap peaks for each experiment # Find consenus overlap peaks for each experiment
for experiment, df_experiment in design_peaks_df.groupby('experiment_id'): for experiment, df_experiment in design_peaks_df.groupby('experiment_id'):
replicated_peak = overlap(experiment, df_experiment) replicated_peak = overlap(experiment, df_experiment)
......
args = commandArgs(trailingOnly=TRUE) #!/bin/Rscript
#if (length(args)==0) {
# stop("At least one argument must be supplied (input file).n", call.=FALSE)
#} else if (length(args)==1) {
# # default output file
# args[3] = "out.txt"
#}
library(ChIPseeker) library(ChIPseeker)
#Parse the genome path and get genome version
path_elements = unlist(strsplit(args[2],"[/]"))
genome = path_elements[length(path_elements)] # Create parser object
parser <- ArgumentParser()
if(genome=="GRCh37")
{ # Specify our desired options
library(TxDb.Hsapiens.UCSC.hg19.knownGene) parser$add_argument("-d", "--design", help = "File path to design file", required = TRUE)
txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene parser$add_argument("-g", "--genome", help = "The genome assembly", required = TRUE)
}
if(genome=="GRCm38") # Parse arguments
{ args <- parser$parse_args()
library(TxDb.Mmusculus.UCSC.mm10.knownGene)
txdb <- TxDb.Mmusculus.UCSC.mm10.knownGene
# Load UCSC Known Genes
if(args$genome=='GRCh37') {
library(TxDb.Hsapiens.UCSC.hg19.knownGene)
txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene
} else if(args$genome=='GRCm38') {
library(TxDb.Mmusculus.UCSC.mm10.knownGene)
txdb <- TxDb.Mmusculus.UCSC.mm10.knownGene
} }
if(genome=="GRCh38") else if(args$genome=='GRCh38') {
{ library(TxDb.Hsapiens.UCSC.hg38.knownGene)
library(TxDb.Hsapiens.UCSC.hg38.knownGene) txdb <- TxDb.Hsapiens.UCSC.hg38.knownGene
txdb <- TxDb.Hsapiens.UCSC.hg38.knownGene
} }
design<-read.csv(args[1])
files<-as.list(as.character(design$Peaks))
names(files)<-design$SampleID
peakAnnoList <- lapply(files, annotatePeak, TxDb=txdb, tssRegion=c(-3000, 3000), verbose=FALSE) peakAnnoList <- lapply(files, annotatePeak, TxDb=txdb, tssRegion=c(-3000, 3000), verbose=FALSE)
for(index in c(1:length(peakAnnoList))) for(index in c(1:length(peakAnnoList)))
...@@ -38,8 +34,8 @@ for(index in c(1:length(peakAnnoList))) ...@@ -38,8 +34,8 @@ for(index in c(1:length(peakAnnoList)))
filename<-paste(names(files)[index],".chipseeker_annotation.xls",sep="") filename<-paste(names(files)[index],".chipseeker_annotation.xls",sep="")
write.table(as.data.frame(peakAnnoList[[index]]),filename,sep="\t",quote=F) write.table(as.data.frame(peakAnnoList[[index]]),filename,sep="\t",quote=F)
#draw individual plot #draw individual plot
pie_name <- paste(names(files)[index],".chipseeker_pie.pdf",sep="") pie_name <- paste(names(files)[index],".chipseeker_pie.pdf",sep="")
vennpie_name <- paste(names(files)[index],".chipseeker_vennpie.pdf",sep="") vennpie_name <- paste(names(files)[index],".chipseeker_vennpie.pdf",sep="")
upsetplot_name <- paste(names(files)[index],".chipseeker_upsetplot.pdf",sep="") upsetplot_name <- paste(names(files)[index],".chipseeker_upsetplot.pdf",sep="")
pdf(pie_name) pdf(pie_name)
plotAnnoPie(peakAnnoList[[index]]) plotAnnoPie(peakAnnoList[[index]])
...@@ -53,4 +49,3 @@ for(index in c(1:length(peakAnnoList))) ...@@ -53,4 +49,3 @@ for(index in c(1:length(peakAnnoList)))
} }
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment