Skip to content
Snippets Groups Projects
Commit a858f1c3 authored by Venkat Malladi's avatar Venkat Malladi
Browse files

First start at adding annotations.

parent 55c0d935
1 merge request!18Resolve "Add in current chip-analysis functionality."
...@@ -34,6 +34,7 @@ readsList = Channel ...@@ -34,6 +34,7 @@ readsList = Channel
pairedEnd = params.pairedEnd pairedEnd = params.pairedEnd
designFile = params.designFile designFile = params.designFile
genomeSize = params.genomeSize genomeSize = params.genomeSize
genome = params.genome
chromSizes = params.chromSizes chromSizes = params.chromSizes
cutoffRatio = params.cutoffRatio cutoffRatio = params.cutoffRatio
...@@ -377,3 +378,22 @@ process consensusPeaks { ...@@ -377,3 +378,22 @@ process consensusPeaks {
""" """
} }
// Annotate Peaks
process peakAnnotation {
publishDir "$baseDir/output/${task.process}", mode: 'copy'
input:
file consensusPeaks
output:
file "*chipseeker*" into chipseeker_originalpeak_output
script:
"""
module load python/2.7.x-anaconda
module load R/3.3.2-gccmkl
Rscript $baseDir/scripts/runChipseeker.R $design_file ${params.genomepath}
"""
}
...@@ -182,9 +182,12 @@ def main(): ...@@ -182,9 +182,12 @@ def main():
design_peaks_df = pd.read_csv(design, sep='\t') design_peaks_df = pd.read_csv(design, sep='\t')
design_files_df = pd.read_csv(files, sep='\t') design_files_df = pd.read_csv(files, sep='\t')
# Make a design file for # Make a design file for differential binding
design_diff = update_design(design_files_df) design_diff = update_design(design_files_df)
# Make a design file for annotating Peaks
design_anno = pd.DataFrame()
# Find consenus overlap peaks for each experiment # Find consenus overlap peaks for each experiment
for experiment, df_experiment in design_peaks_df.groupby('experiment_id'): for experiment, df_experiment in design_peaks_df.groupby('experiment_id'):
replicated_peak = overlap(experiment, df_experiment) replicated_peak = overlap(experiment, df_experiment)
......
args = commandArgs(trailingOnly=TRUE) #!/bin/Rscript
#if (length(args)==0) {
# stop("At least one argument must be supplied (input file).n", call.=FALSE)
#} else if (length(args)==1) {
# # default output file
# args[3] = "out.txt"
#}
library(ChIPseeker) library(ChIPseeker)
#Parse the genome path and get genome version
path_elements = unlist(strsplit(args[2],"[/]"))
genome = path_elements[length(path_elements)] # Create parser object
parser <- ArgumentParser()
if(genome=="GRCh37")
{ # Specify our desired options
library(TxDb.Hsapiens.UCSC.hg19.knownGene) parser$add_argument("-d", "--design", help = "File path to design file", required = TRUE)
txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene parser$add_argument("-g", "--genome", help = "The genome assembly", required = TRUE)
}
if(genome=="GRCm38") # Parse arguments
{ args <- parser$parse_args()
library(TxDb.Mmusculus.UCSC.mm10.knownGene)
txdb <- TxDb.Mmusculus.UCSC.mm10.knownGene
# Load UCSC Known Genes
if(args$genome=='GRCh37') {
library(TxDb.Hsapiens.UCSC.hg19.knownGene)
txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene
} else if(args$genome=='GRCm38') {
library(TxDb.Mmusculus.UCSC.mm10.knownGene)
txdb <- TxDb.Mmusculus.UCSC.mm10.knownGene
} }
if(genome=="GRCh38") else if(args$genome=='GRCh38') {
{ library(TxDb.Hsapiens.UCSC.hg38.knownGene)
library(TxDb.Hsapiens.UCSC.hg38.knownGene) txdb <- TxDb.Hsapiens.UCSC.hg38.knownGene
txdb <- TxDb.Hsapiens.UCSC.hg38.knownGene
} }
design<-read.csv(args[1])
files<-as.list(as.character(design$Peaks))
names(files)<-design$SampleID
peakAnnoList <- lapply(files, annotatePeak, TxDb=txdb, tssRegion=c(-3000, 3000), verbose=FALSE) peakAnnoList <- lapply(files, annotatePeak, TxDb=txdb, tssRegion=c(-3000, 3000), verbose=FALSE)
for(index in c(1:length(peakAnnoList))) for(index in c(1:length(peakAnnoList)))
...@@ -38,8 +34,8 @@ for(index in c(1:length(peakAnnoList))) ...@@ -38,8 +34,8 @@ for(index in c(1:length(peakAnnoList)))
filename<-paste(names(files)[index],".chipseeker_annotation.xls",sep="") filename<-paste(names(files)[index],".chipseeker_annotation.xls",sep="")
write.table(as.data.frame(peakAnnoList[[index]]),filename,sep="\t",quote=F) write.table(as.data.frame(peakAnnoList[[index]]),filename,sep="\t",quote=F)
#draw individual plot #draw individual plot
pie_name <- paste(names(files)[index],".chipseeker_pie.pdf",sep="") pie_name <- paste(names(files)[index],".chipseeker_pie.pdf",sep="")
vennpie_name <- paste(names(files)[index],".chipseeker_vennpie.pdf",sep="") vennpie_name <- paste(names(files)[index],".chipseeker_vennpie.pdf",sep="")
upsetplot_name <- paste(names(files)[index],".chipseeker_upsetplot.pdf",sep="") upsetplot_name <- paste(names(files)[index],".chipseeker_upsetplot.pdf",sep="")
pdf(pie_name) pdf(pie_name)
plotAnnoPie(peakAnnoList[[index]]) plotAnnoPie(peakAnnoList[[index]])
...@@ -53,4 +49,3 @@ for(index in c(1:length(peakAnnoList))) ...@@ -53,4 +49,3 @@ for(index in c(1:length(peakAnnoList)))
} }
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment