diff --git a/workflow/scripts/annotate_peaks.R b/workflow/scripts/annotate_peaks.R index 59bd26904e7aa2403bfa4503182b59d6379e7082..98bb8bdc846442e22ff354165c4580fd7777e25c 100644 --- a/workflow/scripts/annotate_peaks.R +++ b/workflow/scripts/annotate_peaks.R @@ -40,17 +40,26 @@ design <- read.csv(design_file, sep ='\t') files <- as.list(as.character(design$Peaks)) names(files) <- design$Condition +# Granges of files + +peaks <- lapply(files, readPeakFile, as = "GRanges", header = FALSE) +peakAnnoList <- lapply(peaks, annotatePeak, TxDb=txdb, annoDb=annodb, tssRegion=c(-3000, 3000), verbose=FALSE) + +column_names <- c("chr", "start", "end", "width", "strand_1", "name", "score", "strand", "signalValue", + "pValue", "qValue", "peak", "annotation", "geneChr", "geneStart", "geneEnd", + "geneLength" ,"geneStrand", "geneId", "transcriptId", "distanceToTSS", + "ENSEMBL", "symbol", "geneName") -peakAnnoList <- lapply(files, annotatePeak, TxDb=txdb, annoDb=annodb, tssRegion=c(-3000, 3000), verbose=FALSE) for(index in c(1:length(peakAnnoList))) { - filename <- paste(names(files)[index],".chipseeker_annotation.csv",sep="") - write.table(as.data.frame(peakAnnoList[[index]]),filename,sep=",",quote=F) + filename <- paste(names(peaks)[index], ".chipseeker_annotation.csv", sep="") + df <- as.data.frame(peakAnnoList[[index]]) + colnames(df) <- column_names + write.table(df[ , !(names(df) %in% c('strand_1'))], filename, sep="," ,quote=F, row.names=F) # Draw individual plots # Define names of Plots pie_name <- paste(names(files)[index],".chipseeker_pie.pdf",sep="") - vennpie_name <- paste(names(files)[index],".chipseeker_vennpie.pdf",sep="") upsetplot_name <- paste(names(files)[index],".chipseeker_upsetplot.pdf",sep="") # Pie Plots @@ -58,11 +67,6 @@ for(index in c(1:length(peakAnnoList))) { plotAnnoPie(peakAnnoList[[index]]) dev.off() - # Venn Diagrams - pdf(vennpie_name) - vennpie(peakAnnoList[[index]]) - dev.off() - # Upset Plot pdf(upsetplot_name) upsetplot(peakAnnoList[[index]]) diff --git a/workflow/tests/test_annotate_peaks.py b/workflow/tests/test_annotate_peaks.py index 077aa0ebbf5d884ddc5533185785c5dc713bccdc..a578da5627943e1c12119e80da6f44ec09ced610 100644 --- a/workflow/tests/test_annotate_peaks.py +++ b/workflow/tests/test_annotate_peaks.py @@ -13,8 +13,7 @@ test_output_path = os.path.dirname(os.path.abspath(__file__)) + \ @pytest.mark.singleend def test_annotate_peaks_singleend(): assert os.path.exists(os.path.join(test_output_path, 'ENCSR238SGC.chipseeker_pie.pdf')) - assert os.path.exists(os.path.join(test_output_path, 'ENCSR238SGC.chipseeker_pie.pdf')) - assert os.path.exists(os.path.join(test_output_path, 'ENCSR238SGC.chipseeker_pie.pdf')) + assert os.path.exists(os.path.join(test_output_path, 'ENCSR238SGC.chipseeker_upsetplot.pdf')) annotation_file = test_output_path + 'ENCSR238SGC.chipseeker_annotation.csv' assert os.path.exists(annotation_file) assert utils.count_lines(annotation_file) == 152839 @@ -23,8 +22,7 @@ def test_annotate_peaks_singleend(): @pytest.mark.pairedend def test_annotate_peaks_pairedend(): assert os.path.exists(os.path.join(test_output_path, 'ENCSR217LRF.chipseeker_pie.pdf')) - assert os.path.exists(os.path.join(test_output_path, 'ENCSR217LRF.chipseeker_pie.pdf')) - assert os.path.exists(os.path.join(test_output_path, 'ENCSR217LRF.chipseeker_pie.pdf')) + assert os.path.exists(os.path.join(test_output_path, 'ENCSR217LRF.chipseeker_upsetplot.pdf')) annotation_file = test_output_path + 'ENCSR217LRF.chipseeker_annotation.csv' assert os.path.exists(annotation_file) assert utils.count_lines(annotation_file) == 25390