diff --git a/workflow/scripts/annotate_peaks.R b/workflow/scripts/annotate_peaks.R index 6a0fb4aaa82dde146a1606e886b264b0b309e3e3..63fa051353e81282fb9934b664e765c331dd91d9 100644 --- a/workflow/scripts/annotate_peaks.R +++ b/workflow/scripts/annotate_peaks.R @@ -29,10 +29,13 @@ genome <-args[2] # Load UCSC Known Genes if(genome=='GRCh37') { txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene + annodb <- org.Hs.eg.db } else if(genome=='GRCm38') { txdb <- TxDb.Mmusculus.UCSC.mm10.knownGene + annodb <- org.Mm.eg.db } else if(genome=='GRCh38') { txdb <- TxDb.Hsapiens.UCSC.hg38.knownGene + annodb <- org.Hs.eg.db } # Load design file @@ -41,11 +44,11 @@ files <- as.list(as.character(design$Peaks)) names(files) <- design$Condition -peakAnnoList <- lapply(files, annotatePeak, TxDb=txdb, tssRegion=c(-3000, 3000), verbose=FALSE) +peakAnnoList <- lapply(files, annotatePeak, TxDb=txdb, annoDb=annodb, tssRegion=c(-3000, 3000), verbose=FALSE) for(index in c(1:length(peakAnnoList))) { - filename <- paste(names(files)[index],".chipseeker_annotation.xls",sep="") - write.table(as.data.frame(peakAnnoList[[index]]),filename,sep="\t",quote=F) + filename <- paste(names(files)[index],".chipseeker_annotation.csv",sep="") + write.table(as.data.frame(peakAnnoList[[index]]),filename,sep=",",quote=F) # Draw individual plots diff --git a/workflow/tests/test_annotate_peaks.py b/workflow/tests/test_annotate_peaks.py new file mode 100644 index 0000000000000000000000000000000000000000..e1b3632ceb1335afa00aeea0215e94a2769adc59 --- /dev/null +++ b/workflow/tests/test_annotate_peaks.py @@ -0,0 +1,37 @@ +#!/usr/bin/env python3 + +import pytest +import pandas as pd +from io import StringIO +import os +import utils + +test_output_path = os.path.dirname(os.path.abspath(__file__)) + \ + '/../output/peakAnnotation/' + +DESIGN_STRING = """sample_id\tbam_reads\tbam_index\texperiment_id\tbiosample\tfactor\ttreatment\treplicate\tcontrol_id +A_1\tA_1.bam\tA_1.bai\tA\tLiver\tH3K27ac\tNone\t1\tB_1 +A_2\tA_2.bam\tA_2.bai\tA\tLiver\tH3K27ac\tNone\t2\tB_2 +B_1\tB_1.bam\tB_1.bai\tB\tLiver\tH3K27ac\tNone\t1\tB_1 +B_2\tB_2.bam\tB_2.bai\tB\tLiver\tH3K27ac\tNone\t2\tB_2 +""" + + +@pytest.mark.integration +def test_annotate_peaks_singleend(): + assert os.path.exists(os.path.join(test_output_path, 'ENCSR238SGC.chipseeker_pie.pdf')) + assert os.path.exists(os.path.join(test_output_path, 'ENCSR238SGC.chipseeker_pie.pdf')) + assert os.path.exists(os.path.join(test_output_path, 'ENCSR238SGC.chipseeker_pie.pdf')) + annotation_file = test_output_path + 'ENCSR238SGC.chipseeker_annotation.csv' + assert os.path.exists(annotation_file)) + assert utils.count_lines(annotation_file) == 152839 + + +@pytest.mark.integration +def test_annotate_peaks_pairedend(): + assert os.path.exists(os.path.join(test_output_path, 'ENCSR217LRF.chipseeker_pie.pdf')) + assert os.path.exists(os.path.join(test_output_path, 'ENCSR217LRF.chipseeker_pie.pdf')) + assert os.path.exists(os.path.join(test_output_path, 'ENCSR217LRF.chipseeker_pie.pdf')) + annotation_file = test_output_path + 'ENCSR217LRF.chipseeker_annotation.csv' + assert os.path.exists(annotation_file)) + assert utils.count_lines(annotation_file) == 25390