From ea9dc8e6097b706723616ec58b7112d3e4458247 Mon Sep 17 00:00:00 2001
From: Venkat Malladi <venkat.malladi@utsouthwestern.edu>
Date: Mon, 24 Dec 2018 10:49:35 -0600
Subject: [PATCH] Test annotate peaks.

---
 workflow/scripts/annotate_peaks.R     |  9 ++++---
 workflow/tests/test_annotate_peaks.py | 37 +++++++++++++++++++++++++++
 2 files changed, 43 insertions(+), 3 deletions(-)
 create mode 100644 workflow/tests/test_annotate_peaks.py

diff --git a/workflow/scripts/annotate_peaks.R b/workflow/scripts/annotate_peaks.R
index 6a0fb4a..63fa051 100644
--- a/workflow/scripts/annotate_peaks.R
+++ b/workflow/scripts/annotate_peaks.R
@@ -29,10 +29,13 @@ genome <-args[2]
 # Load UCSC Known Genes
 if(genome=='GRCh37') {
     txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene
+    annodb <- org.Hs.eg.db
 } else if(genome=='GRCm38')  {
     txdb <- TxDb.Mmusculus.UCSC.mm10.knownGene
+    annodb <- org.Mm.eg.db
 } else if(genome=='GRCh38')  {
     txdb <- TxDb.Hsapiens.UCSC.hg38.knownGene
+    annodb <- org.Hs.eg.db
 }
 
 # Load design file
@@ -41,11 +44,11 @@ files <- as.list(as.character(design$Peaks))
 names(files) <- design$Condition
 
 
-peakAnnoList <- lapply(files, annotatePeak, TxDb=txdb, tssRegion=c(-3000, 3000), verbose=FALSE)
+peakAnnoList <- lapply(files, annotatePeak, TxDb=txdb, annoDb=annodb, tssRegion=c(-3000, 3000), verbose=FALSE)
 
 for(index in c(1:length(peakAnnoList))) {
-  filename <- paste(names(files)[index],".chipseeker_annotation.xls",sep="")
-  write.table(as.data.frame(peakAnnoList[[index]]),filename,sep="\t",quote=F)
+  filename <- paste(names(files)[index],".chipseeker_annotation.csv",sep="")
+  write.table(as.data.frame(peakAnnoList[[index]]),filename,sep=",",quote=F)
 
   # Draw individual plots
 
diff --git a/workflow/tests/test_annotate_peaks.py b/workflow/tests/test_annotate_peaks.py
new file mode 100644
index 0000000..e1b3632
--- /dev/null
+++ b/workflow/tests/test_annotate_peaks.py
@@ -0,0 +1,37 @@
+#!/usr/bin/env python3
+
+import pytest
+import pandas as pd
+from io import StringIO
+import os
+import utils
+
+test_output_path = os.path.dirname(os.path.abspath(__file__)) + \
+                '/../output/peakAnnotation/'
+
+DESIGN_STRING = """sample_id\tbam_reads\tbam_index\texperiment_id\tbiosample\tfactor\ttreatment\treplicate\tcontrol_id
+A_1\tA_1.bam\tA_1.bai\tA\tLiver\tH3K27ac\tNone\t1\tB_1
+A_2\tA_2.bam\tA_2.bai\tA\tLiver\tH3K27ac\tNone\t2\tB_2
+B_1\tB_1.bam\tB_1.bai\tB\tLiver\tH3K27ac\tNone\t1\tB_1
+B_2\tB_2.bam\tB_2.bai\tB\tLiver\tH3K27ac\tNone\t2\tB_2
+"""
+
+
+@pytest.mark.integration
+def test_annotate_peaks_singleend():
+    assert os.path.exists(os.path.join(test_output_path, 'ENCSR238SGC.chipseeker_pie.pdf'))
+    assert os.path.exists(os.path.join(test_output_path, 'ENCSR238SGC.chipseeker_pie.pdf'))
+    assert os.path.exists(os.path.join(test_output_path, 'ENCSR238SGC.chipseeker_pie.pdf'))
+    annotation_file = test_output_path + 'ENCSR238SGC.chipseeker_annotation.csv'
+    assert os.path.exists(annotation_file))
+    assert utils.count_lines(annotation_file) == 152839
+
+
+@pytest.mark.integration
+def test_annotate_peaks_pairedend():
+    assert os.path.exists(os.path.join(test_output_path, 'ENCSR217LRF.chipseeker_pie.pdf'))
+    assert os.path.exists(os.path.join(test_output_path, 'ENCSR217LRF.chipseeker_pie.pdf'))
+    assert os.path.exists(os.path.join(test_output_path, 'ENCSR217LRF.chipseeker_pie.pdf'))
+    annotation_file = test_output_path + 'ENCSR217LRF.chipseeker_annotation.csv'
+    assert os.path.exists(annotation_file))
+    assert utils.count_lines(annotation_file) == 25390
-- 
GitLab