From 5c9ce3aa3dcc06b14c1f2d43adc5137f2b2a4f5c Mon Sep 17 00:00:00 2001
From: Holly Ruess <s185797@Nucleus036.cm.cluster>
Date: Mon, 30 Dec 2019 19:51:33 -0600
Subject: [PATCH] fix pool and pseudo

---
 workflow/scripts/pool_and_psuedoreplicate.py    | 14 ++++++++++++++
 workflow/tests/test_pool_and_psuedoreplicate.py |  4 ++--
 workflow/tests/test_xcor.py                     |  6 +++---
 3 files changed, 19 insertions(+), 5 deletions(-)

diff --git a/workflow/scripts/pool_and_psuedoreplicate.py b/workflow/scripts/pool_and_psuedoreplicate.py
index 04ce605..ea454da 100644
--- a/workflow/scripts/pool_and_psuedoreplicate.py
+++ b/workflow/scripts/pool_and_psuedoreplicate.py
@@ -99,6 +99,20 @@ def pool(tag_files, outfile, paired):
     return pooled_filename
 
 
+def bedpe_to_tagalign(tag_file, outfile):
+    '''Split each read pair in tag_file into two tagAlign lines; return the .tagAlign.gz path.'''
+
+    se_tag_filename = outfile + ".tagAlign.gz"
+    # Pipeline: zcat -f input | awk (two output lines per input line) | gzip -cn.
+    # awk prints cols 1-3 + col 9, then cols 4-6 + col 10, with literal N and 1000 as fields 4 and 5.
+    tag_steps = ["zcat -f %s" % (tag_file)]
+    tag_steps.extend([r"""awk 'BEGIN{OFS="\t"}{printf "%s\t%s\t%s\tN\t1000\t%s\n%s\t%s\t%s\tN\t1000\t%s\n",$1,$2,$3,$9,$4,$5,$6,$10}'"""])
+    tag_steps.extend(['gzip -cn'])
+    out, err = utils.run_pipe(tag_steps, outfile=se_tag_filename)
+    # NOTE(review): out and err are not inspected here; presumably run_pipe reports failures itself - confirm.
+    return se_tag_filename
+
+
 def self_psuedoreplication(tag_file, prefix, paired):
     '''Make 2 self-psuedoreplicates.'''
 
diff --git a/workflow/tests/test_pool_and_psuedoreplicate.py b/workflow/tests/test_pool_and_psuedoreplicate.py
index 0a6c899..f969dfc 100644
--- a/workflow/tests/test_pool_and_psuedoreplicate.py
+++ b/workflow/tests/test_pool_and_psuedoreplicate.py
@@ -46,7 +46,7 @@ def test_pool_and_psuedoreplicate_singleend_human():
     design_file = os.path.join(test_output_path, 'ENCSR265ZXX_ppr.tsv')
     assert os.path.exists(design_file)
     design_df = pd.read_csv(design_file, sep="\t")
-    assert design_df.shape[0] == 7
+    assert design_df.shape[0] == 6
 
 
 @pytest.mark.pairedend_mouse
@@ -59,4 +59,4 @@ def test_pool_and_psuedoreplicate_pairedend_mouse():
     design_file = os.path.join(test_output_path, 'ENCSR451NAE_ppr.tsv')
     assert os.path.exists(design_file)
     design_df = pd.read_csv(design_file, sep="\t")
-    assert design_df.shape[0] == 6
+    assert design_df.shape[0] == 5
diff --git a/workflow/tests/test_xcor.py b/workflow/tests/test_xcor.py
index 7e837af..1b6418f 100644
--- a/workflow/tests/test_xcor.py
+++ b/workflow/tests/test_xcor.py
@@ -17,9 +17,9 @@ def test_cross_plot_singleend_human():
 def test_cross_qc_singleend_human():
     qc_file = os.path.join(test_output_path,"ENCLB622FZX/ENCLB622FZX.cc.qc")
     df_xcor = pd.read_csv(qc_file, sep="\t", header=None)
-    assert df_xcor[2].iloc[0] == '0'
-    assert df_xcor[8].iloc[0] == 1.347895
-    assert df_xcor[9].iloc[0] == 1.970438
+    assert df_xcor[2].iloc[0] == 0  # compared as a number, not the string '0'
+    assert round(df_xcor[8].iloc[0],6) == 1.332028  # rounded to 6 places before comparing
+    assert df_xcor[9].iloc[0] == 2.152459  # NOTE(review): exact float compare, unlike the line above; confirm intended
 
 
 @pytest.mark.pairedend_mouse
-- 
GitLab