From ef9a113d4f2fe66b63969ac312b0be099d0b2f34 Mon Sep 17 00:00:00 2001
From: Jeremy Mathews <Jeremy.Mathews@utsouthwestern.edu>
Date: Wed, 14 Aug 2019 12:00:36 -0500
Subject: [PATCH] Update tests for xcor change

---
 CHANGELOG.md                           |  1 +
 workflow/scripts/xcor.py               |  8 +++++++-
 workflow/tests/test_call_peaks_macs.py |  2 +-
 workflow/tests/test_diff_peaks.py      |  2 +-
 workflow/tests/test_xcor.py            | 12 ++++++------
 5 files changed, 16 insertions(+), 9 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index bc28c53..90cc2d7 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -12,6 +12,7 @@ All notable changes to this project will be documented in this file.
 - Add and Update tests
 - Use GTF files instead of TxDb and org libraries in Annotate Peaks
 - Make gtf and geneName files as param inputs
+- Fix xcor to increase the size of the file passed to shuf --random-source
 
 ## [publish_1.0.6 ] - 2019-05-31
 ### Added
diff --git a/workflow/scripts/xcor.py b/workflow/scripts/xcor.py
index 3bf8dbf..096afc9 100644
--- a/workflow/scripts/xcor.py
+++ b/workflow/scripts/xcor.py
@@ -107,10 +107,16 @@ def xcor(tag, paired):
     subsampled_tag_filename = \
         tag_basename + ".%d.tagAlign.gz" % (number_reads/1000000)
 
+    tag_extended = 'cat.tagAlign.gz'
+    out, err = utils.run_pipe([
+        "zcat %s %s %s" %
+        (tag, tag, tag)
+    ], outfile=tag_extended)
+
     steps = [
         'zcat %s' % (tag),
         'grep -v "chrM"',
-        'shuf -n %d --random-source=%s' % (number_reads, tag)]
+        'shuf -n %d --random-source=%s' % (number_reads, tag_extended)]
 
     if paired:
         steps.extend([r"""awk 'BEGIN{OFS="\t"}{$4="N";$5="1000";print $0}'"""])
diff --git a/workflow/tests/test_call_peaks_macs.py b/workflow/tests/test_call_peaks_macs.py
index 28881bd..f8a0842 100644
--- a/workflow/tests/test_call_peaks_macs.py
+++ b/workflow/tests/test_call_peaks_macs.py
@@ -26,7 +26,7 @@ def test_peaks_xls_singleend():
 @pytest.mark.singleend
 def test_peaks_bed_singleend():
     peak_file = test_output_path +  'ENCSR238SGC/1/' + 'ENCLB144FDT.narrowPeak'
-    assert utils.count_lines(peak_file) == 227389
+    assert utils.count_lines(peak_file) == 199317
 
 
 @pytest.mark.pairedend
diff --git a/workflow/tests/test_diff_peaks.py b/workflow/tests/test_diff_peaks.py
index ed22fd8..93655d1 100644
--- a/workflow/tests/test_diff_peaks.py
+++ b/workflow/tests/test_diff_peaks.py
@@ -71,4 +71,4 @@ def test_diffbind_pairedend_single_rep():
         assert os.path.exists(os.path.join(test_output_path, 'ENCSR729LGA_vs_ENCSR757EMK_diffbind.bed'))
         diffbind_file = test_output_path + 'ENCSR729LGA_vs_ENCSR757EMK_diffbind.csv'
     assert os.path.exists(diffbind_file)
-    assert utils.count_lines(diffbind_file) >= 66201
+    assert utils.count_lines(diffbind_file) >= 65182
diff --git a/workflow/tests/test_xcor.py b/workflow/tests/test_xcor.py
index fd47594..4b65aa9 100644
--- a/workflow/tests/test_xcor.py
+++ b/workflow/tests/test_xcor.py
@@ -17,9 +17,9 @@ def test_cross_plot_singleend():
 def test_cross_qc_singleend():
     qc_file = os.path.join(test_output_path,"ENCLB144FDT/ENCLB144FDT.cc.qc")
     df_xcor = pd.read_csv(qc_file, sep="\t", header=None)
-    assert df_xcor[2].iloc[0] == '190,200,210'
-    assert df_xcor[8].iloc[0] == 1.025906
-    assert round(df_xcor[9].iloc[0], 6) == 1.139671
+    assert df_xcor[2].iloc[0] == '220,240,255'
+    assert df_xcor[8].iloc[0] == 1.024935
+    assert round(df_xcor[9].iloc[0], 6) == 0.697252
 
 
 @pytest.mark.pairedend
@@ -31,6 +31,6 @@ def test_cross_qc_pairedend():
 def test_cross_plot_pairedend():
     qc_file = os.path.join(test_output_path,"ENCLB568IYX/ENCLB568IYX.cc.qc")
     df_xcor = pd.read_csv(qc_file, sep="\t", header=None)
-    assert df_xcor[2].iloc[0] == '220,430,475'
-    assert round(df_xcor[8].iloc[0],6) == 1.060018
-    assert df_xcor[9].iloc[0] == 4.099357
+    assert df_xcor[2].iloc[0] == '220,420,450'
+    assert round(df_xcor[8].iloc[0],6) == 1.058694
+    assert df_xcor[9].iloc[0] == 3.004596
-- 
GitLab