From ef9a113d4f2fe66b63969ac312b0be099d0b2f34 Mon Sep 17 00:00:00 2001 From: Jeremy Mathews <Jeremy.Mathews@utsouthwestern.edu> Date: Wed, 14 Aug 2019 12:00:36 -0500 Subject: [PATCH] Update tests for xcor change --- CHANGELOG.md | 1 + workflow/scripts/xcor.py | 8 +++++++- workflow/tests/test_call_peaks_macs.py | 2 +- workflow/tests/test_diff_peaks.py | 2 +- workflow/tests/test_xcor.py | 12 ++++++------ 5 files changed, 16 insertions(+), 9 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index bc28c53..90cc2d7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,7 @@ All notable changes to this project will be documented in this file. - Add and Update tests - Use GTF files instead of TxDb and org libraries in Annotate Peaks - Make gtf and geneName files as param inputs +- Fix xcor to increase file size for --random-source ## [publish_1.0.6 ] - 2019-05-31 ### Added diff --git a/workflow/scripts/xcor.py b/workflow/scripts/xcor.py index 3bf8dbf..096afc9 100644 --- a/workflow/scripts/xcor.py +++ b/workflow/scripts/xcor.py @@ -107,10 +107,16 @@ def xcor(tag, paired): subsampled_tag_filename = \ tag_basename + ".%d.tagAlign.gz" % (number_reads/1000000) + tag_extended = 'cat.tagAlign.gz' + out, err = utils.run_pipe([ + "zcat %s %s %s" % + (tag, tag, tag) + ], outfile=tag_extended) + steps = [ 'zcat %s' % (tag), 'grep -v "chrM"', - 'shuf -n %d --random-source=%s' % (number_reads, tag)] + 'shuf -n %d --random-source=%s' % (number_reads, tag_extended)] if paired: steps.extend([r"""awk 'BEGIN{OFS="\t"}{$4="N";$5="1000";print $0}'"""]) diff --git a/workflow/tests/test_call_peaks_macs.py b/workflow/tests/test_call_peaks_macs.py index 28881bd..f8a0842 100644 --- a/workflow/tests/test_call_peaks_macs.py +++ b/workflow/tests/test_call_peaks_macs.py @@ -26,7 +26,7 @@ def test_peaks_xls_singleend(): @pytest.mark.singleend def test_peaks_bed_singleend(): peak_file = test_output_path + 'ENCSR238SGC/1/' + 'ENCLB144FDT.narrowPeak' - assert utils.count_lines(peak_file) == 227389 + assert utils.count_lines(peak_file) == 199317 @pytest.mark.pairedend diff --git a/workflow/tests/test_diff_peaks.py b/workflow/tests/test_diff_peaks.py index ed22fd8..93655d1 100644 --- a/workflow/tests/test_diff_peaks.py +++ b/workflow/tests/test_diff_peaks.py @@ -71,4 +71,4 @@ def test_diffbind_pairedend_single_rep(): assert os.path.exists(os.path.join(test_output_path, 'ENCSR729LGA_vs_ENCSR757EMK_diffbind.bed')) diffbind_file = test_output_path + 'ENCSR729LGA_vs_ENCSR757EMK_diffbind.csv' assert os.path.exists(diffbind_file) - assert utils.count_lines(diffbind_file) >= 66201 + assert utils.count_lines(diffbind_file) >= 65182 diff --git a/workflow/tests/test_xcor.py b/workflow/tests/test_xcor.py index fd47594..4b65aa9 100644 --- a/workflow/tests/test_xcor.py +++ b/workflow/tests/test_xcor.py @@ -17,9 +17,9 @@ def test_cross_plot_singleend(): def test_cross_qc_singleend(): qc_file = os.path.join(test_output_path,"ENCLB144FDT/ENCLB144FDT.cc.qc") df_xcor = pd.read_csv(qc_file, sep="\t", header=None) - assert df_xcor[2].iloc[0] == '190,200,210' - assert df_xcor[8].iloc[0] == 1.025906 - assert round(df_xcor[9].iloc[0], 6) == 1.139671 + assert df_xcor[2].iloc[0] == '220,240,255' + assert df_xcor[8].iloc[0] == 1.024935 + assert round(df_xcor[9].iloc[0], 6) == 0.697252 @pytest.mark.pairedend @@ -31,6 +31,6 @@ def test_cross_qc_pairedend(): def test_cross_plot_pairedend(): qc_file = os.path.join(test_output_path,"ENCLB568IYX/ENCLB568IYX.cc.qc") df_xcor = pd.read_csv(qc_file, sep="\t", header=None) - assert df_xcor[2].iloc[0] == '220,430,475' - assert round(df_xcor[8].iloc[0],6) == 1.060018 - assert df_xcor[9].iloc[0] == 4.099357 + assert df_xcor[2].iloc[0] == '220,420,450' + assert round(df_xcor[8].iloc[0],6) == 1.058694 + assert df_xcor[9].iloc[0] == 3.004596 -- GitLab