diff --git a/CHANGELOG.md b/CHANGELOG.md index bc28c53bfc58c5288b3b912d06152a1657e3db0a..90cc2d75a828b8e52e7753b986e0efa88c230366 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,7 @@ All notable changes to this project will be documented in this file. - Add and Update tests - Use GTF files instead of TxDb and org libraries in Annotate Peaks - Make gtf and geneName files as param inputs +- Fix xcor to increase file size for --random-source ## [publish_1.0.6 ] - 2019-05-31 ### Added diff --git a/workflow/scripts/xcor.py b/workflow/scripts/xcor.py index 3bf8dbfa1e7da470ec0eea413970a72b01a968c1..096afc9b7e6ef431c341cecf258b92fa6227bbfb 100644 --- a/workflow/scripts/xcor.py +++ b/workflow/scripts/xcor.py @@ -107,10 +107,16 @@ def xcor(tag, paired): subsampled_tag_filename = \ tag_basename + ".%d.tagAlign.gz" % (number_reads/1000000) + tag_extended = 'cat.tagAlign.gz' + out, err = utils.run_pipe([ + "zcat %s %s %s" % + (tag, tag, tag) + ], outfile=tag_extended) + steps = [ 'zcat %s' % (tag), 'grep -v "chrM"', - 'shuf -n %d --random-source=%s' % (number_reads, tag)] + 'shuf -n %d --random-source=%s' % (number_reads, tag_extended)] if paired: steps.extend([r"""awk 'BEGIN{OFS="\t"}{$4="N";$5="1000";print $0}'"""]) diff --git a/workflow/tests/test_call_peaks_macs.py b/workflow/tests/test_call_peaks_macs.py index 28881bd6e61a084ffa51a83647db9cd76ace6854..f8a0842a83392fb7a15222f86d29213c288eb5b3 100644 --- a/workflow/tests/test_call_peaks_macs.py +++ b/workflow/tests/test_call_peaks_macs.py @@ -26,7 +26,7 @@ def test_peaks_xls_singleend(): @pytest.mark.singleend def test_peaks_bed_singleend(): peak_file = test_output_path + 'ENCSR238SGC/1/' + 'ENCLB144FDT.narrowPeak' - assert utils.count_lines(peak_file) == 227389 + assert utils.count_lines(peak_file) == 199317 @pytest.mark.pairedend diff --git a/workflow/tests/test_diff_peaks.py b/workflow/tests/test_diff_peaks.py index ed22fd83d5db04fac6c3c4924cafd81455a55c5a..93655d177a322b2d44056df79485909413444317 100644 --- a/workflow/tests/test_diff_peaks.py +++ b/workflow/tests/test_diff_peaks.py @@ -71,4 +71,4 @@ def test_diffbind_pairedend_single_rep(): assert os.path.exists(os.path.join(test_output_path, 'ENCSR729LGA_vs_ENCSR757EMK_diffbind.bed')) diffbind_file = test_output_path + 'ENCSR729LGA_vs_ENCSR757EMK_diffbind.csv' assert os.path.exists(diffbind_file) - assert utils.count_lines(diffbind_file) >= 66201 + assert utils.count_lines(diffbind_file) >= 65182 diff --git a/workflow/tests/test_xcor.py b/workflow/tests/test_xcor.py index fd475943f3f60317c4e0d5dd1ef067cc23fd842d..4b65aa9fd6f5e83a960df1c6ad52cf2211cc3f17 100644 --- a/workflow/tests/test_xcor.py +++ b/workflow/tests/test_xcor.py @@ -17,9 +17,9 @@ def test_cross_plot_singleend(): def test_cross_qc_singleend(): qc_file = os.path.join(test_output_path,"ENCLB144FDT/ENCLB144FDT.cc.qc") df_xcor = pd.read_csv(qc_file, sep="\t", header=None) - assert df_xcor[2].iloc[0] == '190,200,210' - assert df_xcor[8].iloc[0] == 1.025906 - assert round(df_xcor[9].iloc[0], 6) == 1.139671 + assert df_xcor[2].iloc[0] == '220,240,255' + assert df_xcor[8].iloc[0] == 1.024935 + assert round(df_xcor[9].iloc[0], 6) == 0.697252 @pytest.mark.pairedend @@ -31,6 +31,6 @@ def test_cross_qc_pairedend(): def test_cross_plot_pairedend(): qc_file = os.path.join(test_output_path,"ENCLB568IYX/ENCLB568IYX.cc.qc") df_xcor = pd.read_csv(qc_file, sep="\t", header=None) - assert df_xcor[2].iloc[0] == '220,430,475' - assert round(df_xcor[8].iloc[0],6) == 1.060018 - assert df_xcor[9].iloc[0] == 4.099357 + assert df_xcor[2].iloc[0] == '220,420,450' + assert round(df_xcor[8].iloc[0],6) == 1.058694 + assert df_xcor[9].iloc[0] == 3.004596