From 5c9ce3aa3dcc06b14c1f2d43adc5137f2b2a4f5c Mon Sep 17 00:00:00 2001
From: Holly Ruess <s185797@Nucleus036.cm.cluster>
Date: Mon, 30 Dec 2019 19:51:33 -0600
Subject: [PATCH] fix pool and pseudo

---
Reviewer notes (this area is ignored by `git am`):
 * Adds bedpe_to_tagalign(tag_file, outfile), which pipes
   `zcat -f` -> awk -> `gzip -cn` through utils.run_pipe and returns
   outfile + ".tagAlign.gz". The awk step prints two tagAlign records per
   input row: ($1,$2,$3,N,1000,$9) and ($4,$5,$6,N,1000,$10).
 * `out, err` returned by utils.run_pipe are assigned but never used.
 * Test expectations change: design row counts 7 -> 6 and 6 -> 5; xcor QC
   column 2 is now compared to int 0, column 8 is rounded to 6 places,
   column 9 is still compared with exact float equality.

 workflow/scripts/pool_and_psuedoreplicate.py    | 14 ++++++++++++++
 workflow/tests/test_pool_and_psuedoreplicate.py |  4 ++--
 workflow/tests/test_xcor.py                     |  6 +++---
 3 files changed, 19 insertions(+), 5 deletions(-)

diff --git a/workflow/scripts/pool_and_psuedoreplicate.py b/workflow/scripts/pool_and_psuedoreplicate.py
index 04ce605..ea454da 100644
--- a/workflow/scripts/pool_and_psuedoreplicate.py
+++ b/workflow/scripts/pool_and_psuedoreplicate.py
@@ -99,6 +99,20 @@ def pool(tag_files, outfile, paired):
     return pooled_filename
 
 
+def bedpe_to_tagalign(tag_file, outfile):
+    '''Convert read pairs to reads into standard tagAlign file.'''
+
+    se_tag_filename = outfile + ".tagAlign.gz"
+
+    # Convert read pairs to reads into standard tagAlign file
+    tag_steps = ["zcat -f %s" % (tag_file)]
+    tag_steps.extend([r"""awk 'BEGIN{OFS="\t"}{printf "%s\t%s\t%s\tN\t1000\t%s\n%s\t%s\t%s\tN\t1000\t%s\n",$1,$2,$3,$9,$4,$5,$6,$10}'"""])
+    tag_steps.extend(['gzip -cn'])
+    out, err = utils.run_pipe(tag_steps, outfile=se_tag_filename)
+
+    return se_tag_filename
+
+
 def self_psuedoreplication(tag_file, prefix, paired):
     '''Make 2 self-psuedoreplicates.'''
 
diff --git a/workflow/tests/test_pool_and_psuedoreplicate.py b/workflow/tests/test_pool_and_psuedoreplicate.py
index 0a6c899..f969dfc 100644
--- a/workflow/tests/test_pool_and_psuedoreplicate.py
+++ b/workflow/tests/test_pool_and_psuedoreplicate.py
@@ -46,7 +46,7 @@ def test_pool_and_psuedoreplicate_singleend_human():
     design_file = os.path.join(test_output_path, 'ENCSR265ZXX_ppr.tsv')
     assert os.path.exists(design_file)
     design_df = pd.read_csv(design_file, sep="\t")
-    assert design_df.shape[0] == 7
+    assert design_df.shape[0] == 6
 
 
 @pytest.mark.pairedend_mouse
@@ -59,4 +59,4 @@ def test_pool_and_psuedoreplicate_pairedend_mouse():
     design_file = os.path.join(test_output_path, 'ENCSR451NAE_ppr.tsv')
     assert os.path.exists(design_file)
     design_df = pd.read_csv(design_file, sep="\t")
-    assert design_df.shape[0] == 6
+    assert design_df.shape[0] == 5
diff --git a/workflow/tests/test_xcor.py b/workflow/tests/test_xcor.py
index 7e837af..1b6418f 100644
--- a/workflow/tests/test_xcor.py
+++ b/workflow/tests/test_xcor.py
@@ -17,9 +17,9 @@ def test_cross_plot_singleend_human():
 def test_cross_qc_singleend_human():
     qc_file = os.path.join(test_output_path,"ENCLB622FZX/ENCLB622FZX.cc.qc")
     df_xcor = pd.read_csv(qc_file, sep="\t", header=None)
-    assert df_xcor[2].iloc[0] == '0'
-    assert df_xcor[8].iloc[0] == 1.347895
-    assert df_xcor[9].iloc[0] == 1.970438
+    assert df_xcor[2].iloc[0] == 0
+    assert round(df_xcor[8].iloc[0],6) == 1.332028
+    assert df_xcor[9].iloc[0] == 2.152459
 
 
 @pytest.mark.pairedend_mouse
-- 
GitLab