From 487b307d7f2ff50941b4874c50c8f6fc66b0e6ce Mon Sep 17 00:00:00 2001
From: Venkat Malladi <venkat.malladi@utsouthwestern.edu>
Date: Sun, 6 Jan 2019 20:44:11 -0600
Subject: [PATCH] Update cross correlation filenames to be shorter.

---
 workflow/scripts/xcor.py    | 14 +++++++++++---
 workflow/tests/test_xcor.py |  8 ++++----
 2 files changed, 15 insertions(+), 7 deletions(-)

diff --git a/workflow/scripts/xcor.py b/workflow/scripts/xcor.py
index f799137..2737984 100644
--- a/workflow/scripts/xcor.py
+++ b/workflow/scripts/xcor.py
@@ -22,6 +22,13 @@ logger.propagate = False
 logger.setLevel(logging.INFO)
 
 
+# The order of this list is important:
+# strip_extensions strips from the right inward, so
+# the expected right-most extensions should appear first (like .gz).
+# Modified from J. Seth Strattan
+STRIP_EXTENSIONS = ['.gz', '.tagAlign', '.bedse', '.bedpe']
+
+
 def get_args():
     '''Define arguments.'''
 
@@ -60,7 +67,8 @@ def check_tools():
 def xcor(tag, paired):
     '''Use spp to calculate cross-correlation stats.'''
 
-    tag_basename = os.path.basename(utils.strip_extensions(tag, ['.gz']))
+    # Drop every known tag-file suffix so derived output names stay short.
+    tag_basename = os.path.basename(utils.strip_extensions(tag, STRIP_EXTENSIONS))
     uncompressed_tag_filename = tag_basename
 
 
@@ -83,8 +91,8 @@ def xcor(tag, paired):
     out, err = utils.run_pipe(steps, outfile=subsampled_tag_filename)
 
     # Calculate Cross-correlation QC scores
-    cc_scores_filename = subsampled_tag_filename + ".cc.qc"
-    cc_plot_filename = subsampled_tag_filename + ".cc.plot.pdf"
+    cc_scores_filename = tag_basename + ".cc.qc"
+    cc_plot_filename = tag_basename + ".cc.plot.pdf"
 
     # CC_SCORE FILE format
     # Filename <tab>
diff --git a/workflow/tests/test_xcor.py b/workflow/tests/test_xcor.py
index 8492de4..b2aeb81 100644
--- a/workflow/tests/test_xcor.py
+++ b/workflow/tests/test_xcor.py
@@ -10,8 +10,8 @@ test_output_path = os.path.dirname(os.path.abspath(__file__)) + \
 
 @pytest.mark.singleend
 def test_map_qc_singleend():
-    assert os.path.exists(os.path.join(test_output_path, 'ENCFF833BLU.filt.nodup.tagAlign.15.tagAlign.gz.cc.plot.pdf'))
-    qc_file = os.path.join(test_output_path,"ENCFF833BLU.filt.nodup.tagAlign.15.tagAlign.gz.cc.qc")
+    assert os.path.exists(os.path.join(test_output_path, 'ENCFF833BLU.cc.plot.pdf'))
+    qc_file = os.path.join(test_output_path,"ENCFF833BLU.cc.qc")
     df_xcor = pd.read_csv(qc_file, sep="\t", header=None)
     assert df_xcor[2].iloc[0] == '190,200,210'
     assert df_xcor[8].iloc[0] == 1.025906
@@ -20,8 +20,8 @@ def test_map_qc_singleend():
 
 @pytest.mark.pairedend
 def test_map_qc_pairedend():
-    assert os.path.exists(os.path.join(test_output_path, 'ENCFF582IOZ_val_2ENCFF957SQS_val_1.filt.nodup.tagAlign.15.tagAlign.gz.cc.plot.pdf'))
-    qc_file = os.path.join(test_output_path,"ENCFF582IOZ_val_2ENCFF957SQS_val_1.filt.nodup.tagAlign.15.tagAlign.gz.cc.qc")
+    assert os.path.exists(os.path.join(test_output_path, 'ENCLB568IYX.cc.plot.pdf'))
+    qc_file = os.path.join(test_output_path,"ENCLB568IYX.cc.qc")
     df_xcor = pd.read_csv(qc_file, sep="\t", header=None)
     assert df_xcor[2].iloc[0] == '210,220,475'
     assert df_xcor[8].iloc[0] == 1.062032
-- 
GitLab