diff --git a/workflow/scripts/xcor.py b/workflow/scripts/xcor.py index f799137aa41caddf5f27729be9abb0eebeeb7482..2737984df794dca446f1f3f5bfb60c68ace48621 100644 --- a/workflow/scripts/xcor.py +++ b/workflow/scripts/xcor.py @@ -22,6 +22,13 @@ logger.propagate = False logger.setLevel(logging.INFO) +# the order of this list is important. +# strip_extensions strips from the right inward, so +# the expected right-most extensions should appear first (like .gz) +# Modified from J. Seth Strattan +STRIP_EXTENSIONS = ['.gz', '.tagAlign', '.bedse', '.bedpe'] + + def get_args(): '''Define arguments.''' @@ -60,7 +67,8 @@ def check_tools(): def xcor(tag, paired): '''Use spp to calculate cross-correlation stats.''' - tag_basename = os.path.basename(utils.strip_extensions(tag, ['.gz'])) + extension + tag_basename = os.path.basename(utils.strip_extensions(tag, STRIP_EXTENSIONS)) uncompressed_tag_filename = tag_basename @@ -83,8 +91,8 @@ def xcor(tag, paired): out, err = utils.run_pipe(steps, outfile=subsampled_tag_filename) # Calculate Cross-correlation QC scores - cc_scores_filename = subsampled_tag_filename + ".cc.qc" - cc_plot_filename = subsampled_tag_filename + ".cc.plot.pdf" + cc_scores_filename = tag_basename + ".cc.qc" + cc_plot_filename = tag_basename + ".cc.plot.pdf" # CC_SCORE FILE format # Filename <tab> diff --git a/workflow/tests/test_xcor.py b/workflow/tests/test_xcor.py index 8492de444ddc8f17d5d4ad9d7ce2f19785859753..b2aeb81f0da755234ceda4d8009fba3d2c33e50d 100644 --- a/workflow/tests/test_xcor.py +++ b/workflow/tests/test_xcor.py @@ -10,8 +10,8 @@ test_output_path = os.path.dirname(os.path.abspath(__file__)) + \ @pytest.mark.singleend def test_map_qc_singleend(): - assert os.path.exists(os.path.join(test_output_path, 'ENCFF833BLU.filt.nodup.tagAlign.15.tagAlign.gz.cc.plot.pdf')) - qc_file = os.path.join(test_output_path,"ENCFF833BLU.filt.nodup.tagAlign.15.tagAlign.gz.cc.qc") + assert os.path.exists(os.path.join(test_output_path, 'ENCFF833BLU.cc.plot.pdf')) + 
qc_file = os.path.join(test_output_path,"ENCFF833BLU.cc.qc") df_xcor = pd.read_csv(qc_file, sep="\t", header=None) assert df_xcor[2].iloc[0] == '190,200,210' assert df_xcor[8].iloc[0] == 1.025906 @@ -20,8 +20,8 @@ def test_map_qc_singleend(): @pytest.mark.pairedend def test_map_qc_pairedend(): - assert os.path.exists(os.path.join(test_output_path, 'ENCFF582IOZ_val_2ENCFF957SQS_val_1.filt.nodup.tagAlign.15.tagAlign.gz.cc.plot.pdf')) - qc_file = os.path.join(test_output_path,"ENCFF582IOZ_val_2ENCFF957SQS_val_1.filt.nodup.tagAlign.15.tagAlign.gz.cc.qc") + assert os.path.exists(os.path.join(test_output_path, 'ENCLB568IYX.cc.plot.pdf')) + qc_file = os.path.join(test_output_path,"ENCLB568IYX.cc.qc") df_xcor = pd.read_csv(qc_file, sep="\t", header=None) assert df_xcor[2].iloc[0] == '210,220,475' assert df_xcor[8].iloc[0] == 1.062032