Skip to content
Snippets Groups Projects
Commit 487b307d authored by Venkat Malladi
Browse files

Update cross correlation filenames to be shorter.

parent f760c6df
1 merge request: !20 Resolve "Use SampleIds/Experiment Id as file names throughout pipeline"
...@@ -22,6 +22,13 @@ logger.propagate = False ...@@ -22,6 +22,13 @@ logger.propagate = False
logger.setLevel(logging.INFO) logger.setLevel(logging.INFO)
# Filename extensions passed to utils.strip_extensions to derive the short
# tag basename that the cross-correlation output names are built from.
# The order of this list is important: strip_extensions strips from the
# right inward, so the expected right-most extensions should appear
# first (like .gz).
# Every entry includes its leading dot; a dot-less entry such as 'bedpe'
# would presumably leave a dangling '.' on the basename (verify against
# utils.strip_extensions).
# Modified from J. Seth Strattan
STRIP_EXTENSIONS = ['.gz', '.tagAlign', '.bedse', '.bedpe']
def get_args(): def get_args():
'''Define arguments.''' '''Define arguments.'''
...@@ -60,7 +67,8 @@ def check_tools(): ...@@ -60,7 +67,8 @@ def check_tools():
def xcor(tag, paired): def xcor(tag, paired):
'''Use spp to calculate cross-correlation stats.''' '''Use spp to calculate cross-correlation stats.'''
tag_basename = os.path.basename(utils.strip_extensions(tag, ['.gz'])) extension
tag_basename = os.path.basename(utils.strip_extensions(tag, STRIP_EXTENSIONS))
uncompressed_tag_filename = tag_basename uncompressed_tag_filename = tag_basename
...@@ -83,8 +91,8 @@ def xcor(tag, paired): ...@@ -83,8 +91,8 @@ def xcor(tag, paired):
out, err = utils.run_pipe(steps, outfile=subsampled_tag_filename) out, err = utils.run_pipe(steps, outfile=subsampled_tag_filename)
# Calculate Cross-correlation QC scores # Calculate Cross-correlation QC scores
cc_scores_filename = subsampled_tag_filename + ".cc.qc" cc_scores_filename = tag_basename + ".cc.qc"
cc_plot_filename = subsampled_tag_filename + ".cc.plot.pdf" cc_plot_filename = tag_basename + ".cc.plot.pdf"
# CC_SCORE FILE format # CC_SCORE FILE format
# Filename <tab> # Filename <tab>
......
...@@ -10,8 +10,8 @@ test_output_path = os.path.dirname(os.path.abspath(__file__)) + \ ...@@ -10,8 +10,8 @@ test_output_path = os.path.dirname(os.path.abspath(__file__)) + \
@pytest.mark.singleend @pytest.mark.singleend
def test_map_qc_singleend(): def test_map_qc_singleend():
assert os.path.exists(os.path.join(test_output_path, 'ENCFF833BLU.filt.nodup.tagAlign.15.tagAlign.gz.cc.plot.pdf')) assert os.path.exists(os.path.join(test_output_path, 'ENCFF833BLU.cc.plot.pdf'))
qc_file = os.path.join(test_output_path,"ENCFF833BLU.filt.nodup.tagAlign.15.tagAlign.gz.cc.qc") qc_file = os.path.join(test_output_path,"ENCFF833BLU.cc.qc")
df_xcor = pd.read_csv(qc_file, sep="\t", header=None) df_xcor = pd.read_csv(qc_file, sep="\t", header=None)
assert df_xcor[2].iloc[0] == '190,200,210' assert df_xcor[2].iloc[0] == '190,200,210'
assert df_xcor[8].iloc[0] == 1.025906 assert df_xcor[8].iloc[0] == 1.025906
...@@ -20,8 +20,8 @@ def test_map_qc_singleend(): ...@@ -20,8 +20,8 @@ def test_map_qc_singleend():
@pytest.mark.pairedend @pytest.mark.pairedend
def test_map_qc_pairedend(): def test_map_qc_pairedend():
assert os.path.exists(os.path.join(test_output_path, 'ENCFF582IOZ_val_2ENCFF957SQS_val_1.filt.nodup.tagAlign.15.tagAlign.gz.cc.plot.pdf')) assert os.path.exists(os.path.join(test_output_path, 'ENCLB568IYX.cc.plot.pdf'))
qc_file = os.path.join(test_output_path,"ENCFF582IOZ_val_2ENCFF957SQS_val_1.filt.nodup.tagAlign.15.tagAlign.gz.cc.qc") qc_file = os.path.join(test_output_path,"ENCLB568IYX.cc.qc")
df_xcor = pd.read_csv(qc_file, sep="\t", header=None) df_xcor = pd.read_csv(qc_file, sep="\t", header=None)
assert df_xcor[2].iloc[0] == '210,220,475' assert df_xcor[2].iloc[0] == '210,220,475'
assert df_xcor[8].iloc[0] == 1.062032 assert df_xcor[8].iloc[0] == 1.062032
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment