diff --git a/workflow/scripts/map_reads.py b/workflow/scripts/map_reads.py
index a0275cca6d3c6da59a025d8e769519292a5f03b7..4de2972b1edfec1c1f65158b6d6cbf56a639e948 100644
--- a/workflow/scripts/map_reads.py
+++ b/workflow/scripts/map_reads.py
@@ -59,15 +59,6 @@ def get_args():
 
 ## Functions
 
-def strip_extensions(filename, extensions):
-    '''Strips extensions to get basename of file.'''
-
-    basename = filename
-    for extension in extensions:
-        basename = basename.rpartition(extension)[0] or basename
-
-    return basename
-
 def check_tools():
     '''Checks for required componenets on user system'''
 
@@ -92,7 +83,7 @@ def check_tools():
 def generate_sa(fastq, reference):
     '''Use BWA to generate Suffix Arrays.'''
 
-    fastq_basename = os.path.basename(strip_extensions(fastq, STRIP_EXTENSIONS))
+    fastq_basename = os.path.basename(utils.strip_extensions(fastq, STRIP_EXTENSIONS))
 
     bwa_aln_params = '-q 5 -l 32 -k 2'
 
@@ -184,16 +175,16 @@ def main():
     # Run alignment for either PE or SE
     if paired: # paired-end data
         fastq_r1_basename = os.path.basename(
-            strip_extensions(fastq[0], STRIP_EXTENSIONS))
+            utils.strip_extensions(fastq[0], STRIP_EXTENSIONS))
         fastq_r2_basename = os.path.basename(
-            strip_extensions(fastq[1], STRIP_EXTENSIONS))
+            utils.strip_extensions(fastq[1], STRIP_EXTENSIONS))
         fastq_basename = fastq_r1_basename + fastq_r2_basename
 
         bam_filename = align_pe(fastq, sai, reference, fastq_basename)
 
     else:
         fastq_basename = os.path.basename(
-            strip_extensions(fastq[0], STRIP_EXTENSIONS))
+            utils.strip_extensions(fastq[0], STRIP_EXTENSIONS))
 
         bam_filename = align_se(fastq, sai, reference, fastq_basename)
 
diff --git a/workflow/scripts/utils.py b/workflow/scripts/utils.py
index e8984472acc5bdf468cda3ad0d2893ea9f703ec1..98d65aa0d185e5eb605230dab98b74dfceba1b2a 100644
--- a/workflow/scripts/utils.py
+++ b/workflow/scripts/utils.py
@@ -46,3 +46,21 @@ def run_pipe(steps, outfile=None):
         p = p_next
     out, err = p.communicate()
     return out, err
+
+
+def strip_extensions(filename, extensions):
+    '''Strips trailing extensions to get basename of file.
+
+    Extensions are tried once each, in the order given. An extension is
+    removed only when the name ends with it, and never when removing it
+    would leave an empty name.
+    '''
+
+    basename = filename
+    for extension in extensions:
+        # Match the end of the name only: rpartition() also splits on a hit
+        # in the middle (e.g. '.fa' inside 'my.farm.fq') and drops the rest.
+        if extension and basename.endswith(extension):
+            basename = basename[:-len(extension)] or basename
+
+    return basename
diff --git a/workflow/tests/test_utils.py b/workflow/tests/test_utils.py
index 159c9bb21f75d93ef6a8a8cf77fbe490bcb22c01..0997c95bc6780b2ac77f16525a5538b9a62bb4ae 100644
--- a/workflow/tests/test_utils.py
+++ b/workflow/tests/test_utils.py
@@ -6,6 +6,8 @@ import shlex
 import utils
 
 
+STRIP_EXTENSIONS = ['.gz', '.fq', '.fastq', '.fa', '.fasta']
+
 @pytest.fixture
 def steps():
     steps = []
@@ -51,3 +53,23 @@ def test_run_last_step_file(steps_2, capsys, tmpdir):
     output, errors = capsys.readouterr()
     assert "last step shlex" in output
     assert check_output in tmp_outfile.read()
+
+
+def test_strip_extensions():
+    filename = utils.strip_extensions('ENCFF833BLU.fastq.gz', STRIP_EXTENSIONS)
+    assert filename == 'ENCFF833BLU'
+
+
+def test_strip_extensions_not_valid():
+    filename = utils.strip_extensions('ENCFF833BLU.not.valid', STRIP_EXTENSIONS)
+    assert filename == 'ENCFF833BLU.not.valid'
+
+
+def test_strip_extensions_missing_basename():
+    filename = utils.strip_extensions('.fastq.gz', STRIP_EXTENSIONS)
+    assert filename == '.fastq'
+
+
+def test_strip_extensions_embedded_match():
+    filename = utils.strip_extensions('my.farm.fq', STRIP_EXTENSIONS)
+    assert filename == 'my.farm'