diff --git a/workflow/scripts/check_design.py b/workflow/scripts/check_design.py index 929a97dc3b12da8ddd254671bdb62ff4a194c063..acf4f014f2c4ed24d25da9bd5e268ace9bd2fadd 100644 --- a/workflow/scripts/check_design.py +++ b/workflow/scripts/check_design.py @@ -11,7 +11,7 @@ For more details: %(prog)s --help ''' -## SETTINGS +# SETTINGS logger = logging.getLogger(__name__) logger.addHandler(logging.NullHandler()) @@ -57,7 +57,7 @@ def check_design_headers(design, paired): design_headers = list(design.columns.values) - if paired: # paired-end data + if paired: # paired-end data design_template.extend(['fastq_read2']) # Check if headers @@ -79,7 +79,8 @@ def check_controls(design): if len(missing_controls) > 0: logger.error('Missing control experiments: %s', list(missing_controls)) - raise Exception("Missing control experiments: %s" % list(missing_controls)) + raise Exception("Missing control experiments: %s" % + list(missing_controls)) def check_files(design, fastq, paired): @@ -87,9 +88,9 @@ def check_files(design, fastq, paired): logger.info("Running file check.") - if paired: # paired-end data + if paired: # paired-end data files = list(design['fastq_read1']) + list(design['fastq_read2']) - else: # single-end data + else: # single-end data files = design['fastq_read1'] files_found = fastq['name'] @@ -98,13 +99,14 @@ def check_files(design, fastq, paired): if len(missing_files) > 0: logger.error('Missing files from design file: %s', list(missing_files)) - raise Exception("Missing files from design file: %s" % list(missing_files)) + raise Exception("Missing files from design file: %s" % + list(missing_files)) else: file_dict = fastq.set_index('name').T.to_dict() design['fastq_read1'] = design['fastq_read1'] \ .apply(lambda x: file_dict[x]['path']) - if paired: # paired-end data + if paired: # paired-end data design['fastq_read2'] = design['fastq_read2'] \ .apply(lambda x: file_dict[x]['path']) return design diff --git a/workflow/scripts/map_reads.py b/workflow/scripts/map_reads.py index 4de2972b1edfec1c1f65158b6d6cbf56a639e948..ba6bbd0722a49ab09ddd84befbfe0a0ce3e07f19 100644 --- a/workflow/scripts/map_reads.py +++ b/workflow/scripts/map_reads.py @@ -8,7 +8,6 @@ import argparse import shutil import shlex import logging -import sys from multiprocessing import cpu_count import json import utils @@ -18,7 +17,7 @@ For more details: %(prog)s --help ''' -## SETTINGS +# SETTINGS logger = logging.getLogger(__name__) logger.addHandler(logging.NullHandler()) @@ -56,8 +55,7 @@ def get_args(): return args - -## Functions +# Functions def check_tools(): @@ -102,7 +100,6 @@ def generate_sa(fastq, reference): def align_se(fastq, sai, reference, fastq_basename): '''Use BWA to align SE data.''' - sam_filename = "%s.sam" % (fastq_basename) bam_filename = '%s.srt.bam' % (fastq_basename) steps = [ @@ -118,6 +115,7 @@ def align_se(fastq, sai, reference, fastq_basename): return bam_filename + def align_pe(fastq, sai, reference, fastq_basename): '''Use BWA to align PE data.''' @@ -173,7 +171,7 @@ def main(): sai.append(sai_filename) # Run alignment for either PE or SE - if paired: # paired-end data + if paired: # paired-end data fastq_r1_basename = os.path.basename( utils.strip_extensions(fastq[0], STRIP_EXTENSIONS)) fastq_r2_basename = os.path.basename( diff --git a/workflow/scripts/trim_reads.py b/workflow/scripts/trim_reads.py index c3925a06eb72eb9f62936298042f2d600882c30e..f9f2524f942fb356c360696cee70ea80a403cc4b 100644 --- a/workflow/scripts/trim_reads.py +++ b/workflow/scripts/trim_reads.py @@ -2,19 +2,17 @@ '''Trim low quality reads and remove sequences less than 35 base pairs.''' -import os import subprocess import argparse import shutil import logging -import sys EPILOG = ''' For more details: %(prog)s --help ''' -## SETTINGS +# SETTINGS logger = logging.getLogger(__name__) logger.addHandler(logging.NullHandler()) @@ -65,7 +63,7 @@ def check_tools(): def trim_reads(fastq, paired): '''Run trim_galore on 1 or 2 files.''' - if paired: # paired-end data + if paired: # paired-end data trim_params = '--paired -q 25 --illumina --gzip --length 35' trim_command = "trim_galore %s %s %s " \ % (trim_params, fastq[0], fastq[1]) diff --git a/workflow/scripts/utils.py b/workflow/scripts/utils.py index 98d65aa0d185e5eb605230dab98b74dfceba1b2a..5162646d56b5281a88cd7195dba784865cffe7f8 100644 --- a/workflow/scripts/utils.py +++ b/workflow/scripts/utils.py @@ -3,9 +3,6 @@ '''General utilities.''' -import sys -import os -import subprocess import shlex import logging diff --git a/workflow/tests/test_check_design.py b/workflow/tests/test_check_design.py index 394d5251b382b682bf758debdedd12aaf5637551..a63bc7772068ec265fcd1bc656e3d36e92ffa853 100644 --- a/workflow/tests/test_check_design.py +++ b/workflow/tests/test_check_design.py @@ -1,11 +1,9 @@ #!/usr/bin/env python3 -import os import pytest import pandas as pd from io import StringIO import check_design -import sys DESIGN_STRING = """sample_id\tbiosample\tfactor\ttreatment\treplicate\tcontrol_id\tfastq_read1 @@ -49,11 +47,12 @@ def design_2(design): design_df = design.drop(design.index[2]) return design_df + @pytest.fixture def design_3(design): # Drop A_2 and B_2 and append as fastq_read2 - design_df = design.drop(design.index[[1,3]]) - design_df['fastq_read2'] = design.loc[[1,3],'fastq_read1'].values + design_df = design.drop(design.index[[1, 3]]) + design_df['fastq_read2'] = design.loc[[1, 3], 'fastq_read1'].values return design_df @@ -94,10 +93,10 @@ def test_check_files_missing_files(design, fastq_files_1): def test_check_files_output_singleend(design, fastq_files): paired = False new_design = check_design.check_files(design, fastq_files, paired) - assert new_design.loc[0,'fastq_read1'] == "/path/to/file/A_1.fastq.gz" + assert new_design.loc[0, 'fastq_read1'] == "/path/to/file/A_1.fastq.gz" def test_check_files_output_pairedend(design_3, fastq_files): paired = True new_design = check_design.check_files(design_3, fastq_files, paired) - assert new_design.loc[0,'fastq_read2'] == "/path/to/file/A_2.fastq.gz" + assert new_design.loc[0, 'fastq_read2'] == "/path/to/file/A_2.fastq.gz" diff --git a/workflow/tests/test_utils.py b/workflow/tests/test_utils.py index 0997c95bc6780b2ac77f16525a5538b9a62bb4ae..3bbe48abcabc3a027e978dbea73b2a9d350e794d 100644 --- a/workflow/tests/test_utils.py +++ b/workflow/tests/test_utils.py @@ -1,13 +1,12 @@ #!/usr/bin/env python3 import pytest -import shutil -import shlex import utils STRIP_EXTENSIONS = ['.gz', '.fq', '.fastq', '.fa', '.fasta'] + @pytest.fixture def steps(): steps = [] @@ -56,15 +55,15 @@ def test_run_last_step_file(steps_2, capsys, tmpdir): def test_strip_extensions(): - filename = utils.strip_extensions('ENCFF833BLU.fastq.gz',STRIP_EXTENSIONS) + filename = utils.strip_extensions('ENCFF833BLU.fastq.gz', STRIP_EXTENSIONS) assert filename == 'ENCFF833BLU' def test_strip_extensions_not_valid(): - filename = utils.strip_extensions('ENCFF833BLU.not.valid',STRIP_EXTENSIONS) + filename = utils.strip_extensions('ENCFF833BLU.not.valid', STRIP_EXTENSIONS) assert filename == 'ENCFF833BLU.not.valid' def test_strip_extensions_missing_basename(): - filename = utils.strip_extensions('.fastq.gz',STRIP_EXTENSIONS) + filename = utils.strip_extensions('.fastq.gz', STRIP_EXTENSIONS) assert filename == '.fastq'