diff --git a/workflow/main.nf b/workflow/main.nf index ac29389dc993ebfd8e2034712d4cbf24168fdb2a..4700105726ccbe33a8906c8c0fbf0902447f865f 100644 --- a/workflow/main.nf +++ b/workflow/main.nf @@ -424,7 +424,7 @@ process motifSearch { output: file "*memechip" into motifSearch - file "sorted-*" into filteredPeaks + file "*narrowPeak" into filteredPeaks script: diff --git a/workflow/scripts/motif_search.py b/workflow/scripts/motif_search.py index 9feac8c39201af96700f69370a70b24a7111a5fc..ab80819a7fc04984952b015cd2542574d5dd00c0 100644 --- a/workflow/scripts/motif_search.py +++ b/workflow/scripts/motif_search.py @@ -27,6 +27,13 @@ logger.propagate = False logger.setLevel(logging.INFO) +# the order of this list is important. +# strip_extensions strips from the right inward, so +# the expected right-most extensions should appear first (like .gz) +# Modified from J. Seth Strattan +STRIP_EXTENSIONS = ['.narrowPeak', '.replicated' ] + + def get_args(): '''Define arguments.''' @@ -55,9 +62,11 @@ def run_wrapper(args): motif_search(*args) def motif_search(filename, genome, experiment, peak): + '''Run motif search on peaks.''' - file_basename = os.path.basename(filename) - sorted_fn = 'sorted-%s' % (file_basename) + file_basename = os.path.basename( + utils.strip_extensions(filename, STRIP_EXTENSIONS)) + sorted_fn = '%s.%d.narrowPeak' % (file_basename, peak) out_fa = '%s.fa' % (experiment) out_motif = '%s_memechip' % (experiment) diff --git a/workflow/tests/test_motif_search.py b/workflow/tests/test_motif_search.py index 577407a6ad4473c09e8f0a18c71fe4bf5e4fd86c..182a6206db0b2701242780db1f7011518b4ac697 100644 --- a/workflow/tests/test_motif_search.py +++ b/workflow/tests/test_motif_search.py @@ -14,7 +14,7 @@ test_output_path = os.path.dirname(os.path.abspath(__file__)) + \ def test_motif_search_singleend(): assert os.path.exists(os.path.join(test_output_path, 'ENCSR238SGC.fa')) assert os.path.exists(os.path.join(test_output_path, 'ENCSR238SGC_memechip', 'index.html')) - 
peak_file_ENCSR238SGC = test_output_path + 'sorted-ENCSR238SGC.replicated.narrowPeak' + peak_file_ENCSR238SGC = test_output_path + 'ENCSR238SGC.600.narrowPeak' assert os.path.exists(peak_file_ENCSR238SGC) assert utils.count_lines(peak_file_ENCSR238SGC) == 600 @@ -22,6 +22,6 @@ def test_motif_search_singleend(): def test_motif_search_pairedend(): assert os.path.exists(os.path.join(test_output_path, 'ENCSR729LGA.fa')) assert os.path.exists(os.path.join(test_output_path, 'ENCSR729LGA_memechip', 'index.html')) - peak_file_ENCSR729LGA= test_output_path + 'sorted-ENCSR729LGA.replicated.narrowPeak' + peak_file_ENCSR729LGA= test_output_path + 'ENCSR729LGA.600.narrowPeak' assert os.path.exists(peak_file_ENCSR729LGA) assert utils.count_lines(peak_file_ENCSR729LGA) == 600