From db7110cb90267a7c20fc7ca658064979ea9400e0 Mon Sep 17 00:00:00 2001
From: Venkat Malladi <venkat.malladi@utsouthwestern.edu>
Date: Sun, 6 Jan 2019 21:39:32 -0600
Subject: [PATCH] Fix names in motif search and adding summits files.

---
 workflow/main.nf                    |  2 +-
 workflow/scripts/motif_search.py    | 13 +++++++++++--
 workflow/tests/test_motif_search.py |  4 ++--
 3 files changed, 14 insertions(+), 5 deletions(-)

diff --git a/workflow/main.nf b/workflow/main.nf
index ac29389..4700105 100644
--- a/workflow/main.nf
+++ b/workflow/main.nf
@@ -424,7 +424,7 @@ process motifSearch {
   output:
 
   file "*memechip" into motifSearch
-  file "sorted-*" into filteredPeaks
+  file "*narrowPeak" into filteredPeaks
 
   script:
 
diff --git a/workflow/scripts/motif_search.py b/workflow/scripts/motif_search.py
index 9feac8c..ab80819 100644
--- a/workflow/scripts/motif_search.py
+++ b/workflow/scripts/motif_search.py
@@ -27,6 +27,13 @@ logger.propagate = False
 logger.setLevel(logging.INFO)
 
 
+# The order of this list is important:
+# strip_extensions strips from the right inward, so the extensions
+# expected to be right-most (like .gz) should appear first.
+# Modified from J. Seth Strattan.
+STRIP_EXTENSIONS = ['.narrowPeak', '.replicated' ]
+
+
 def get_args():
     '''Define arguments.'''
 
@@ -55,9 +62,11 @@ def run_wrapper(args):
   motif_search(*args)
 
 def motif_search(filename, genome, experiment, peak):
+    '''Run motif search on peaks.'''
 
-    file_basename = os.path.basename(filename)
-    sorted_fn = 'sorted-%s' % (file_basename)
+    file_basename = os.path.basename(
+        utils.strip_extensions(filename, STRIP_EXTENSIONS))
+    sorted_fn = '%s.%d.narrowPeak' % (file_basename, peak)
     out_fa = '%s.fa' % (experiment)
     out_motif = '%s_memechip' % (experiment)
 
diff --git a/workflow/tests/test_motif_search.py b/workflow/tests/test_motif_search.py
index 577407a..182a620 100644
--- a/workflow/tests/test_motif_search.py
+++ b/workflow/tests/test_motif_search.py
@@ -14,7 +14,7 @@ test_output_path = os.path.dirname(os.path.abspath(__file__)) + \
 def test_motif_search_singleend():
     assert os.path.exists(os.path.join(test_output_path, 'ENCSR238SGC.fa'))
     assert os.path.exists(os.path.join(test_output_path, 'ENCSR238SGC_memechip', 'index.html'))
-    peak_file_ENCSR238SGC = test_output_path + 'sorted-ENCSR238SGC.replicated.narrowPeak'
+    peak_file_ENCSR238SGC = test_output_path + 'ENCSR238SGC.600.narrowPeak'
     assert os.path.exists(peak_file_ENCSR238SGC)
     assert utils.count_lines(peak_file_ENCSR238SGC) == 600
 
@@ -22,6 +22,6 @@ def test_motif_search_singleend():
 def test_motif_search_pairedend():
     assert os.path.exists(os.path.join(test_output_path, 'ENCSR729LGA.fa'))
     assert os.path.exists(os.path.join(test_output_path, 'ENCSR729LGA_memechip', 'index.html'))
-    peak_file_ENCSR729LGA= test_output_path + 'sorted-ENCSR729LGA.replicated.narrowPeak'
+    peak_file_ENCSR729LGA= test_output_path + 'ENCSR729LGA.600.narrowPeak'
     assert os.path.exists(peak_file_ENCSR729LGA)
     assert utils.count_lines(peak_file_ENCSR729LGA) == 600
-- 
GitLab