From cf00e0e580f6fff183be01f5a7cc52128091acc3 Mon Sep 17 00:00:00 2001
From: Venkat Malladi <venkat.malladi@utsouthwestern.edu>
Date: Sun, 6 Jan 2019 19:30:21 -0600
Subject: [PATCH] Update file names and output for filter reads.

---
 workflow/main.nf              |  6 +++---
 workflow/scripts/map_qc.py    |  4 ++--
 workflow/tests/test_map_qc.py | 18 ++++++++++--------
 3 files changed, 15 insertions(+), 13 deletions(-)

diff --git a/workflow/main.nf b/workflow/main.nf
index 0f4b58f..3bd2edf 100644
--- a/workflow/main.nf
+++ b/workflow/main.nf
@@ -161,9 +161,9 @@ process filterReads {
 
   set sampleId, file('*.bam'), file('*.bai'), experimentId, biosample, factor, treatment, replicate, controlId into dedupReads
   set sampleId, file('*.bam'), experimentId, biosample, factor, treatment, replicate, controlId into convertReads
-  file '*flagstat.qc' into dedupReadsStats
-  file '*pbc.qc' into dedupReadsComplexity
-  file '*dup.qc' into dupReads
+  file '*.flagstat.qc' into dedupReadsStats
+  file '*.pbc.qc' into dedupReadsComplexity
+  file '*.dedup.qc' into dupReads
 
   script:
 
diff --git a/workflow/scripts/map_qc.py b/workflow/scripts/map_qc.py
index 4cc549f..78b9298 100644
--- a/workflow/scripts/map_qc.py
+++ b/workflow/scripts/map_qc.py
@@ -169,7 +169,7 @@ def dedup_mapped(bam, bam_basename, paired):
 
 
     # Remove duplicates
-    final_bam_prefix = bam_basename + ".filt.nodup"
+    final_bam_prefix = bam_basename + ".dedup"
     final_bam_filename = final_bam_prefix + ".bam"
 
     if paired:  # paired-end data
@@ -206,7 +206,7 @@ def dedup_mapped(bam, bam_basename, paired):
 def compute_complexity(bam, paired, bam_basename):
     '''Calculate library complexity .'''
 
-    pbc_file_qc_filename = bam_basename + ".filt.nodup.pbc.qc"
+    pbc_file_qc_filename = bam_basename + ".pbc.qc"
     tmp_pbc_file_qc_filename = "tmp.%s" % (pbc_file_qc_filename)
 
     # Sort by name
diff --git a/workflow/tests/test_map_qc.py b/workflow/tests/test_map_qc.py
index 78e3b39..5ae8218 100644
--- a/workflow/tests/test_map_qc.py
+++ b/workflow/tests/test_map_qc.py
@@ -10,13 +10,14 @@ test_output_path = os.path.dirname(os.path.abspath(__file__)) + \
 
 @pytest.mark.singleend
 def test_map_qc_singleend():
-    assert os.path.exists(os.path.join(test_output_path, 'ENCFF646LXU.filt.nodup.bam'))
-    assert os.path.exists(os.path.join(test_output_path, 'ENCFF646LXU.filt.nodup.bam.bai'))
-    filtered_reads_report = test_output_path + 'ENCFF646LXU.filt.nodup.flagstat.qc'
+    assert os.path.exists(os.path.join(test_output_path, 'ENCLB831RUI.dedup.bam'))
+    assert os.path.exists(os.path.join(test_output_path, 'ENCLB831RUI.dedup.bam.bai'))
+    assert os.path.exists(os.path.join(test_output_path, 'ENCLB831RUI.dedup.qc'))
+    filtered_reads_report = test_output_path + 'ENCLB831RUI.dedup.flagstat.qc'
     samtools_report = open(filtered_reads_report).readlines()
     assert '64962570 + 0 in total' in samtools_report[0]
     assert '64962570 + 0 mapped (100.00%:N/A)' in samtools_report[4]
-    library_complexity = test_output_path + 'ENCFF646LXU.filt.nodup.pbc.qc'
+    library_complexity = test_output_path + 'ENCLB831RUI.pbc.qc'
     df_library_complexity = pd.read_csv(library_complexity, sep='\t')
     assert  df_library_complexity["NRF"].iloc[0] == 0.926192
     assert  df_library_complexity["PBC1"].iloc[0] == 0.926775
@@ -25,13 +26,14 @@ def test_map_qc_singleend():
 
 @pytest.mark.pairedend
 def test_map_qc_pairedend():
-    assert os.path.exists(os.path.join(test_output_path, 'ENCFF293YFE_val_2ENCFF330MCZ_val_1.filt.nodup.bam'))
-    assert os.path.exists(os.path.join(test_output_path, 'ENCFF293YFE_val_2ENCFF330MCZ_val_1.filt.nodup.bambai'))
-    filtered_reads_report = test_output_path + 'ENCFF293YFE_val_2ENCFF330MCZ_val_1.filt.nodup.flagstat.qc'
+    assert os.path.exists(os.path.join(test_output_path, 'ENCLB568IYX.dedup.bam'))
+    assert os.path.exists(os.path.join(test_output_path, 'ENCLB568IYX.dedup.bam.bai'))
+    assert os.path.exists(os.path.join(test_output_path, 'ENCLB568IYX.dedup.qc'))
+    filtered_reads_report = test_output_path + 'ENCLB568IYX.dedup.flagstat.qc'
     samtools_report = open(filtered_reads_report).readlines()
     assert '47389080 + 0 in total' in samtools_report[0]
     assert '47389080 + 0 mapped (100.00%:N/A)' in samtools_report[4]
-    library_complexity = test_output_path + 'ENCFF293YFE_val_2ENCFF330MCZ_val_1.filt.nodup.pbc.qc'
+    library_complexity = test_output_path + 'ENCLB568IYX.pbc.qc'
     df_library_complexity = pd.read_csv(library_complexity, sep='\t')
     assert  df_library_complexity["NRF"].iloc[0] == 0.947064
     assert  df_library_complexity["PBC1"].iloc[0] == 0.946724
-- 
GitLab