From cf00e0e580f6fff183be01f5a7cc52128091acc3 Mon Sep 17 00:00:00 2001 From: Venkat Malladi <venkat.malladi@utsouthwestern.edu> Date: Sun, 6 Jan 2019 19:30:21 -0600 Subject: [PATCH] Update file names and output for filter reads. --- workflow/main.nf | 6 +++--- workflow/scripts/map_qc.py | 4 ++-- workflow/tests/test_map_qc.py | 18 ++++++++++-------- 3 files changed, 15 insertions(+), 13 deletions(-) diff --git a/workflow/main.nf b/workflow/main.nf index 0f4b58f..3bd2edf 100644 --- a/workflow/main.nf +++ b/workflow/main.nf @@ -161,9 +161,9 @@ process filterReads { set sampleId, file('*.bam'), file('*.bai'), experimentId, biosample, factor, treatment, replicate, controlId into dedupReads set sampleId, file('*.bam'), experimentId, biosample, factor, treatment, replicate, controlId into convertReads - file '*flagstat.qc' into dedupReadsStats - file '*pbc.qc' into dedupReadsComplexity - file '*dup.qc' into dupReads + file '*.flagstat.qc' into dedupReadsStats + file '*.pbc.qc' into dedupReadsComplexity + file '*.dedup.qc' into dupReads script: diff --git a/workflow/scripts/map_qc.py b/workflow/scripts/map_qc.py index 4cc549f..78b9298 100644 --- a/workflow/scripts/map_qc.py +++ b/workflow/scripts/map_qc.py @@ -169,7 +169,7 @@ def dedup_mapped(bam, bam_basename, paired): # Remove duplicates - final_bam_prefix = bam_basename + ".filt.nodup" + final_bam_prefix = bam_basename + ".dedup" final_bam_filename = final_bam_prefix + ".bam" if paired: # paired-end data @@ -206,7 +206,7 @@ def dedup_mapped(bam, bam_basename, paired): def compute_complexity(bam, paired, bam_basename): '''Calculate library complexity .''' - pbc_file_qc_filename = bam_basename + ".filt.nodup.pbc.qc" + pbc_file_qc_filename = bam_basename + ".pbc.qc" tmp_pbc_file_qc_filename = "tmp.%s" % (pbc_file_qc_filename) # Sort by name diff --git a/workflow/tests/test_map_qc.py b/workflow/tests/test_map_qc.py index 78e3b39..5ae8218 100644 --- a/workflow/tests/test_map_qc.py +++ b/workflow/tests/test_map_qc.py @@ -10,13 +10,14 @@ test_output_path = os.path.dirname(os.path.abspath(__file__)) + \ @pytest.mark.singleend def test_map_qc_singleend(): - assert os.path.exists(os.path.join(test_output_path, 'ENCFF646LXU.filt.nodup.bam')) - assert os.path.exists(os.path.join(test_output_path, 'ENCFF646LXU.filt.nodup.bam.bai')) - filtered_reads_report = test_output_path + 'ENCFF646LXU.filt.nodup.flagstat.qc' + assert os.path.exists(os.path.join(test_output_path, 'ENCLB831RUI.dedup.bam')) + assert os.path.exists(os.path.join(test_output_path, 'ENCLB831RUI.dedup.bam.bai')) + assert os.path.exists(os.path.join(test_output_path, 'ENCLB831RUI.dedup.qc')) + filtered_reads_report = test_output_path + 'ENCLB831RUI.dedup.flagstat.qc' samtools_report = open(filtered_reads_report).readlines() assert '64962570 + 0 in total' in samtools_report[0] assert '64962570 + 0 mapped (100.00%:N/A)' in samtools_report[4] - library_complexity = test_output_path + 'ENCFF646LXU.filt.nodup.pbc.qc' + library_complexity = test_output_path + 'ENCLB831RUI.pbc.qc' df_library_complexity = pd.read_csv(library_complexity, sep='\t') assert df_library_complexity["NRF"].iloc[0] == 0.926192 assert df_library_complexity["PBC1"].iloc[0] == 0.926775 @@ -25,13 +26,14 @@ def test_map_qc_singleend(): @pytest.mark.pairedend def test_map_qc_pairedend(): - assert os.path.exists(os.path.join(test_output_path, 'ENCFF293YFE_val_2ENCFF330MCZ_val_1.filt.nodup.bam')) - assert os.path.exists(os.path.join(test_output_path, 'ENCFF293YFE_val_2ENCFF330MCZ_val_1.filt.nodup.bambai')) - filtered_reads_report = test_output_path + 'ENCFF293YFE_val_2ENCFF330MCZ_val_1.filt.nodup.flagstat.qc' + assert os.path.exists(os.path.join(test_output_path, 'ENCLB568IYX.dedup.bam')) + assert os.path.exists(os.path.join(test_output_path, 'ENCLB568IYX.dedup.bam.bai')) + assert os.path.exists(os.path.join(test_output_path, 'ENCLB568IYX.dedup.qc')) + filtered_reads_report = test_output_path + 'ENCLB568IYX.dedup.flagstat.qc' samtools_report = open(filtered_reads_report).readlines() assert '47389080 + 0 in total' in samtools_report[0] assert '47389080 + 0 mapped (100.00%:N/A)' in samtools_report[4] - library_complexity = test_output_path + 'ENCFF293YFE_val_2ENCFF330MCZ_val_1.filt.nodup.pbc.qc' + library_complexity = test_output_path + 'ENCLB568IYX.pbc.qc' df_library_complexity = pd.read_csv(library_complexity, sep='\t') assert df_library_complexity["NRF"].iloc[0] == 0.947064 assert df_library_complexity["PBC1"].iloc[0] == 0.946724 -- GitLab