diff --git a/workflow/main.nf b/workflow/main.nf index 0f4b58f8b18c15ac1398f042587cac6c9e9fc2e4..3bd2edf01fc2191bdfe863e785cc4821128350b9 100644 --- a/workflow/main.nf +++ b/workflow/main.nf @@ -161,9 +161,9 @@ process filterReads { set sampleId, file('*.bam'), file('*.bai'), experimentId, biosample, factor, treatment, replicate, controlId into dedupReads set sampleId, file('*.bam'), experimentId, biosample, factor, treatment, replicate, controlId into convertReads - file '*flagstat.qc' into dedupReadsStats - file '*pbc.qc' into dedupReadsComplexity - file '*dup.qc' into dupReads + file '*.flagstat.qc' into dedupReadsStats + file '*.pbc.qc' into dedupReadsComplexity + file '*.dedup.qc' into dupReads script: diff --git a/workflow/scripts/map_qc.py b/workflow/scripts/map_qc.py index 4cc549fa1652da3403b0d527b9127273b58e465c..78b9298282a59be5c94d7a62d544fb4aebb21deb 100644 --- a/workflow/scripts/map_qc.py +++ b/workflow/scripts/map_qc.py @@ -169,7 +169,7 @@ def dedup_mapped(bam, bam_basename, paired): # Remove duplicates - final_bam_prefix = bam_basename + ".filt.nodup" + final_bam_prefix = bam_basename + ".dedup" final_bam_filename = final_bam_prefix + ".bam" if paired: # paired-end data @@ -206,7 +206,7 @@ def dedup_mapped(bam, bam_basename, paired): def compute_complexity(bam, paired, bam_basename): '''Calculate library complexity .''' - pbc_file_qc_filename = bam_basename + ".filt.nodup.pbc.qc" + pbc_file_qc_filename = bam_basename + ".pbc.qc" tmp_pbc_file_qc_filename = "tmp.%s" % (pbc_file_qc_filename) # Sort by name diff --git a/workflow/tests/test_map_qc.py b/workflow/tests/test_map_qc.py index 78e3b39d27f4762ef2c65f6ace51a39c7dbca568..5ae8218f33fb6a339aebffcb97598ff05793cf01 100644 --- a/workflow/tests/test_map_qc.py +++ b/workflow/tests/test_map_qc.py @@ -10,13 +10,14 @@ test_output_path = os.path.dirname(os.path.abspath(__file__)) + \ @pytest.mark.singleend def test_map_qc_singleend(): - assert os.path.exists(os.path.join(test_output_path, 'ENCFF646LXU.filt.nodup.bam')) - assert os.path.exists(os.path.join(test_output_path, 'ENCFF646LXU.filt.nodup.bam.bai')) - filtered_reads_report = test_output_path + 'ENCFF646LXU.filt.nodup.flagstat.qc' + assert os.path.exists(os.path.join(test_output_path, 'ENCLB831RUI.dedup.bam')) + assert os.path.exists(os.path.join(test_output_path, 'ENCLB831RUI.dedup.bam.bai')) + assert os.path.exists(os.path.join(test_output_path, 'ENCLB831RUI.dedup.qc')) + filtered_reads_report = test_output_path + 'ENCLB831RUI.dedup.flagstat.qc' samtools_report = open(filtered_reads_report).readlines() assert '64962570 + 0 in total' in samtools_report[0] assert '64962570 + 0 mapped (100.00%:N/A)' in samtools_report[4] - library_complexity = test_output_path + 'ENCFF646LXU.filt.nodup.pbc.qc' + library_complexity = test_output_path + 'ENCLB831RUI.pbc.qc' df_library_complexity = pd.read_csv(library_complexity, sep='\t') assert df_library_complexity["NRF"].iloc[0] == 0.926192 assert df_library_complexity["PBC1"].iloc[0] == 0.926775 @@ -25,13 +26,14 @@ def test_map_qc_singleend(): @pytest.mark.pairedend def test_map_qc_pairedend(): - assert os.path.exists(os.path.join(test_output_path, 'ENCFF293YFE_val_2ENCFF330MCZ_val_1.filt.nodup.bam')) - assert os.path.exists(os.path.join(test_output_path, 'ENCFF293YFE_val_2ENCFF330MCZ_val_1.filt.nodup.bambai')) - filtered_reads_report = test_output_path + 'ENCFF293YFE_val_2ENCFF330MCZ_val_1.filt.nodup.flagstat.qc' + assert os.path.exists(os.path.join(test_output_path, 'ENCLB568IYX.dedup.bam')) + assert os.path.exists(os.path.join(test_output_path, 'ENCLB568IYX.dedup.bam.bai')) + assert os.path.exists(os.path.join(test_output_path, 'ENCLB568IYX.dedup.qc')) + filtered_reads_report = test_output_path + 'ENCLB568IYX.dedup.flagstat.qc' samtools_report = open(filtered_reads_report).readlines() assert '47389080 + 0 in total' in samtools_report[0] assert '47389080 + 0 mapped (100.00%:N/A)' in samtools_report[4] - library_complexity = test_output_path + 'ENCFF293YFE_val_2ENCFF330MCZ_val_1.filt.nodup.pbc.qc' + library_complexity = test_output_path + 'ENCLB568IYX.pbc.qc' df_library_complexity = pd.read_csv(library_complexity, sep='\t') assert df_library_complexity["NRF"].iloc[0] == 0.947064 assert df_library_complexity["PBC1"].iloc[0] == 0.946724