Skip to content
Snippets Groups Projects
Commit cf00e0e5 authored by Venkat Malladi's avatar Venkat Malladi
Browse files

Update file names and output for filter reads.

parent e57b3605
1 merge request !20 Resolve "Use SampleIds/ Experiment Id as file names throughout pipeline"
......@@ -161,9 +161,9 @@ process filterReads {
set sampleId, file('*.bam'), file('*.bai'), experimentId, biosample, factor, treatment, replicate, controlId into dedupReads
set sampleId, file('*.bam'), experimentId, biosample, factor, treatment, replicate, controlId into convertReads
file '*flagstat.qc' into dedupReadsStats
file '*pbc.qc' into dedupReadsComplexity
file '*dup.qc' into dupReads
file '*.flagstat.qc' into dedupReadsStats
file '*.pbc.qc' into dedupReadsComplexity
file '*.dedup.qc' into dupReads
script:
......
......@@ -169,7 +169,7 @@ def dedup_mapped(bam, bam_basename, paired):
# Remove duplicates
final_bam_prefix = bam_basename + ".filt.nodup"
final_bam_prefix = bam_basename + ".dedup"
final_bam_filename = final_bam_prefix + ".bam"
if paired: # paired-end data
......@@ -206,7 +206,7 @@ def dedup_mapped(bam, bam_basename, paired):
def compute_complexity(bam, paired, bam_basename):
'''Calculate library complexity .'''
pbc_file_qc_filename = bam_basename + ".filt.nodup.pbc.qc"
pbc_file_qc_filename = bam_basename + ".pbc.qc"
tmp_pbc_file_qc_filename = "tmp.%s" % (pbc_file_qc_filename)
# Sort by name
......
......@@ -10,13 +10,14 @@ test_output_path = os.path.dirname(os.path.abspath(__file__)) + \
@pytest.mark.singleend
def test_map_qc_singleend():
assert os.path.exists(os.path.join(test_output_path, 'ENCFF646LXU.filt.nodup.bam'))
assert os.path.exists(os.path.join(test_output_path, 'ENCFF646LXU.filt.nodup.bam.bai'))
filtered_reads_report = test_output_path + 'ENCFF646LXU.filt.nodup.flagstat.qc'
assert os.path.exists(os.path.join(test_output_path, 'ENCLB831RUI.dedup.bam'))
assert os.path.exists(os.path.join(test_output_path, 'ENCLB831RUI.dedup.bam.bai'))
assert os.path.exists(os.path.join(test_output_path, 'ENCLB831RUI.dedup.qc'))
filtered_reads_report = test_output_path + 'ENCLB831RUI.dedup.flagstat.qc'
samtools_report = open(filtered_reads_report).readlines()
assert '64962570 + 0 in total' in samtools_report[0]
assert '64962570 + 0 mapped (100.00%:N/A)' in samtools_report[4]
library_complexity = test_output_path + 'ENCFF646LXU.filt.nodup.pbc.qc'
library_complexity = test_output_path + 'ENCLB831RUI.pbc.qc'
df_library_complexity = pd.read_csv(library_complexity, sep='\t')
assert df_library_complexity["NRF"].iloc[0] == 0.926192
assert df_library_complexity["PBC1"].iloc[0] == 0.926775
......@@ -25,13 +26,14 @@ def test_map_qc_singleend():
@pytest.mark.pairedend
def test_map_qc_pairedend():
assert os.path.exists(os.path.join(test_output_path, 'ENCFF293YFE_val_2ENCFF330MCZ_val_1.filt.nodup.bam'))
assert os.path.exists(os.path.join(test_output_path, 'ENCFF293YFE_val_2ENCFF330MCZ_val_1.filt.nodup.bambai'))
filtered_reads_report = test_output_path + 'ENCFF293YFE_val_2ENCFF330MCZ_val_1.filt.nodup.flagstat.qc'
assert os.path.exists(os.path.join(test_output_path, 'ENCLB568IYX.dedup.bam'))
assert os.path.exists(os.path.join(test_output_path, 'ENCLB568IYX.dedup.bam.bai'))
assert os.path.exists(os.path.join(test_output_path, 'ENCLB568IYX.dedup.qc'))
filtered_reads_report = test_output_path + 'ENCLB568IYX.dedup.flagstat.qc'
samtools_report = open(filtered_reads_report).readlines()
assert '47389080 + 0 in total' in samtools_report[0]
assert '47389080 + 0 mapped (100.00%:N/A)' in samtools_report[4]
library_complexity = test_output_path + 'ENCFF293YFE_val_2ENCFF330MCZ_val_1.filt.nodup.pbc.qc'
library_complexity = test_output_path + 'ENCLB568IYX.pbc.qc'
df_library_complexity = pd.read_csv(library_complexity, sep='\t')
assert df_library_complexity["NRF"].iloc[0] == 0.947064
assert df_library_complexity["PBC1"].iloc[0] == 0.946724
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment