Skip to content
Snippets Groups Projects
Commit 9b65c0bd authored by Venkat Malladi's avatar Venkat Malladi
Browse files

Add header to pbc file.

parent 2948ddca
Branches
Tags
1 merge request!6Resolve "Bam Stats and Filter"
...@@ -10,6 +10,7 @@ import shlex ...@@ -10,6 +10,7 @@ import shlex
import logging import logging
from multiprocessing import cpu_count from multiprocessing import cpu_count
import utils import utils
import pandas as pd
EPILOG = ''' EPILOG = '''
For more details: For more details:
...@@ -205,7 +206,8 @@ def dedup_mapped(bam, bam_basename, paired): ...@@ -205,7 +206,8 @@ def dedup_mapped(bam, bam_basename, paired):
def compute_complexity(bam, paired, bam_basename): def compute_complexity(bam, paired, bam_basename):
'''Calculate library complexity .''' '''Calculate library complexity .'''
pbc_file_qc_filename = bam_basename + "filt.nodup.pbc.qc" pbc_file_qc_filename = bam_basename + ".filt.nodup.pbc.qc"
tmp_pbc_file_qc_filename = "tmp.%s" % (pbc_file_qc_filename)
# Sort by name # Sort by name
# convert to bedPE and obtain fragment coordinates # convert to bedPE and obtain fragment coordinates
...@@ -220,6 +222,13 @@ def compute_complexity(bam, paired, bam_basename): ...@@ -220,6 +222,13 @@ def compute_complexity(bam, paired, bam_basename):
# NRF=Distinct/Total [tab] # NRF=Distinct/Total [tab]
# PBC1=OnePair/Distinct [tab] # PBC1=OnePair/Distinct [tab]
# PBC2=OnePair/TwoPair # PBC2=OnePair/TwoPair
pbc_headers = ['TotalReadPairs',
'DistinctReadPairs',
'OneReadPair',
'TwoReadPairs',
'NRF',
'PBC1',
'PBC2']
if paired: if paired:
steps = [ steps = [
...@@ -236,10 +245,16 @@ def compute_complexity(bam, paired, bam_basename): ...@@ -236,10 +245,16 @@ def compute_complexity(bam, paired, bam_basename):
"uniq -c", "uniq -c",
r"""awk 'BEGIN{mt=0;m0=0;m1=0;m2=0} ($1==1){m1=m1+1} ($1==2){m2=m2+1} {m0=m0+1} {mt=mt+$1} END{printf "%d\t%d\t%d\t%d\t%f\t%f\t%f\n",mt,m0,m1,m2,m0/mt,m1/m0,m1/m2}'""" r"""awk 'BEGIN{mt=0;m0=0;m1=0;m2=0} ($1==1){m1=m1+1} ($1==2){m2=m2+1} {m0=m0+1} {mt=mt+$1} END{printf "%d\t%d\t%d\t%d\t%f\t%f\t%f\n",mt,m0,m1,m2,m0/mt,m1/m0,m1/m2}'"""
]) ])
out, err = utils.run_pipe(steps, pbc_file_qc_filename) out, err = utils.run_pipe(steps, tmp_pbc_file_qc_filename)
if err: if err:
logger.error("PBC file error: %s" % (err)) logger.error("PBC file error: %s" % (err))
# Add headers
pbc_file = pd.read_csv(tmp_pbc_file_qc_filename, sep='\t', header=None,
names=pbc_headers)
pbc_file.to_csv(pbc_file_qc_filename, header=True, sep='\t', index=False)
os.remove(tmp_pbc_file_qc_filename)
def main(): def main():
args = get_args() args = get_args()
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment