Skip to content
Snippets Groups Projects
Commit 9b65c0bd authored by Venkat Malladi's avatar Venkat Malladi
Browse files

Add header to pbc file.

parent 2948ddca
Branches
Tags
No related merge requests found
...@@ -10,6 +10,7 @@ import shlex ...@@ -10,6 +10,7 @@ import shlex
import logging import logging
from multiprocessing import cpu_count from multiprocessing import cpu_count
import utils import utils
import pandas as pd
EPILOG = ''' EPILOG = '''
For more details: For more details:
...@@ -205,7 +206,8 @@ def dedup_mapped(bam, bam_basename, paired): ...@@ -205,7 +206,8 @@ def dedup_mapped(bam, bam_basename, paired):
def compute_complexity(bam, paired, bam_basename): def compute_complexity(bam, paired, bam_basename):
'''Calculate library complexity .''' '''Calculate library complexity .'''
pbc_file_qc_filename = bam_basename + "filt.nodup.pbc.qc" pbc_file_qc_filename = bam_basename + ".filt.nodup.pbc.qc"
tmp_pbc_file_qc_filename = "tmp.%s" % (pbc_file_qc_filename)
# Sort by name # Sort by name
# convert to bedPE and obtain fragment coordinates # convert to bedPE and obtain fragment coordinates
...@@ -220,6 +222,13 @@ def compute_complexity(bam, paired, bam_basename): ...@@ -220,6 +222,13 @@ def compute_complexity(bam, paired, bam_basename):
# NRF=Distinct/Total [tab] # NRF=Distinct/Total [tab]
# PBC1=OnePair/Distinct [tab] # PBC1=OnePair/Distinct [tab]
# PBC2=OnePair/TwoPair # PBC2=OnePair/TwoPair
pbc_headers = ['TotalReadPairs',
'DistinctReadPairs',
'OneReadPair',
'TwoReadPairs',
'NRF',
'PBC1',
'PBC2']
if paired: if paired:
steps = [ steps = [
...@@ -236,10 +245,16 @@ def compute_complexity(bam, paired, bam_basename): ...@@ -236,10 +245,16 @@ def compute_complexity(bam, paired, bam_basename):
"uniq -c", "uniq -c",
r"""awk 'BEGIN{mt=0;m0=0;m1=0;m2=0} ($1==1){m1=m1+1} ($1==2){m2=m2+1} {m0=m0+1} {mt=mt+$1} END{printf "%d\t%d\t%d\t%d\t%f\t%f\t%f\n",mt,m0,m1,m2,m0/mt,m1/m0,m1/m2}'""" r"""awk 'BEGIN{mt=0;m0=0;m1=0;m2=0} ($1==1){m1=m1+1} ($1==2){m2=m2+1} {m0=m0+1} {mt=mt+$1} END{printf "%d\t%d\t%d\t%d\t%f\t%f\t%f\n",mt,m0,m1,m2,m0/mt,m1/m0,m1/m2}'"""
]) ])
out, err = utils.run_pipe(steps, pbc_file_qc_filename) out, err = utils.run_pipe(steps, tmp_pbc_file_qc_filename)
if err: if err:
logger.error("PBC file error: %s" % (err)) logger.error("PBC file error: %s" % (err))
# Add headers
pbc_file = pd.read_csv(tmp_pbc_file_qc_filename, sep='\t', header=None,
names=pbc_headers)
pbc_file.to_csv(pbc_file_qc_filename, header=True, sep='\t', index=False)
os.remove(tmp_pbc_file_qc_filename)
def main(): def main():
args = get_args() args = get_args()
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment