Commit 3c0a5651 authored by yy1533's avatar yy1533
Browse files

💪 demultipelxing_sam will have all the claimed bc. demultiplex-sam...

💪 demultipelxing_sam will have all the claimed bc. demultiplex-sam command has option to run in either claimed bc mode or gnerate whatever it has
parent ffe03c52
......@@ -6,6 +6,8 @@ import argparse
from celseq2.helper import print_logger
from celseq2.helper import join_path
from celseq2.demultiplex import bc_dict_id2seq, str2int
def _cell_seq (name, length=6):
# BC-TCTGAG_UMI-CGTTAC => TCTGAG
......@@ -20,6 +22,7 @@ def demultiplex_sam (samfile, outdir, bc_length):
if not samfile:
return
samobj = pysam.AlignmentFile(samfile, 'rb')
dict_samout = {}
for aln in samobj:
......@@ -36,6 +39,32 @@ def demultiplex_sam (samfile, outdir, bc_length):
fh.close()
def demultiplex_sam_with_claim (samfile, outdir, bc_length, claimed_bc):
if not samfile:
return
if not claimed_bc:
return
samobj = pysam.AlignmentFile(samfile, 'rb')
dict_samout = {}
for bc in claimed_bc:
fh = pysam.AlignmentFile(
join_path(outdir, bc + '.sam'),
'w', template=samobj)
dict_samout[bc] = fh
for aln in samobj:
bc = _cell_seq(aln.query_name, length=bc_length)
fh = dict_samout.get(bc, None)
if not fh:
continue
fh.write(aln)
for _, fh in dict_samout.items():
fh.close()
def main():
parser = argparse.ArgumentParser(add_help=True)
parser.add_argument('--sbam', type=str, metavar='FILENAME',
......@@ -45,12 +74,37 @@ def main():
default='.')
parser.add_argument('--bc-length', type=int, metavar='N',
help='Length of cell barcode.', default=6)
parser.add_argument('--claim', action='store_true', dest='claim')
parser.set_defaults(claim=False)
parser.add_argument('--bc-index', type=str, metavar='FILENAME',
help='File path to barcode dictionary.')
parser.add_argument('--bc-seq-column', type=int, metavar='N',
default=0,
help=('Column of cell barcode dictionary file '
'which tells the actual sequences.'))
parser.add_argument('--bc-index-used', type=str, metavar='string',
default='1-96',
help='Index of used barcode IDs (default=1-96)')
args = parser.parse_args()
print_logger('Demultiplexing SAM/BAM starts {} ...'.format(args.sbam))
demultiplex_sam(samfile=args.sbam, outdir=args.savetodir,
bc_length=args.bc_length)
print_logger('Demultiplexing SAM/BAM ends. See: {}.'.format(args.savetodir))
if args.claim:
all_bc_dict = bc_dict_id2seq(args.bc_index, args.bc_seq_column)
bc_index_used = str2int(args.bc_index_used)
bc_seq_used = [all_bc_dict.get(x, None) for x in bc_index_used]
demultiplex_sam_with_claim(
samfile=args.sbam,
outdir=args.savetodir,
bc_length=args.bc_length,
claimed_bc=bc_seq_used)
else:
demultiplex_sam(
samfile=args.sbam,
outdir=args.savetodir,
bc_length=args.bc_length)
print_logger('Demultiplexing SAM/BAM ends. See: {}'.format(args.savetodir))
if __name__ == "__main__":
......
......@@ -390,6 +390,7 @@ rule combo_demultiplexing_sam:
sam = dynamic(join_path(DIR_PROJ, SUBDIR_ALIGN, '{itemID}', '{bcID}.sam')),
params:
jobs = len(item_names),
claim_used_bc = True,
run:
p = Pool(params.jobs)
for item_sam in input.sam:
......@@ -397,10 +398,17 @@ rule combo_demultiplexing_sam:
demultiplex_itemID_dir = join_path(DIR_PROJ, SUBDIR_ALIGN, itemID)
mkfolder(demultiplex_itemID_dir)
item_bc_used = bc_used[item_names.index(itemID)]
cmd = 'sam-demultiplex '
cmd += ' --sbam {} '.format(item_sam)
cmd += ' --savetodir {} '.format(demultiplex_itemID_dir)
cmd += ' --bc-length {} '.format(BC_LENGTH)
if params.claim_used_bc:
cmd += ' --claim '
cmd += ' --bc-index {} '.format(BC_INDEX_FPATH)
cmd += ' --bc-seq-column {} '.format(BC_SEQ_COLUMN)
cmd += ' --bc-index-used {} '.format(item_bc_used)
p.apply_async(shell, args=(cmd,))
p.close()
p.join()
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment