Commit 9e4ced0c authored by yy1533's avatar yy1533
Browse files

🍺 option to save/unsave the fastq from unknown cell barcodes

parent 38f471c1
......@@ -77,6 +77,7 @@ def demultiplexing(read1_fpath, read2_fpath, dict_bc_id2seq,
len_umi=6, len_bc=6, len_tx=35,
bc_qual_min=10,
is_gzip=True,
save_unknown_bc_fastq=False,
do_bc_rev_complement=False,
do_tx_rev_complement=False,
verbose=False):
......@@ -153,12 +154,13 @@ def demultiplexing(read1_fpath, read2_fpath, dict_bc_id2seq,
try:
fhout = bc_fhout[cell_bc]
except KeyError:
fhout = bc_fhout['UNKNOWNBC_R1']
fhout.write('{}\n{}\n{}\n{}\n'.format(umibc_name, umibc_seq,
"+", umibc_qualstr))
fhout = bc_fhout['UNKNOWNBC_R2']
fhout.write('{}\n{}\n{}\n{}\n'.format(tx_name, tx_seq,
"+", tx_qualstr))
if save_unknown_bc_fastq:
fhout = bc_fhout['UNKNOWNBC_R1']
fhout.write('{}\n{}\n{}\n{}\n'.format(umibc_name, umibc_seq,
"+", umibc_qualstr))
fhout = bc_fhout['UNKNOWNBC_R2']
fhout.write('{}\n{}\n{}\n{}\n'.format(tx_name, tx_seq,
"+", tx_qualstr))
sample_counter['unknown'] += 1
continue
......@@ -247,6 +249,9 @@ def main():
help='Length of CELSeq barcode (default=6)')
parser.add_argument('--cut-length', metavar='N', type=int, default=35,
help='Length of read on R2 to be mapped. (default=35)')
parser.add_argument('--save-unknown-bc-fastq',
dest='save_unknown_bc_fastq', action='store_true')
parser.set_defaults(save_unknown_bc_fastq=False)
parser.add_argument('--verbose', dest='verbose', action='store_true')
parser.set_defaults(verbose=False)
......@@ -269,6 +274,7 @@ def main():
len_tx=args.cut_length,
bc_qual_min=args.min_bc_quality,
is_gzip=args.is_gzip,
save_unknown_bc_fastq=args.save_unknown_bc_fastq,
do_bc_rev_complement=False,
do_tx_rev_complement=False,
verbose=args.verbose)
......
## CEL-seq2 Tech Setting ##
####################################
## Tech Setting
####################################
BC_INDEX_FPATH: '/absolute/path/to/wonderful_umi.tab'
BC_SEQ_COLUMN: 1
BC_SEQ_COLUMN: 0
BC_IDs_DEFAULT: '1-96'
UMI_START_POSITION: 0
UMI_LENGTH: 6
BC_START_POSITION: 6
BC_LENGTH: 6
## Tools ##
# 'bowtie2' 'star'
####################################
## Alignment Tools
####################################
## Which RNA-seq aligner to use? 'bowtie2' or 'star'
ALIGNER: 'bowtie2'
BOWTIE2_INDEX_PREFIX: '/absolute/path/to/bowtie2_index'
## What is the absolute path to command bowtie2?
BOWTIE2: '/absolute/path/to/bowtie2'
STAR_INDEX_DIR: '/absolute/path/to/star/folder'
## What is the sharef prefix of bowtie2 index?
BOWTIE2_INDEX_PREFIX: '/absolute/path/to/bowtie2_index'
## What is the absolute path to command STAR?
STAR: '/absolute/path/to/star'
## Whare is the directory to save STAR index?
STAR_INDEX_DIR: '/absolute/path/to/star/folder/'
####################################
## Annotations ##
# Path to GTF/GFF file
####################################
## Where is the GTF.gz/GFF.gz/GTF/GFF file?
GFF: '/absolute/path/to/wonderful.gtf.gz'
# Refer: http://htseq.readthedocs.io/en/master/count.html
## Refer: http://htseq.readthedocs.io/en/master/count.html
## What is considered the "gene"? 'gene_name' or 'gene_id'?
FEATURE_ID: 'gene_name'
## What is the content of the "gene"? Mostly 'exon'.
FEATURE_CONTENT: 'exon'
# GENE_BIOTYPE: ['protein_coding', 'lincRNA'] If nothing is set, report all.
## Which type of genes to report?
GENE_BIOTYPE:
- 'protein_coding'
- 'lincRNA'
## If nothing set as below, all genes are reported.
## GENE_BIOTYPE:
## Demultiplexing ##
####################################
## Demultiplexing
####################################
FASTQ_QUAL_MIN_OF_BC: 10
CUT_LENGTH: 35
SAVE_UNKNOWN_BC_FASTQ: false
## Alignment ##
## UMI Count ##
####################################
## UMI Count
####################################
ALN_QUAL_MIN: 0
## Running Parameters ##
####################################
## Running Parameters
####################################
num_threads: 15
verbose: True
......@@ -54,6 +54,7 @@ if not GENE_BIOTYPE: GENE_BIOTYPE = ();
## Demultiplexing ##
FASTQ_QUAL_MIN_OF_BC = config.get('FASTQ_QUAL_MIN_OF_BC', None) # 10
CUT_LENGTH = config.get('CUT_LENGTH', None) # 35
SAVE_UNKNOWN_BC_FASTQ = config.get('SAVE_UNKNOWN_BC_FASTQ', False) # False
## Alignment ##
ALIGNER = config.get('ALIGNER', None) # 'bowtie2', 'star'
assert (ALIGNER), 'Error: Specify aligner.'
......@@ -241,7 +242,9 @@ rule combo_demultiplexing:
output:
fq = dynamic(join_path(DIR_PROJ, SUBDIR_FASTQ, '{itemid}', '{bc}.fastq')),
message: 'Performing combo-demultiplexing'
params: jobs = len(item_names)
params:
jobs = len(item_names),
save_unknown_bc_fastq = SAVE_UNKNOWN_BC_FASTQ,
run:
# Demultiplx fastq in Process pool
p = Pool(params.jobs)
......@@ -275,6 +278,9 @@ rule combo_demultiplexing:
"--out-dir {}".format(itemid_fqs_dir),
"--is-gzip ",
"--stats-file {}".format(itemid_log)])
if params.save_unknown_bc_fastq:
cmd += ' --save-unknown-bc-fastq '
p.apply_async(shell, args=(cmd,))
p.close()
p.join()
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment