Commit c69fb44b authored by yy1533's avatar yy1533
Browse files

🍻 release QC plots

parent e07f8cf7
......@@ -86,9 +86,13 @@ def plotly_qc(fpath, saveto, sep=',', name=''):
num_detected_genes = (expr > 0).sum(axis=0)
mt_index = pd.Series(
[x for x in expr.index if x.startswith('mt-') or x.startswith('MT-')])
mt_umis = expr.loc[mt_index, :].sum(axis=0)
percent_mt = mt_umis / total_num_UMIs
percent_mt = percent_mt.replace(np.inf, 0)
if not mt_index:
percent_mt = 0
else:
mt_umis = expr.loc[mt_index, :].sum(axis=0)
percent_mt = mt_umis / total_num_UMIs
percent_mt = percent_mt.replace(np.inf, 0)
qc = pd.DataFrame(dict(total_num_UMIs=total_num_UMIs,
num_detected_genes=num_detected_genes,
percent_mt=percent_mt))
......@@ -109,7 +113,7 @@ def plotly_qc(fpath, saveto, sep=',', name=''):
x=qc.total_num_UMIs,
y=qc.percent_mt,
xlab='#Total UMIs (median={})'.format(qc.total_num_UMIs.median()),
ylab='%MT (median={:6.4f})'.format(qc.percent_mt.median()),
ylab='MT Fraction (median={:6.4f})'.format(qc.percent_mt.median()),
main=name,
hover_text=qc.index.values)
plotly_mt_vs_umi.layout.yaxis.scaleanchor = None
......@@ -188,8 +192,12 @@ def plotly_qc_st(fpath, saveto, sep='\t', name=''):
ST_detected_genes = (ST.iloc[:, 2:] > 0).sum(axis=1)
mt_cols = [x for x in ST.columns if x.startswith(
'mt-') or x.startswith('MT-')]
ST_percent_mt = ST[mt_cols].sum(axis=1) / ST_total_UMIs
ST_percent_mt.replace(np.inf, 0)
if not mt_cols:
ST_percent_mt = 0
else:
ST_percent_mt = ST[mt_cols].sum(axis=1) / ST_total_UMIs
ST_percent_mt.replace(np.inf, 0)
ST_qc = pd.DataFrame(
dict(Row=ST.Row, Col=ST.Col,
total_num_UMIs=ST_total_UMIs,
......@@ -247,8 +255,8 @@ def main():
help=('file path (CSV/TSV) to the expression file with genes/features '
'as rows and cells/samples on columns. '
'First column saves gene names.'))
parser.add_argument('saveto', type=str, metavar='FILENAME')
parser.add_argument('saveto', type=str, metavar='FILENAME',
help='File path (html) to save the QC plots.')
parser.add_argument('--name', type=str, metavar='STR', default='')
parser.add_argument('--sep', type=str, default='\t',
help='File sep (default: \'\t\')')
......
......@@ -104,12 +104,13 @@ SUBDIR_LOG = 'small_log'
SUBDIR_QSUB = 'qsub_log'
SUBDIR_ANNO = 'annotation'
SUBDIR_REPORT = 'report'
SUBDIR_QC_EXPR = 'qc_expr'
SUBDIRS = [SUBDIR_INPUT,
SUBDIR_FASTQ, SUBDIR_ALIGN, SUBDIR_ALIGN_ITEM,
SUBDIR_UMI_CNT, SUBDIR_UMI_SET,
SUBDIR_EXPR,
SUBDIR_REPORT,
SUBDIR_REPORT, SUBDIR_QC_EXPR,
SUBDIR_LOG, SUBDIR_QSUB, SUBDIR_ANNO
]
......@@ -128,9 +129,7 @@ workdir: DIR_PROJ
if RUN_CELSEQ2_TO_ST:
rule all:
input:
# '_done_annotation',
'_done_UMI',
# '_done_report',
'_done_ST',
output:
touch('_DONE'),
......@@ -149,9 +148,7 @@ if RUN_CELSEQ2_TO_ST:
else:
rule all:
input:
# '_done_annotation',
'_done_UMI',
# '_done_report',
output:
touch('_DONE'),
run:
......@@ -173,32 +170,36 @@ rule COUNT_MATRIX:
expid=list(set(sample_list))),
hdf = expand(join_path(DIR_PROJ, SUBDIR_EXPR, '{expid}', 'expr.h5'),
expid=list(set(sample_list))),
html = expand(join_path(DIR_PROJ, SUBDIR_QC_EXPR,
'{expid}', 'QC.html'),
expid=list(set(sample_list))),
output:
touch('_done_UMI')
message: 'Finished counting UMI-count matrix.'
run:
# if ALIGNER == 'star':
# shell('rm {}'.format('_done_star_genome_loaded'))
# print('Free memory loaded by STAR', flush=True)
# with tempfile.TemporaryDirectory() as tmpdirname:
# cmd = 'STAR '
# cmd += '--genomeLoad Remove '
# cmd += '--genomeDir {STAR_INDEX_DIR} '
# cmd += '--outFileNamePrefix '
# cmd += join_path(tmpdirname, '')
# shell(cmd)
if glob.glob('celseq2_job*.sh*'):
mkfolder(SUBDIR_QSUB)
shell('mv -f celseq2_job*.sh* {}'.format(SUBDIR_QSUB))
print_logger('UMI-count matrix is saved at {}'.format(input.csv))
rule QC_COUNT_MATRIX:
input:
html = expand(join_path(DIR_PROJ, SUBDIR_QC_EXPR,
'{expid}', 'QC.html'),
expid=list(set(sample_list))),
message: 'Finished QC UMI-count matrix.'
# join_path(DIR_PROJ, SUBDIR_QC_EXPR, '{expid}', 'QC_ST.html')
if RUN_CELSEQ2_TO_ST:
rule CELSEQ2_TO_ST:
input:
tsv = expand(join_path(DIR_PROJ, SUBDIR_ST, '{expid}', 'ST.tsv'),
expid=list(set(sample_list))),
html = expand(join_path(DIR_PROJ, SUBDIR_QC_EXPR,
'{expid}', 'QC_ST.html'),
expid=list(set(sample_list))),
message: 'Convert to ST format.'
output:
touch('_done_ST')
......@@ -207,7 +208,6 @@ if RUN_CELSEQ2_TO_ST:
mkfolder(SUBDIR_QSUB)
shell('mv -f celseq2_job*.sh* {}'.format(SUBDIR_QSUB))
rule _celseq2_to_st:
input:
hdf = join_path(DIR_PROJ, SUBDIR_EXPR, '{expid}', 'expr.h5'),
......@@ -228,6 +228,20 @@ if RUN_CELSEQ2_TO_ST:
cmd += ' --exclude-nondetected-genes '
shell(cmd)
rule qc_umi_matrix_per_experiment_ST:
input:
tsv = join_path(DIR_PROJ, SUBDIR_ST, '{expid}', 'ST.tsv'),
output:
html = join_path(DIR_PROJ, SUBDIR_QC_EXPR, '{expid}', 'QC_ST.html'),
params:
expid = '{expid}',
run:
cmd = 'celseq2-qc '
cmd += '{input.tsv} {output.html} '
cmd += '--name {params.expid} '
cmd += '--st'
shell(cmd)
# rule REPORT_ALIGNMENT_LOG:
# input:
......@@ -570,7 +584,19 @@ rule summarize_umi_matrix_per_experiment:
expr_df.to_hdf(join_path(DIR_PROJ, SUBDIR_EXPR,
exp_id, 'expr.h5'), 'table')
rule qc_umi_matrix_per_experiment:
input:
csv = join_path(DIR_PROJ, SUBDIR_EXPR, '{expid}', 'expr.csv'),
output:
html = join_path(DIR_PROJ, SUBDIR_QC_EXPR, '{expid}', 'QC.html'),
params:
expid = '{expid}',
run:
cmd = 'celseq2-qc '
cmd += '{input.csv} {output.html} '
cmd += '--name {params.expid} '
cmd += '--sep {}'.format(',')
shell(cmd)
# rule report_alignment_log:
# input:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment