Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in / Register
Toggle navigation
Menu
Open sidebar
Holly Ruess
celseq2
Commits
5f9bc606
Commit
5f9bc606
authored
May 09, 2018
by
yy1533
Browse files
🍻
plotly graph on the alignment stats
parent
0493caa7
Changes
3
Hide whitespace changes
Inline
Side-by-side
celseq2/count_umi.py
View file @
5f9bc606
...
...
@@ -8,6 +8,10 @@ import HTSeq
import
pickle
import
argparse
from
collections
import
defaultdict
,
Counter
import
plotly.graph_objs
as
go
from
plotly.offline
import
plot
import
pandas
as
pd
from
celseq2.helper
import
base_name
def
invert_strand
(
iv
):
...
...
@@ -102,6 +106,64 @@ def _flatten_umi_set(umi_set):
# pass
def
plotly_alignment_stats
(
fpaths
=
[],
saveto
=
''
,
fnames
=
[]):
'''
Save a plotly box graph with a list of alignment stats files
Parameters
----------
fpaths : list
A list of file paths
saveto : str
File path to save the html file as the plotly box graph
fnames : list
A list of strings to label each ``fpaths``
Returns
-------
bool
True if saving successfully, False otherwise
'''
if
not
fnames
:
fnames
=
[
base_name
(
f
)
for
f
in
fpaths
]
if
len
(
fnames
)
!=
len
(
fpaths
):
fnames
=
[
base_name
(
f
)
for
f
in
fpaths
]
trace_data
=
[]
# aln_diagnose_item = ["_unmapped",
# "_low_map_qual", '_multimapped', "_uniquemapped",
# "_no_feature", "_ambiguous",
# "_total"]
for
i
in
range
(
len
(
fpaths
)):
f
=
fpaths
[
i
]
fname
=
fnames
[
i
]
stats
=
pd
.
read_csv
(
f
,
index_col
=
0
)
mapped
=
stats
.
loc
[
'_multimapped'
,
:]
+
stats
.
loc
[
'_uniquemapped'
,
:]
rate_mapped
=
mapped
/
stats
.
loc
[
'_total'
,
:]
overall_mapped
=
mapped
.
sum
()
overall_total
=
stats
.
loc
[
'_total'
,
:].
sum
()
stats
.
fillna
(
value
=
0
,
inplace
=
True
)
trace_data
.
append
(
go
.
Box
(
y
=
rate_mapped
,
name
=
'{} (#Mapped={}/#Total={})'
.
format
(
fname
,
overall_mapped
,
overall_total
)))
layout
=
go
.
Layout
(
xaxis
=
dict
(
showticklabels
=
False
),
title
=
'Mapped/Total alignments per BC per item'
)
fig
=
go
.
Figure
(
data
=
trace_data
,
layout
=
layout
)
try
:
plot
(
fig
,
filename
=
saveto
,
auto_open
=
False
)
return
(
True
)
except
Exception
as
e
:
print
(
e
,
flush
=
True
)
return
(
False
)
def
main
():
parser
=
argparse
.
ArgumentParser
(
add_help
=
True
)
parser
.
add_argument
(
'--sam_fpath'
,
type
=
str
,
metavar
=
'FILENAME'
,
...
...
celseq2/demultiplex.py
View file @
5f9bc606
...
...
@@ -258,7 +258,7 @@ def plotly_demultiplexing_stats(fpaths=[], saveto='', fnames=[]):
overall_stats
.
loc
[
'total'
,
'Reads(#)'
])))
layout
=
go
.
Layout
(
legend
=
dict
(
x
=-
.
1
,
y
=-
.
2
),
#
legend=dict(x=-.1, y=-.2),
xaxis
=
dict
(
showticklabels
=
False
),
title
=
'Number of reads saved per BC per item'
)
fig
=
go
.
Figure
(
data
=
num_reads_data
,
layout
=
layout
)
...
...
celseq2/workflow/celseq2_beta.snakemake
View file @
5f9bc606
...
...
@@ -96,6 +96,7 @@ SUBDIR_ALIGN = 'small_sam'
SUBDIR_ALIGN_ITEM = 'item_sam'
SUBDIR_UMI_CNT = 'small_umi_count'
SUBDIR_UMI_SET = 'small_umi_set'
SUBDIR_ALN_STATS = 'small_aln_stats'
SUBDIR_EXPR = 'expr'
SUBDIR_ST = 'ST'
SUBDIR_LOG = 'small_log'
...
...
@@ -106,16 +107,12 @@ SUBDIR_QC_EXPR = 'qc_expr'
SUBDIRS = [SUBDIR_INPUT,
SUBDIR_FASTQ, SUBDIR_ALIGN, SUBDIR_ALIGN_ITEM,
SUBDIR_UMI_CNT, SUBDIR_UMI_SET,
SUBDIR_UMI_CNT, SUBDIR_UMI_SET,
SUBDIR_ALN_STATS,
SUBDIR_EXPR,
SUBDIR_REPORT, SUBDIR_QC_EXPR,
SUBDIR_LOG, SUBDIR_QSUB, SUBDIR_ANNO
]
# aln_diagnose_item = ["_unmapped",
# "_low_map_qual", '_multimapped', "_uniquemapped",
# "_no_feature", "_ambiguous",
# "_total"]
'''
Part-2: Snakemake rules
...
...
@@ -145,6 +142,7 @@ if RUN_CELSEQ2_TO_ST:
rmfolder(SUBDIR_LOG)
rmfolder(SUBDIR_UMI_CNT)
rmfolder(SUBDIR_UMI_SET)
rmfolder(SUBDIR_ALN_STATS)
else:
rule all:
...
...
@@ -165,6 +163,7 @@ else:
rmfolder(SUBDIR_LOG)
rmfolder(SUBDIR_UMI_CNT)
rmfolder(SUBDIR_UMI_SET)
rmfolder(SUBDIR_ALN_STATS)
'''
...
...
@@ -517,6 +516,8 @@ rule count_umi:
'{itemID}', '{bcID}.pkl'),
umiset = join_path(DIR_PROJ, SUBDIR_UMI_SET,
'{itemID}', '{bcID}.pkl'),
alncnt = join_path(DIR_PROJ, SUBDIR_ALN_STATS, ALIGNER,
'{itemID}', '{bcID}.pkl'),
message: 'Counting {input.sam}'
run:
features_f, _ = pickle.load(open(input.gff, 'rb'))
...
...
@@ -528,6 +529,7 @@ rule count_umi:
dumpto=None)
pickle.dump(umi_cnt, open(output.umicnt, 'wb'))
pickle.dump(umi_set, open(output.umiset, 'wb'))
pickle.dump(aln_cnt, open(output.alncnt, 'wb'))
# Pipeline Step 4a: Merge UMIs of cells to UMI matrix of item
...
...
@@ -637,6 +639,46 @@ rule qc_umi_matrix_per_experiment:
cmd += '--sep {}'.format(',')
shell(cmd)
rule summarize_aln_stats_per_item:
input:
alncnt = dynamic(join_path(DIR_PROJ, SUBDIR_ALN_STATS, ALIGNER,
'{itemID}', '{bcID}.pkl')),
output:
aln_item = expand(join_path(DIR_PROJ, SUBDIR_REPORT,
'{itemID}',
'alignment-' + ALIGNER + '.csv'),
itemID=item_names),
run:
aln_diagnose_item = ["_unmapped",
"_low_map_qual", '_multimapped', "_uniquemapped",
"_no_feature", "_ambiguous",
"_total"]
# { item -> dict(cell_bc -> Counter(stats)) }
item_stats = defaultdict(dict)
for f in input.alncnt:
bc_name = base_name(f) # BC-1-xxx
item_id = base_name(dir_name(f)) # item-1
item_stats[item_id][bc_name] = pickle.load(open(f, 'rb'))
# export to csv
for item_id, aln_dict in item_stats.items():
exp_id = SAMPLE_TABLE.loc[item_id, 'SAMPLE_NAME'] # E1
for bc, cnt in aln_dict.items():
aln_dict[bc] = pd.Series([cnt[x] for x in aln_diagnose_item],
index=aln_diagnose_item)
aln_stats_df = pd.DataFrame(
aln_dict, index=aln_diagnose_item).fillna(0)
aln_stats_df.to_csv(join_path(DIR_PROJ, SUBDIR_REPORT,
item_id,
'alignment-' + ALIGNER + '.csv'))
# rule report_alignment_log:
# input:
# df = dynamic(join_path(DIR_PROJ, SUBDIR_LOG, '{itemid}', ALIGNER,
...
...
@@ -669,7 +711,9 @@ rule qc_umi_matrix_per_experiment:
rule REPORT:
input:
demultiplexing_fastq = join_path(DIR_PROJ, SUBDIR_REPORT,
'demultiplexing_fastq.html')
'demultiplexing_fastq.html'),
alignment_stats = join_path(DIR_PROJ, SUBDIR_REPORT,
'alignment-{}.html'.format(ALIGNER)),
# Inputs: project/report/item-*/demultiplexing.csv
# Outputs:
...
...
@@ -691,6 +735,23 @@ rule report_combo_demultiplexing:
if not work:
touch('{output.html}')
rule report_alignment_stats:
input:
aln_item = rules.summarize_aln_stats_per_item.output.aln_item,
output:
html = join_path(DIR_PROJ, SUBDIR_REPORT,
'alignment-{}.html'.format(ALIGNER)),
run:
from celseq2.count_umi import plotly_alignment_stats
stats_fpaths_labels = [base_name(dir_name(f)) for f in input.aln_item]
work = plotly_alignment_stats(
fpaths=input.aln_item,
saveto=output.html,
fnames=stats_fpaths_labels)
if not work:
touch('{output.html}')
rule cleanall:
message: "Remove all files under {DIR_PROJ}"
run:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment