Commit 66194ddc, authored Mar 20, 2018 by yy1533
🐾 report star alignment log. Related to #3

Parent: ac16e685
Changes: 3 files changed
celseq2/parse_log.py
@@ -38,12 +38,12 @@ def parse_bowtie2_report(raw_data):
             'paired_aligned_mate_none': r"(\d+) \([\d\.]+%\) aligned 0 times"
         }
     }
-    parsed_data = OrderedDict()
+    parsed_data = OrderedDict({k: 0 for k in regexes['unpaired'].keys()})
     for k, r in regexes['unpaired'].items():
         r_search = re.search(r, raw_data, re.MULTILINE)
         if r_search:
             parsed_data[k] = float(r_search.group(1))
-    assert parsed_data['total_reads'] == parsed_data['total_reads']  # single-end
+    assert parsed_data['total_reads'] == parsed_data['total_unpaired']  # single-end
     return(parsed_data)
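For context, a minimal sketch (not part of the commit) of why the zero-initialised OrderedDict matters: if one of the Bowtie2 regexes finds no match in a given log, the key now still exists with value 0 instead of being absent when the per-cell reports are merged later. The log excerpt below is made up, and only one regex from the function is used.

    # Sketch only: zero-initialising keeps unmatched metrics at 0 instead of
    # raising KeyError downstream.
    import re
    from collections import OrderedDict

    regexes = {'paired_aligned_mate_none': r"(\d+) \([\d\.]+%\) aligned 0 times"}
    raw_data = "12345 reads; of these:"   # fabricated log text with no matching line

    parsed_data = OrderedDict({k: 0 for k in regexes.keys()})
    for k, r in regexes.items():
        r_search = re.search(r, raw_data, re.MULTILINE)
        if r_search:
            parsed_data[k] = float(r_search.group(1))

    print(parsed_data['paired_aligned_mate_none'])   # prints 0, no KeyError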
@@ -78,7 +78,7 @@ def parse_star_report(raw_data):
         'unmapped_tooshort_percent': r"% of reads unmapped: too short \|\s+([\d\.]+)",
         'unmapped_other_percent': r"% of reads unmapped: other \|\s+([\d\.]+)",
     }
-    parsed_data = OrderedDict()
+    parsed_data = OrderedDict({k: 0 for k in regexes.keys()})
     for k, r in regexes.items():
         r_search = re.search(r, raw_data, re.MULTILINE)
         if r_search:
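As a rough illustration (again, not part of the commit), the parse_star_report regexes above pull percentages out of lines shaped like the entries in STAR's Log.final.out; the sample line below is fabricated to match that pattern.

    # Sketch only: applying one regex from parse_star_report to a fabricated
    # line resembling STAR's Log.final.out.
    import re

    line = "% of reads unmapped: too short |\t12.34%"   # illustrative, not real STAR output
    m = re.search(r"% of reads unmapped: too short \|\s+([\d\.]+)", line)
    print(float(m.group(1)) if m else 0)                # 12.34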
celseq2/version.py
-__version__ = '0.4.5'
+__version__ = '0.4.6'
celseq2/workflow/celseq2.snakemake
@@ -115,7 +115,7 @@ aln_diagnose_item = ["_unmapped",
workdir: DIR_PROJ
rule
Count_Matrix
:
rule
all
:
input:
csv = expand(join_path(DIR_PROJ, SUBDIR_EXPR, '{expid}', 'expr.csv'),
expid=list(set(sample_list))),
@@ -124,6 +124,13 @@ rule Count_Matrix:
         report = expand(join_path(DIR_PROJ, SUBDIR_REPORT, '{itemid}',
                         'alignment_'+ALIGNER+'.csv'), itemid=item_names),

+rule COUNT_MATRIX:
+    input:
+        csv = expand(join_path(DIR_PROJ, SUBDIR_EXPR, '{expid}', 'expr.csv'),
+                     expid=list(set(sample_list))),
+        hdf = expand(join_path(DIR_PROJ, SUBDIR_EXPR, '{expid}', 'expr.h5'),
+                     expid=list(set(sample_list))),
     output:
         touch('_done_UMI')
     message: 'Finished counting UMI-count matrix.'
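For readers unfamiliar with the expand() idiom used in these aggregate rules, a rough Python sketch of what the csv/hdf inputs evaluate to follows. The project layout and sample ids are invented for illustration, and os.path.join stands in for the workflow's join_path helper.

    # Sketch only: what expand() yields for the COUNT_MATRIX inputs, with
    # made-up values for DIR_PROJ, SUBDIR_EXPR and sample_list.
    from os.path import join as join_path      # stand-in for the workflow's join_path
    from snakemake.io import expand

    DIR_PROJ, SUBDIR_EXPR = 'project', 'expr'
    sample_list = ['exprA', 'exprA', 'exprB']  # duplicates collapse via set()

    csv = expand(join_path(DIR_PROJ, SUBDIR_EXPR, '{expid}', 'expr.csv'),
                 expid=list(set(sample_list)))
    print(sorted(csv))   # ['project/expr/exprA/expr.csv', 'project/expr/exprB/expr.csv']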
@@ -306,48 +313,14 @@ if ALIGNER == 'bowtie2':
             log = join_path(DIR_PROJ, SUBDIR_LOG, '{itemid}',
                             'Align-Bowtie2_Cell-{bc}.log'),
         output:
-            report = join_path(DIR_PROJ, SUBDIR_LOG, '{itemid}',
-                               'Align-Bowtie2_Cell-{bc}.pickle'),
+            report = join_path(DIR_PROJ, SUBDIR_LOG, '{itemid}', ALIGNER,
+                               '{bc}.pickle'),
         run:
             with open(input.log, 'r') as fin:
                 log_content = fin.readlines()
             df = parse_bowtie2_report('\t'.join(log_content))
             pickle.dump(df, open(output.report, 'wb'))

-    rule report_bowtie2_log:
-        input:
-            # flag = '_done_umimatrix_per_experiment',
-            df = dynamic(join_path(DIR_PROJ, SUBDIR_LOG, '{itemid}',
-                                   'Align-Bowtie2_Cell-{bc}.pickle')),
-        output:
-            report = expand(join_path(DIR_PROJ, SUBDIR_REPORT, '{itemid}',
-                            'alignment_'+ALIGNER+'.csv'), itemid=item_names),
-        run:
-            for item in item_names:
-                logs_per_item = []
-                logs_name_item = []
-                report_item = join_path(DIR_PROJ, SUBDIR_REPORT, item,
-                                        'alignment_'+ALIGNER+'.csv')
-                logs_fpath_item = [x for x in input.df if item in x]
-                for log_fpath in logs_fpath_item:
-                    log_df = pickle.load(open(log_fpath, 'rb'))
-                    logs_per_item.append(log_df)
-                    log_name = base_name(log_fpath)
-                    # Align-Bowtie2_Cell-BC-29-ACCATG
-                    log_name_re = re.search(
-                        r"Align-Bowtie2_Cell-BC-((\d+)-(\w+))",
-                        log_name)
-                    if log_name_re:
-                        log_name = log_name_re.group(1)
-                    logs_name_item.append(log_name)
-                _ = merge_reports(reports=logs_per_item,
-                                  report_names=logs_name_item,
-                                  aligner_name=ALIGNER,
-                                  savetocsv=report_item)

 if ALIGNER == 'star':
     assert STAR_INDEX_DIR
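A small side-by-side sketch (not from the repo) of why moving the aligner into the directory name simplifies aggregation: the cell label can be taken straight from the pickle's base name instead of being re-extracted with the Align-Bowtie2_Cell-BC regex the removed rule needed. The paths below are hypothetical, and os.path functions stand in for the workflow's base_name helper.

    # Sketch only: recovering the cell label under the old and new pickle layouts.
    import os
    import re

    old_path = 'project/log/itemA/Align-Bowtie2_Cell-BC-29-ACCATG.pickle'  # hypothetical, old layout
    new_path = 'project/log/itemA/bowtie2/BC-29-ACCATG.pickle'             # hypothetical, new layout

    # Old layout: strip the prefix with the regex the removed rule used.
    old_name = os.path.splitext(os.path.basename(old_path))[0]
    m = re.search(r"Align-Bowtie2_Cell-BC-((\d+)-(\w+))", old_name)
    print(m.group(1) if m else old_name)                    # 29-ACCATG

    # New layout: the base name already is the barcode label.
    print(os.path.splitext(os.path.basename(new_path))[0])  # BC-29-ACCATG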
@@ -376,6 +349,8 @@ if ALIGNER == 'star':
             sam = join_path(DIR_PROJ, SUBDIR_ALIGN, '{itemid}', '{bc}.sam'),
             starsam = join_path(DIR_PROJ, SUBDIR_ALIGN, '{itemid}', '{bc}',
                                 'Aligned.out.sam'),
+            starlog = join_path(DIR_PROJ, SUBDIR_ALIGN, '{itemid}', '{bc}',
+                                'Log.final.out'),
         params:
             star_prefix = join_path(DIR_PROJ, SUBDIR_ALIGN,
                                     '{itemid}', '{bc}', ''),
@@ -394,6 +369,54 @@ if ALIGNER == 'star':
             shell('ln -s {output.starsam} {output.sam} ')
             shell('touch -h {output.sam} ')

+    rule parse_star_log:
+        input:
+            starlog = join_path(DIR_PROJ, SUBDIR_ALIGN, '{itemid}', '{bc}',
+                                'Log.final.out'),
+        output:
+            report = join_path(DIR_PROJ, SUBDIR_LOG, '{itemid}', ALIGNER,
+                               '{bc}.pickle'),
+        run:
+            with open(input.starlog, 'r') as fin:
+                log_content = fin.readlines()
+            df = parse_star_report('\t'.join(log_content))
+            pickle.dump(df, open(output.report, 'wb'))
+
+rule REPORT_ALIGNMENT_LOG:
+    input:
+        report = expand(join_path(DIR_PROJ, SUBDIR_REPORT, '{itemid}',
+                                  'alignment_'+ALIGNER+'.csv'),
+                        itemid=item_names),
+
+rule report_alignment_log:
+    input:
+        '_done_umimatrix_per_experiment',
+        df = dynamic(join_path(DIR_PROJ, SUBDIR_LOG, '{itemid}', ALIGNER,
+                               '{bc}.pickle')),
+    output:
+        report = expand(join_path(DIR_PROJ, SUBDIR_REPORT, '{itemid}',
+                        'alignment_'+ALIGNER+'.csv'), itemid=item_names),
+    run:
+        for item in item_names:
+            logs_per_item = []
+            logs_name_item = []
+            report_item = join_path(DIR_PROJ, SUBDIR_REPORT, item,
+                                    'alignment_'+ALIGNER+'.csv')
+            logs_fpath_item = [x for x in input.df if item in x]
+            for log_fpath in logs_fpath_item:
+                log_df = pickle.load(open(log_fpath, 'rb'))
+                logs_per_item.append(log_df)
+                log_name = base_name(log_fpath)
+                logs_name_item.append(log_name)
+            _ = merge_reports(reports=logs_per_item,
+                              report_names=logs_name_item,
+                              aligner_name=ALIGNER,
+                              savetocsv=report_item)

 rule count_umi:
     input:
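To make the aggregation step concrete, here is a rough sketch of how the per-cell report dicts pickled above could be combined into one alignment CSV per item. This is not the project's merge_reports(); pandas and the directory layout are assumptions used only to illustrate the data flow.

    # Sketch only: combine per-cell report dicts (as pickled by parse_bowtie2_log /
    # parse_star_log above) into a single CSV -- NOT the project's merge_reports().
    import glob
    import os
    import pickle

    import pandas as pd

    pickles = glob.glob('project/log/itemA/star/*.pickle')      # hypothetical layout
    reports = {}
    for fpath in sorted(pickles):
        cell = os.path.splitext(os.path.basename(fpath))[0]     # e.g. 'BC-29-ACCATG'
        with open(fpath, 'rb') as fin:
            reports[cell] = pickle.load(fin)                     # OrderedDict of metrics

    # One row per cell, one column per parsed metric.
    pd.DataFrame.from_dict(reports, orient='index').to_csv('alignment_star.csv')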