Commit 66194ddc authored by yy1533
Browse files

🐾 report star alignment log. Related to #3

parent ac16e685
......@@ -38,12 +38,12 @@ def parse_bowtie2_report(raw_data):
'paired_aligned_mate_none': r"(\d+) \([\d\.]+%\) aligned 0 times"
}
}
parsed_data = OrderedDict()
parsed_data = OrderedDict({k: 0 for k in regexes['unpaired'].keys()})
for k, r in regexes['unpaired'].items():
r_search = re.search(r, raw_data, re.MULTILINE)
if r_search:
parsed_data[k] = float(r_search.group(1))
assert parsed_data['total_reads'] == parsed_data['total_reads'] # single-end
assert parsed_data['total_reads'] == parsed_data['total_unpaired'] # single-end
return(parsed_data)
......@@ -78,7 +78,7 @@ def parse_star_report(raw_data):
'unmapped_tooshort_percent': r"% of reads unmapped: too short \|\s+([\d\.]+)",
'unmapped_other_percent': r"% of reads unmapped: other \|\s+([\d\.]+)",
}
parsed_data = OrderedDict()
parsed_data = OrderedDict({k: 0 for k in regexes.keys()})
for k, r in regexes.items():
r_search = re.search(r, raw_data, re.MULTILINE)
if r_search:
......
__version__ = '0.4.5'
__version__ = '0.4.6'
......@@ -115,7 +115,7 @@ aln_diagnose_item = ["_unmapped",
workdir: DIR_PROJ
rule Count_Matrix:
rule all:
input:
csv = expand(join_path(DIR_PROJ, SUBDIR_EXPR, '{expid}', 'expr.csv'),
expid=list(set(sample_list))),
......@@ -124,6 +124,13 @@ rule Count_Matrix:
report = expand(join_path(DIR_PROJ, SUBDIR_REPORT, '{itemid}',
'alignment_'+ALIGNER+'.csv'), itemid=item_names),
# Target rule for the UMI-count stage: it requests, for every distinct entry
# of `sample_list`, the `expr.csv` and `expr.h5` files under SUBDIR_EXPR.
# It runs no command of its own; once all inputs exist, `touch()` creates the
# `_done_UMI` flag file and the message below is shown.
rule COUNT_MATRIX:
input:
# set() de-duplicates sample_list so each experiment is requested once.
csv = expand(join_path(DIR_PROJ, SUBDIR_EXPR, '{expid}', 'expr.csv'),
expid=list(set(sample_list))),
hdf = expand(join_path(DIR_PROJ, SUBDIR_EXPR, '{expid}', 'expr.h5'),
expid=list(set(sample_list))),
output:
touch('_done_UMI')
message: 'Finished counting UMI-count matrix.'
......@@ -306,48 +313,14 @@ if ALIGNER == 'bowtie2':
log = join_path(DIR_PROJ, SUBDIR_LOG, '{itemid}',
'Align-Bowtie2_Cell-{bc}.log'),
output:
report = join_path(DIR_PROJ, SUBDIR_LOG, '{itemid}',
'Align-Bowtie2_Cell-{bc}.pickle'),
report = join_path(DIR_PROJ, SUBDIR_LOG, '{itemid}', ALIGNER,
'{bc}.pickle'),
run:
with open(input.log, 'r') as fin:
log_content = fin.readlines()
df = parse_bowtie2_report('\t'.join(log_content))
pickle.dump(df, open(output.report, 'wb'))
rule report_bowtie2_log:
    # Merge the per-cell Bowtie2 report pickles into one alignment CSV per
    # item (one CSV for every entry of `item_names`).
    input:
        # flag = '_done_umimatrix_per_experiment',
        df = dynamic(join_path(DIR_PROJ, SUBDIR_LOG, '{itemid}',
                               'Align-Bowtie2_Cell-{bc}.pickle')),
    output:
        report = expand(join_path(DIR_PROJ, SUBDIR_REPORT, '{itemid}',
                                  'alignment_'+ALIGNER+'.csv'),
                        itemid=item_names),
    run:
        for item in item_names:
            logs_per_item = []
            logs_name_item = []
            report_item = join_path(DIR_PROJ, SUBDIR_REPORT, item,
                                    'alignment_'+ALIGNER+'.csv')
            # NOTE(review): this is a substring test on the whole path, so an
            # item whose name is contained in another item's path would also
            # match -- confirm item names cannot collide this way.
            logs_fpath_item = [x for x in input.df if item in x]
            for log_fpath in logs_fpath_item:
                # Open through a context manager so each read handle is
                # closed immediately instead of leaking one per cell.
                with open(log_fpath, 'rb') as fin:
                    log_df = pickle.load(fin)
                logs_per_item.append(log_df)
                log_name = base_name(log_fpath)
                # Reduce e.g. "Align-Bowtie2_Cell-BC-29-ACCATG" to
                # "29-ACCATG"; names that do not match are kept unchanged.
                log_name_re = re.search(
                    r"Align-Bowtie2_Cell-BC-((\d+)-(\w+))",
                    log_name)
                if log_name_re:
                    log_name = log_name_re.group(1)
                logs_name_item.append(log_name)

            # The return value is not needed; merge_reports is given the CSV
            # destination through `savetocsv`.
            _ = merge_reports(reports=logs_per_item,
                              report_names=logs_name_item,
                              aligner_name=ALIGNER,
                              savetocsv=report_item)
if ALIGNER == 'star':
assert STAR_INDEX_DIR
......@@ -376,6 +349,8 @@ if ALIGNER == 'star':
sam = join_path(DIR_PROJ, SUBDIR_ALIGN, '{itemid}', '{bc}.sam'),
starsam = join_path(DIR_PROJ, SUBDIR_ALIGN, '{itemid}', '{bc}',
'Aligned.out.sam'),
starlog = join_path(DIR_PROJ, SUBDIR_ALIGN, '{itemid}', '{bc}',
'Log.final.out'),
params:
star_prefix = join_path(DIR_PROJ, SUBDIR_ALIGN,
'{itemid}', '{bc}', ''),
......@@ -394,6 +369,54 @@ if ALIGNER == 'star':
shell('ln -s {output.starsam} {output.sam} ')
shell('touch -h {output.sam} ')
rule parse_star_log:
    # Parse one cell's STAR `Log.final.out` with parse_star_report() and
    # store the parsed result as a pickle under SUBDIR_LOG/<itemid>/<ALIGNER>/.
    input:
        starlog = join_path(DIR_PROJ, SUBDIR_ALIGN, '{itemid}', '{bc}',
                            'Log.final.out'),
    output:
        report = join_path(DIR_PROJ, SUBDIR_LOG, '{itemid}', ALIGNER,
                           '{bc}.pickle'),
    run:
        with open(input.starlog, 'r') as fin:
            log_content = fin.readlines()
        # The log lines are joined with a tab before being handed to the
        # parser; each line still carries its own trailing newline.
        df = parse_star_report('\t'.join(log_content))
        # Write through a context manager so the pickle is flushed and the
        # handle closed before any downstream rule reads the file.
        with open(output.report, 'wb') as fout:
            pickle.dump(df, fout)
# Target rule with inputs only: requests the per-item alignment report
# `alignment_<ALIGNER>.csv` under SUBDIR_REPORT for every entry of
# `item_names`. It declares no output and runs no command itself.
rule REPORT_ALIGNMENT_LOG:
input:
report = expand(join_path(DIR_PROJ, SUBDIR_REPORT, '{itemid}',
'alignment_'+ALIGNER+'.csv'),
itemid=item_names),
rule report_alignment_log:
    # Merge the per-cell alignment report pickles (for whichever ALIGNER is
    # configured) into one alignment CSV per item in `item_names`.
    input:
        # Flag file listed as an input so this rule waits until it exists.
        '_done_umimatrix_per_experiment',
        df = dynamic(join_path(DIR_PROJ, SUBDIR_LOG, '{itemid}', ALIGNER,
                               '{bc}.pickle')),
    output:
        report = expand(join_path(DIR_PROJ, SUBDIR_REPORT, '{itemid}',
                                  'alignment_'+ALIGNER+'.csv'),
                        itemid=item_names),
    run:
        for item in item_names:
            logs_per_item = []
            logs_name_item = []
            report_item = join_path(DIR_PROJ, SUBDIR_REPORT, item,
                                    'alignment_'+ALIGNER+'.csv')
            # NOTE(review): this is a substring test on the whole path, so an
            # item whose name is contained in another item's path would also
            # match -- confirm item names cannot collide this way.
            logs_fpath_item = [x for x in input.df if item in x]
            for log_fpath in logs_fpath_item:
                # Open through a context manager so each read handle is
                # closed immediately instead of leaking one per cell.
                with open(log_fpath, 'rb') as fin:
                    log_df = pickle.load(fin)
                logs_per_item.append(log_df)
                # presumably the file name without directory/extension,
                # i.e. the cell barcode -- confirm against base_name().
                log_name = base_name(log_fpath)
                logs_name_item.append(log_name)

            # The return value is not needed; merge_reports is given the CSV
            # destination through `savetocsv`.
            _ = merge_reports(reports=logs_per_item,
                              report_names=logs_name_item,
                              aligner_name=ALIGNER,
                              savetocsv=report_item)
rule count_umi:
input:
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment