Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in / Register
Toggle navigation
Menu
Open sidebar
Holly Ruess
celseq2
Commits
040d2014
Unverified
Commit
040d2014
authored
Feb 13, 2018
by
Yun YAN
Committed by
GitHub
Feb 13, 2018
Browse files
Merge pull request #8 from Puriney/master
🇨🇳
v0.4.3 improve pipeline design to avoid STAR memory overuse
parents
74b01927
756fd948
Changes
4
Hide whitespace changes
Inline
Side-by-side
celseq2/celseq2.py
View file @
040d2014
...
...
@@ -18,6 +18,7 @@ Select top-used parameters from snakemake.snakemake():
* -T
* -w 1800 # 30min
* --keep-going
* --restart-times 1
Select optional parameters from snakemake.snakemake():
* --ri vs. --ii
...
...
@@ -127,6 +128,7 @@ def main():
latency_wait
=
1800
,
jobname
=
"celseq2_job.{rulename}.{jobid}.sh"
,
keepgoing
=
True
,
restart_times
=
1
,
dryrun
=
args
.
dryrun
,
lock
=
not
args
.
nolock
,
...
...
celseq2/version.py
View file @
040d2014
__version__
=
'0.4.
2
'
__version__
=
'0.4.
3
'
celseq2/workflow/celseq2.snakemake
View file @
040d2014
...
...
@@ -109,9 +109,24 @@ aln_diagnose_item = ["_unmapped",
workdir: DIR_PROJ
rule Count_Matrix:
input:
csv = expand(join_path(DIR_PROJ, SUBDIR_EXPR, '{expid}', 'expr.csv'),
expid=list(set(sample_list))),
hdf = expand(join_path(DIR_PROJ, SUBDIR_EXPR, '{expid}', 'expr.h5'),
expid=list(set(sample_list))),
# alignment = expand(join_path(DIR_PROJ, SUBDIR_DIAG,
# '{itemid}', 'alignment_diagnose.csv'),
# itemid=item_names),
output:
touch('_done_UMI')
message: 'Finished counting UMI-count matrix.'
run:
print_logger('UMI-count matrix is saved at {}'.format(input.csv))
rule all:
message: 'Finished UMI matrix'
rule celseq2:
message: 'Finished Entire Pipeline.'
input:
# Annotation
anno = join_path(DIR_PROJ, SUBDIR_ANNO,
...
...
@@ -143,7 +158,7 @@ rule all:
itemid=item_names),
output:
touch('_done_
UMI
')
touch('_done_
celseq2
')
run:
if glob.glob('celseq2_job*.sh*'):
shell('mv -f celseq2_job*.sh* {}'.format(SUBDIR_QSUB))
...
...
@@ -151,14 +166,13 @@ rule all:
print_logger('Expression UMI matrix is saved at {}'.format(input.csv))
if ALIGNER == 'star':
shell('rm {}'.format(rules.star_load_genome.output
.flag
))
shell('rm {}'.format(rules.star_load_genome.output))
print('Free memory loaded by STAR', flush=True)
cmd = 'STAR '
cmd += '--genomeLoad Remove '
cmd += '--genomeDir {STAR_INDEX_DIR} '
shell(cmd)
rule setup_dir:
input: SAMPLE_TABLE_FPATH
output:
...
...
@@ -203,8 +217,8 @@ rule cook_annotation:
# Combo-demultiplexing
rule combo_demultiplexing:
input:
flag1 = '_done_setupdir',
flag2 = '_done_annotation',
# flag1 = '_done_setupdir',
output:
dynamic(join_path(DIR_PROJ, SUBDIR_FASTQ, '{itemid}', '{bc}.fastq')),
message: 'Performing combo-demultiplexing'
...
...
@@ -247,6 +261,8 @@ rule combo_demultiplexing:
## Alignment ##
assert (ALIGNER), 'Error: Specify aligner.'
assert (ALIGNER in ['bowtie2', 'star']), 'Error: Unknown aligner.'
if ALIGNER == 'bowtie2':
rule align_bowtie2:
input:
...
...
@@ -268,25 +284,26 @@ if ALIGNER == 'bowtie2':
-S {output.sam} 2>{log} \
--seed 42
""")
elif ALIGNER == 'star':
assert STAR
assert STAR_INDEX_DIR
if ALIGNER == 'star':
assert STAR_INDEX_DIR
rule star_load_genome:
#
input:
#
flag = '_done_annotation',
input:
flag = '_done_annotation',
output:
flag =
'_done_star_genome_loaded'
,
touch(
'_done_star_genome_loaded'
)
message: 'Loading genome to memory for STAR'
shadow: "shallow"
run:
cmd = 'STAR '
cmd += '--genomeLoad LoadAndExit '
cmd += '--genomeDir {STAR_INDEX_DIR} '
shell(cmd)
shell('touch {output.flag} ')
#
shell('touch {output.flag} ')
# shell('echo loaded >> {output.flag} ')
if ALIGNER == 'star':
assert STAR
rule align_star:
input:
flag = '_done_star_genome_loaded',
...
...
@@ -313,9 +330,6 @@ elif ALIGNER == 'star':
shell('ln -s {output.starsam} {output.sam} ')
shell('touch -h {output.sam} ')
else:
print('Error: Unknown aligner', flush=True)
rule count_umi:
input:
...
...
@@ -386,19 +400,6 @@ rule summarize_umi_matrix_per_item:
# - merge umi-count using *_umiset.pkl -> correct umi count per experiment/plate
rule umi_matrix:
input:
csv = expand(join_path(DIR_PROJ, SUBDIR_EXPR, '{expid}', 'expr.csv'),
expid=list(set(sample_list))),
hdf = expand(join_path(DIR_PROJ, SUBDIR_EXPR, '{expid}', 'expr.h5'),
expid=list(set(sample_list))),
alignment = expand(join_path(DIR_PROJ, SUBDIR_DIAG,
'{itemid}', 'alignment_diagnose.csv'),
itemid=item_names),
message: 'UMI matrix per experiment'
rule summarize_umi_matrix_per_experiment:
input:
gff = join_path(DIR_PROJ, SUBDIR_ANNO,
...
...
docs/about/release_note.md
View file @
040d2014
...
...
@@ -10,13 +10,14 @@
---
## :fa-flag-checkered: **v0.4.
2
**
## :fa-flag-checkered: **v0.4.
3
**
:fa-calendar:
**2018-02-13**
:fa-star:
**Features**
-
Improve the logics of snakemake pipeline to avoid silent pre-inhibition.
-
Improve the design of snakemake pipeline to avoid silent pre-inhibition.
-
Better support STAR to avoid memory overuse.
---
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment