Unverified Commit 1d73626d authored by Yun YAN's avatar Yun YAN Committed by GitHub

Merge pull request #23 from Puriney/master

update
parents 2be19547 7bc05813
......@@ -82,6 +82,8 @@ def demultiplexing(read1_fpath, read2_fpath, dict_bc_id2seq,
bc_qual_min=10,
is_gzip=True,
save_unknown_bc_fastq=False,
tagging_only=False,
tag_to='tagged.fastq',
do_bc_rev_complement=False,
do_tx_rev_complement=False,
verbose=False):
......@@ -110,8 +112,18 @@ def demultiplexing(read1_fpath, read2_fpath, dict_bc_id2seq,
bc_fhout['UNKNOWNBC_R2'] = join_path(outdir, 'UNKNOWN',
'UNKNOWNBC_R2.fq')
if tagging_only:
out_fpath_tagged_fq = join_path(outdir, tag_to)
out_fh_tagged_fq = open(out_fpath_tagged_fq, 'w')
for bc_seq, v in bc_fhout.items():
bc_fhout[bc_seq] = open(v, 'w')
if bc_seq.startswith('UNKNOWN'):
bc_fhout[bc_seq] = open(v, 'w')
continue
if tagging_only:
bc_fhout[bc_seq] = out_fh_tagged_fq
else:
bc_fhout[bc_seq] = open(v, 'w')
i = 0
while(True):
......@@ -310,6 +322,16 @@ def main():
parser.add_argument('--save-unknown-bc-fastq',
dest='save_unknown_bc_fastq', action='store_true')
parser.set_defaults(save_unknown_bc_fastq=False)
parser.add_argument('--tagging-only',
dest='tagging_only', action='store_true',
help=('Demultiplexed reads are merged to a file named'
' \"tagged.fastq\" under --out-dir.'))
parser.set_defaults(tagging_only=False)
parser.add_argument(
'--tag-to',
dest='tag_to', default='tagged.fastq',
help=('File base name to save the tagged fastq file. '
'Only used when tagging_only.'))
parser.add_argument('--verbose', dest='verbose', action='store_true')
parser.set_defaults(verbose=False)
......@@ -333,6 +355,8 @@ def main():
bc_qual_min=args.min_bc_quality,
is_gzip=args.is_gzip,
save_unknown_bc_fastq=args.save_unknown_bc_fastq,
tagging_only=args.tagging_only,
tag_to=args.tag_to,
do_bc_rev_complement=False,
do_tx_rev_complement=False,
verbose=args.verbose)
......
......@@ -95,7 +95,7 @@ def rmfile(fpath):
def base_name(fpath, ext=None):
    """Return the file name of *fpath* without directory and extension.

    The directory part is dropped with ``os.path.basename``. When *ext* is a
    non-empty string, every occurrence of it is removed from the file name.
    Finally one trailing extension (as defined by ``os.path.splitext``) is
    stripped from whatever remains.

    Args:
        fpath: Path to a file.
        ext: Optional extension text to remove (e.g. ``'.fastq.gz'``).
            ``None`` or ``''`` means nothing is removed at this step.

    Returns:
        The reduced base name as a string.
    """
    bs = os.path.basename(fpath)
    if ext is not None and ext != "":
        # str.replace returns a new string, so the result has to be re-bound;
        # calling it without assignment has no effect.
        bs = bs.replace(ext, '')
    bs = os.path.splitext(bs)[0]
    return bs
......
......@@ -38,7 +38,7 @@ def celseq2stpipeline(celseq2_fpath, spatial_map, out,
genes = map(lambda x: x.replace(' ', '_'), expr_valid.index.values)
colnames = expr_valid.columns.values
# fhout.write('{}\t{}\n'.format('', '\t'.join(genes))) # header
fhout.write('{}\t{}\t{}\n'.format('Row', 'Col', '\t'.join(genes))) # header
fhout.write('{}\t{}\t{}\n'.format('X', 'Y', '\t'.join(genes))) # header
for colname in colnames:
tmp = colname.replace('.', '-') # BC-1-ATGC or ATGC
......
......@@ -12,15 +12,15 @@ BC_LENGTH: 6
####################################
## Alignment Tools
####################################
## Which RNA-seq aligner to use? 'bowtie2' or 'star'
## Which RNA-seq aligner to use: 'bowtie2', 'star', 'kallisto'
ALIGNER: 'bowtie2'
## What is the absolute path to command bowtie2?
## What is the absolute path to the command bowtie2?
BOWTIE2: '/absolute/path/to/bowtie2'
## What is the sharef prefix of bowtie2 index?
## What is the shared prefix of bowtie2 index file names?
BOWTIE2_INDEX_PREFIX: '/absolute/path/to/bowtie2_index'
## What is the absolute path to command STAR?
## What is the absolute path to the command STAR?
STAR: '/absolute/path/to/star'
## Whare is the directory to save STAR index?
## Where is the directory to save STAR index?
STAR_INDEX_DIR: '/absolute/path/to/star/folder/'
## Extra parameters to run aligner. For example:
......
__version__ = '0.5.3'
__version__ = '0.5.3.2'
......@@ -44,6 +44,8 @@ BOWTIE2_INDEX_PREFIX = config.get('BOWTIE2_INDEX_PREFIX', None)
BOWTIE2 = config.get('BOWTIE2', None) # '/local/apps/bowtie2/2.3.1/bowtie2'
STAR_INDEX_DIR = config.get('STAR_INDEX_DIR', None)
STAR = config.get('STAR', None)
# KALLISTO = config.get('KALLISTO', None)
# KALLISTO_INDEX = config.get('KALLISTO_INDEX', None)
ALIGNER_EXTRA_PARAMETERS = config.get('ALIGNER_EXTRA_PARAMETERS', '')
# Annotations
......@@ -119,6 +121,8 @@ Part-2: Snakemake rules
'''
workdir: DIR_PROJ
include: 'sub.snakemake'
'''
Default task named "all" to request all outputs.
'''
......@@ -343,11 +347,16 @@ rule tag_fastq:
for fq in input.fq:
fq_itemid = base_name(dir_name(fq))
dict_itemid_fq.setdefault(fq_itemid, []).append(fq)
for itemid, item_fq in dict_itemid_fq.items():
itemid_tag_fq = join_path(DIR_PROJ, SUBDIR_FASTQ,
itemid, 'TAGGED.bigfastq')
cmd = 'cat {} > {} '.format(' '.join(item_fq), itemid_tag_fq)
shell(cmd)
# cmd = 'cat {} > {} '.format(' '.join(item_fq), itemid_tag_fq)
if is_nonempty_file(itemid_tag_fq):
shell('rm {itemid_tag_fq}')
for fq in item_fq:
cmd = 'cat {} >> {}'.format(fq, itemid_tag_fq)
shell(cmd)
print_logger('Tagged FQ: {}'.format(itemid_tag_fq))
......@@ -422,6 +431,38 @@ if ALIGNER == 'star':
shell('mv {starsam} {output.sam} ')
shell('mv {starlog} {output.log} ')
# if ALIGNER == 'kallisto':
# rule align_kallisto_pseudobam:
# input:
# fq = join_path(DIR_PROJ, SUBDIR_FASTQ,
# '{itemID}', 'TAGGED.bigfastq'),
# output:
# sam = join_path(DIR_PROJ, SUBDIR_ALIGN_ITEM,
# '{itemID}', ALIGNER + '.bigsam'),
# params:
# threads = num_threads,
# kallisto_outdir_tmp = join_path(DIR_PROJ, SUBDIR_ALIGN_ITEM,
# '{itemID}', '.kallisto', ''),
# aligner_extra_parameters = ALIGNER_EXTRA_PARAMETERS,
# # shadow: "shallow"
# log:
# join_path(DIR_PROJ, SUBDIR_LOG, '{itemID}',
# 'Align-Kallisto.log')
# run:
# cmd = '{KALLISTO} '
# cmd += '--index {KALLISTO_INDEX} '
# cmd += '--output-dir {params.kallisto_outdir_tmp} '
# cmd += '--seed 42 --single --pseudobam '
# cmd += '--fragment-length {CUT_LENGTH} '
# cmd += '--sd 2 '
# cmd += '--threads {params.threads} '
# cmd += '{params.aligner_extra_parameters} '
# cmd += '{input.fq} '
# cmd += '>{output.sam} '
# cmd += '2>{log} '
# shell(cmd)
# Pipeline Step 2b: Combo-demultiplex the SAM file
rule combo_demultiplexing_sam:
input:
......
'''
Test if snakemake can include multiple snakemake files so that
we can better modularize the long workflow.
Answer is Yes.
'''
rule sub_test:
output: '_test_sub'
run:
shell('touch {output} ')
\ No newline at end of file
......@@ -5,9 +5,26 @@
:fa-calendar: **YYYY-MM-DD**
:fa-code: []()
:fa-star: **Features**
-->
---
## :fa-flag-checkered: **v0.5.3**
:fa-calendar: **2018-05-10**
:fa-code: [2be1954](https://github.com/yanailab/celseq2/tree/2be195470f6b98e42f5d86f4f2736f29a543103f)
:fa-star: **Features**
- Plot demultiplexing and alignment stats to help users assess their data.
- Column names of the UMI-count matrix are named in the format 'BC-i-xxxx' to suit users' needs.
---
## :fa-flag-checkered: **v0.5.2**
......
......@@ -149,11 +149,13 @@ expr/
Results of <kbd>item-X</kbd> are useful to assess variation when FASTQ
files from multiple lanes, or technical/biological replicates are present.
## About
## Authors
Authors: See <https://github.com/yanailab/celseq2/blob/master/AUTHORS>
See <https://github.com/yanailab/celseq2/blob/master/AUTHORS>
License: See <https://github.com/yanailab/celseq2/blob/master/LICENSE>
## License
See <https://github.com/yanailab/celseq2/blob/master/LICENSE>
[^Hashimshony2016]: Hashimshony, T. et al. CEL-Seq2: sensitive highly-
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment