Commit dde40ffa authored by yy1533
Browse files

💪 use genometools to select gene names based on gene_biotype

parent 243e6b68
......@@ -5,6 +5,7 @@ import argparse
import pickle
from celseq2.helper import print_logger
from genometools.ensembl.annotations import get_genes
def cook_anno_model(gff_fpath, feature_atrr='gene_id', feature_type='exon',
......@@ -45,13 +46,17 @@ def cook_anno_model(gff_fpath, feature_atrr='gene_id', feature_type='exon',
exported_genes.add(gff.attr[feature_atrr].strip())
continue
if gff.attr.get('gene_biotype', None) in gene_types:
exported_genes.add(gff.attr[feature_atrr].strip())
# if gff.attr.get('gene_biotype', None) in gene_types:
# exported_genes.add(gff.attr[feature_atrr].strip())
print_logger('Processed {:,} lines of GFF...'.format(i))
# Use genometools to select exported_genes
if gene_types:
exported_genes = get_genes(gff_fpath, valid_biotypes=set(gene_types))
exported_genes = list(exported_genes['name'].values)
if exported_genes:
exported_genes = sorted(exported_genes)
exported_genes = tuple(sorted(exported_genes))
if dumpto:
with open(dumpto, 'wb') as fh:
pickle.dump((features, exported_genes), fh)
......
......@@ -21,7 +21,7 @@ GFF: '/absolute/path/to/wonderful.gtf.gz'
# Refer: http://htseq.readthedocs.io/en/master/count.html
FEATURE_ID: 'gene_name'
FEATURE_CONTENT: 'exon'
# GENE_BIOTYPE: ['protein_coding', 'lincRNA']
# GENE_BIOTYPE: ['protein_coding', 'lincRNA']. If nothing is set, all biotypes are reported.
GENE_BIOTYPE:
## Demultiplexing ##
......
......@@ -244,7 +244,7 @@ rule COOK_ANNOTATION:
# Combo-demultiplexing
rule COMBO_DEMULTIPLEXING:
input:
flag2 = '_done_annotation',
# flag2 = '_done_annotation',
flag1 = '_done_setupdir',
output:
'_done_combodemultiplex', # dynamic() is not allowed if task being called on terminal
......@@ -288,7 +288,7 @@ rule COMBO_DEMULTIPLEXING:
rule combo_demultiplexing:
input:
flag2 = '_done_annotation',
# flag2 = '_done_annotation',
flag1 = '_done_setupdir',
output:
fq = dynamic(join_path(DIR_PROJ, SUBDIR_FASTQ, '{itemid}', '{bc}.fastq')),
......
......@@ -21,6 +21,7 @@ install_requires = [
'pandas>=0.20.0',
'numpy>=1.12.0',
'tables>=3.4.2',
'genometools',
]
# do not require installation if built by ReadTheDocs
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment