Skip to content
Snippets Groups Projects
Commit 69ff9517 authored by Christopher Bennett's avatar Christopher Bennett
Browse files

Update Trenco code to weight expressed genes

parent bef23f92
1 merge request!1Trenco devel
#!/usr/bin/env python3 #!/usr/bin/env python3
# --------------------------------------------------------------------------- #
# # Wrapper script for running all of trenco. The main scripts are found in #
# Wrapper script for running all of trenco # trenco_core module and are imported. #
# #
import os, json # Inputs required: #
# DESIGN tab separated file containing link information of samples #
# to names #
# ALIGNMENT bam files from ChIP-seq experiments for enhancers #
# BED file of TAD regions to use #
# EXPRESSION tsv file containing transcript expression #
# PEAKS bed files with called enhancer peaks #
# ANNOTATIONS gtf file vM4 recommended with gene annotations #
# #
# Major Outputs: #
# Three folders: #
# - Results - contains graph results of the networks #
# - Log - contains the logs of the scripts #
# - Process - contains intermediate files and plots #
# EDGE csv formatted information containing graph link information between #
# in an array #
# NODE csv formatted information containing expression data for each gene #
# node #
# --------------------------------------------------------------------------- #
import os
import json
import logging import logging
import multiprocessing as mp import multiprocessing as mp
import trenco_modules.trenco_args as targs import trenco_modules.trenco_args as targs
...@@ -102,6 +123,16 @@ def initial_setup(version, ...@@ -102,6 +123,16 @@ def initial_setup(version,
return (bedgene, bedenh, txn_log2TPM, enh_log2TPM, mtcs[0], mtcs[1]) return (bedgene, bedenh, txn_log2TPM, enh_log2TPM, mtcs[0], mtcs[1])
def _DEBUG():
bedgene = "process/gene.bed"
bedenh = "process/enh.bed"
txn_log2TPM = "process/transcript_expression_log2TPM_matrix.txt"
enh_log2TPM = "process/enhancer_H3K27ac_log2TPM_signal_matrix.txt"
mtx1 = "process/TFxgencode.vM4.annotation_capped_sites_"\
"1000u200d_promoter.bed_score_matrix.txt"
mtx2 = "process/TFxenh.bed_score_matrix.txt"
return (bedgene, bedenh, txn_log2TPM, enh_log2TPM, mtx1, mtx2)
def full_build(samples, def full_build(samples,
gene, gene,
enh, enh,
...@@ -132,7 +163,10 @@ def full_build(samples, ...@@ -132,7 +163,10 @@ def full_build(samples,
if __name__ == '__main__': if __name__ == '__main__':
parser = ArgumentParser( parser = ArgumentParser(
description = "TAD aware Regulatory Network Construction (TReNCo)", description = "TAD aware Regulatory Network Construction (TReNCo)",
usage="trenco --design [DESIGN FILE (txt)] --alignment [ALIGNMENT FILES (tsv)] --expression [EXPRESSION FILES (bam)] --peaks [PEAK FILES (bed)] -g [GENOME VERSION] [OPTIONS]") usage="trenco --design [DESIGN FILE (txt)] \
--alignment [ALIGNMENT FILES (tsv)] \
--expression [EXPRESSION FILES (bam)] \
--peaks [PEAK FILES (bed)] -g [GENOME VERSION] [OPTIONS]")
targs.full_trenco_args(parser) targs.full_trenco_args(parser)
targs.enh_bound_args(parser, False) targs.enh_bound_args(parser, False)
...@@ -141,7 +175,11 @@ if __name__ == '__main__': ...@@ -141,7 +175,11 @@ if __name__ == '__main__':
args = parser.parse_args() args = parser.parse_args()
files = [args.peaks, args.alignment, args.expression, args.tadBED, args.design] files = [args.peaks,
args.alignment,
args.expression,
args.tadBED,
args.design]
for fi in files: for fi in files:
if not isinstance(fi, list): if not isinstance(fi, list):
fi = [fi] fi = [fi]
...@@ -154,7 +192,8 @@ if __name__ == '__main__': ...@@ -154,7 +192,8 @@ if __name__ == '__main__':
peaks, alignments, expressions, tads, design = files peaks, alignments, expressions, tads, design = files
prange = [int(x) for x in args.promoter_range.split("-")] prange = [int(x) for x in args.promoter_range.split("-")]
design_key, design_key_inv = {}, {} design_key = {}
design_key_inv = {}
with open(design, 'r') as ifi: with open(design, 'r') as ifi:
for line in ifi: for line in ifi:
line = line.split() line = line.split()
...@@ -174,21 +213,35 @@ if __name__ == '__main__': ...@@ -174,21 +213,35 @@ if __name__ == '__main__':
args.gvers = _ORAGNISM_GENOME[args.organism] args.gvers = _ORAGNISM_GENOME[args.organism]
if not debug: if not debug:
geneBED, enhBED, txn_log2TPM, enh_log2TPM, tf_gene_mx, tf_enh_mx = initial_setup(args.annotations, geneBED, \
args.organism, enhBED, \
args.biotypes, txn_log2TPM, \
args.annotfname, enh_log2TPM, \
args.region, tf_gene_mx, \
args.distance, tf_enh_mx \
peaks, = initial_setup(args.annotations,
prange, args.organism,
args.expression, args.biotypes,
args.alignment, args.annotfname,
args.target, args.region,
args.gvers, args.distance,
args.meme_db, peaks,
memedb, prange,
logger) args.expression,
args.alignment,
args.target,
args.gvers,
args.meme_db,
memedb,
logger)
else:
geneBED, \
enhBED, \
txn_log2TPM, \
enh_log2TPM, \
tf_gene_mx, \
tf_enh_mx \
= _DEBUG()
full_build(design_key, full_build(design_key,
txn_log2TPM, txn_log2TPM,
......
This diff is collapsed.
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment