Newer
Older

Venkat Malladi
committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
import csv
import itertools

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy
import scipy.stats
import seaborn as sns
# Load the expression table ("rna.tsv") and the gene_id/symbol mapping,
# then combine them into one table indexed by gene_id.
fpkm = pd.read_table("rna.tsv")
gene_names_mapping = pd.read_csv(
    "../gencode.v19.annotation_protein_coding_ids.txt",
    names=['gene_id', 'symbol'],
)
# merge() is called without `on`, so pandas joins on whatever columns the two
# tables have in common.
fpkm_symbol = fpkm.merge(gene_names_mapping).set_index(['gene_id'])
# Enhancers
# Load the enhancer BED file (tab-separated, no header row) and index it by
# its fourth column (index_col=3).
# pd.read_csv replaces DataFrame.from_csv, which was deprecated in pandas 0.21
# and removed in pandas 1.0. from_csv additionally tried to parse the index as
# dates; that is not carried over (assumes the index holds identifiers, not
# dates -- confirm against the file).
enhancers_universe = pd.read_csv(
    "GRO-seq_enhancers.bed", sep="\t", header=None, index_col=3
)
# Read in cluster 4 enhancers; the first column becomes the index.
# pd.read_csv replaces DataFrame.from_csv, which was removed in pandas 1.0.
cluster_4 = pd.read_csv("cluster_4_enhancers.csv", sep=",", header=0, index_col=0)
# Choose enhancers expressed in cluster 4: look up the cluster's index labels
# in the enhancer universe and write the matching rows out without header or
# index columns.
enhancers_universe_cluster_4 = enhancers_universe.loc[cluster_4.index.values]
enhancers_universe_cluster_4.to_csv(
    "cluster_4_enhancers_locations.bed", sep="\t", header=False, index=False
)
# Read in nearest genes (tab-separated, no header; gene ids are in column 0).
genes_id = pd.read_csv("cluster_4_genes.txt", sep="\t", header=None, index_col=None)
# Keep the expression rows whose gene_id appears in the nearest-gene list.
# Index.isin is a single vectorised membership test; the previous list
# comprehension rescanned the whole gene-id array for every row.
needed_rows = fpkm_symbol.index[fpkm_symbol.index.isin(genes_id[0])]
cluster4_genes_expressed = fpkm_symbol.loc[needed_rows]
# Box plot of the cluster 4 gene expression values, one box per column,
# saved to box_plot_cluster_4_genes_fpkm.png.
plt.style.use('classic')
# col_colors: one fill colour per box, in column order.
colors = ["#FFD66F", "#2E6A44", "#862743", "#4FA6C7", "#3398CC"]
medianprops = dict(linestyle='-', linewidth=2, color='black')
expression_columns = ['ES_D0', 'ES_D2', 'ES_D5', 'ES_D7', 'ES_D10']
# return_type='dict' is required: DataFrame.boxplot returns the Axes by
# default, and the code below indexes the result by artist name.
# The former `manage_xticks=False` keyword is dropped: matplotlib renamed it
# to `manage_ticks` in 3.1 and no longer accepts the old name. The boxplot now
# places the ticks itself, and the explicit plt.xticks call below keeps the
# tick positions and labels as before.
box = cluster4_genes_expressed.boxplot(
    column=expression_columns,
    return_type='dict',
    patch_artist=True,
    showfliers=False,
    widths=0.6,
    medianprops=medianprops,
)
plt.setp(box['whiskers'], color='k', linestyle='-', linewidth=5)
plt.setp(box['boxes'], color='k', linestyle='-', linewidth=5)
# setp above paints the boxes black; give each box its own fill colour.
for patch, color in zip(box['boxes'], colors):
    patch.set_facecolor(color)
plt.tick_params(axis='y', direction='out')
plt.tick_params(axis='x', direction='out')
# Booleans instead of the strings 'off': newer matplotlib no longer treats
# 'on'/'off' strings as booleans in tick_params.
plt.tick_params(top=False, right=False)
# Positional argument: the keyword was `b` in older matplotlib and `visible`
# from 3.5 on, so passing it positionally works on both.
plt.grid(False)
plt.ylim((-5, 60))
plt.xticks(list(range(1, len(expression_columns) + 1)), expression_columns)
plt.savefig('box_plot_cluster_4_genes_fpkm.png')
plt.clf()
# Pairwise rank-sum tests (scipy.stats.ranksums) between every pair of
# expression columns for the cluster 4 genes.
# The original ten hand-written calls discarded their return values, so running
# the script produced no statistics; the results are now kept in
# `cluster4_ranksums` (keyed by column pair) and printed.
# NOTE(review): the original comment here read "Cluster tfs 1 e-4" -- possibly
# a significance threshold; confirm with the author.
expression_columns = ['ES_D0', 'ES_D2', 'ES_D5', 'ES_D7', 'ES_D10']
cluster4_ranksums = {}
# combinations() yields the pairs in the same order as the original calls.
for first, second in itertools.combinations(expression_columns, 2):
    result = scipy.stats.ranksums(
        cluster4_genes_expressed[first], cluster4_genes_expressed[second]
    )
    cluster4_ranksums[(first, second)] = result
    print("cluster 4 {} vs {}: statistic={:.4g}, p-value={:.4g}".format(
        first, second, result[0], result[1]))
# ---- Cluster 3: enhancer locations, nearest-gene expression plot, statistics ----
# This section was a copy of the cluster 4 analysis that still read
# "cluster_4_genes.txt", reused the name `cluster4_genes_expressed` and saved
# to 'box_plot_cluster_4_genes_fpkm.png', so it silently redid cluster 4 and
# overwrote that figure. It now uses cluster 3 inputs, names and outputs.

# Read in cluster 3 enhancers; the first column becomes the index.
# pd.read_csv replaces DataFrame.from_csv, which was removed in pandas 1.0.
cluster_3 = pd.read_csv("cluster_3_enhancers.csv", sep=",", header=0, index_col=0)
# Choose enhancers expressed in cluster 3 and write the matching rows out
# without header or index columns.
enhancers_universe_cluster_3 = enhancers_universe.loc[cluster_3.index.values]
enhancers_universe_cluster_3.to_csv(
    "cluster_3_enhancers_locations.bed", sep="\t", header=False, index=False
)
# Read in nearest genes (tab-separated, no header; gene ids are in column 0).
# NOTE(review): file name inferred by symmetry with "cluster_4_genes.txt" --
# confirm that the nearest-gene step writes "cluster_3_genes.txt".
genes_id = pd.read_csv("cluster_3_genes.txt", sep="\t", header=None, index_col=None)
# Keep the expression rows whose gene_id appears in the nearest-gene list.
needed_rows = fpkm_symbol.index[fpkm_symbol.index.isin(genes_id[0])]
cluster3_genes_expressed = fpkm_symbol.loc[needed_rows]

# Box plot of the cluster 3 gene expression values, one box per column.
plt.style.use('classic')
# col_colors: one fill colour per box, in column order.
colors = ["#FFD66F", "#2E6A44", "#862743", "#4FA6C7", "#3398CC"]
medianprops = dict(linestyle='-', linewidth=2, color='black')
expression_columns = ['ES_D0', 'ES_D2', 'ES_D5', 'ES_D7', 'ES_D10']
# return_type='dict' is required: DataFrame.boxplot returns the Axes by
# default, and the code below indexes the result by artist name.
# The former `manage_xticks=False` keyword is dropped: matplotlib renamed it
# to `manage_ticks` in 3.1 and no longer accepts the old name. The explicit
# plt.xticks call below keeps the tick positions and labels as before.
box = cluster3_genes_expressed.boxplot(
    column=expression_columns,
    return_type='dict',
    patch_artist=True,
    showfliers=False,
    widths=0.6,
    medianprops=medianprops,
)
plt.setp(box['whiskers'], color='k', linestyle='-', linewidth=5)
plt.setp(box['boxes'], color='k', linestyle='-', linewidth=5)
# setp above paints the boxes black; give each box its own fill colour.
for patch, color in zip(box['boxes'], colors):
    patch.set_facecolor(color)
plt.tick_params(axis='y', direction='out')
plt.tick_params(axis='x', direction='out')
# Booleans instead of the strings 'off': newer matplotlib no longer treats
# 'on'/'off' strings as booleans in tick_params.
plt.tick_params(top=False, right=False)
# Positional argument: the keyword was `b` in older matplotlib and `visible`
# from 3.5 on, so passing it positionally works on both.
plt.grid(False)
plt.ylim((-5, 60))
plt.xticks(list(range(1, len(expression_columns) + 1)), expression_columns)
plt.savefig('box_plot_cluster_3_genes_fpkm.png')
plt.clf()

# Pairwise rank-sum tests between every pair of expression columns for the
# cluster 3 genes. The original calls discarded their results; they are now
# kept in `cluster3_ranksums` (keyed by column pair) and printed.
cluster3_ranksums = {}
# combinations() yields the pairs in the same order as the original calls.
for first, second in itertools.combinations(expression_columns, 2):
    result = scipy.stats.ranksums(
        cluster3_genes_expressed[first], cluster3_genes_expressed[second]
    )
    cluster3_ranksums[(first, second)] = result
    print("cluster 3 {} vs {}: statistic={:.4g}, p-value={:.4g}".format(
        first, second, result[0], result[1]))