An error occurred while loading the file. Please try again.
-
Venkat Malladi authored0248c449
Code owners
Assign users and groups as approvers for specific file changes. Learn more.
Forked from
Venkat Malladi / TFSEE
43 commits behind the upstream repository.
histone_enhancer_plots.py 5.78 KiB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
rpkm_file = pd.read_csv("Histone_pe_filtered_peaks.tsv", sep='\t')
# Filter for potential enhancers with both H3K27ac and H3K4me1 are >= 1 or active enhancers
potential_enhancers = rpkm_file[((rpkm_file['ES_D0_H3K4me1'] >=1) & (rpkm_file['ES_D0_H3K27ac'] < 1)) | ((rpkm_file['ES_D2_H3K4me1'] >=1) & (rpkm_file['ES_D2_H3K27ac'] < 1))
| ((rpkm_file['ES_D5_H3K4me1'] >=1) & (rpkm_file['ES_D5_H3K27ac'] < 1)) | ((rpkm_file['ES_D7_H3K4me1'] >=1) & (rpkm_file['ES_D7_H3K27ac'] < 1)) | ((rpkm_file['ES_D10_H3K4me1'] >=1) & (rpkm_file['ES_D10_H3K27ac'] < 1))]
# Filter for each cell type
ES_D0 = potential_enhancers.filter(items=['ES_D0_H3K4me1', 'ES_D0_H3K27ac'])
ES_D2 = potential_enhancers.filter(items=['ES_D2_H3K4me1', 'ES_D2_H3K27ac'])
ES_D5 = potential_enhancers.filter(items=['ES_D5_H3K4me1', 'ES_D5_H3K27ac'])
ES_D7 = potential_enhancers.filter(items=['ES_D7_H3K4me1', 'ES_D7_H3K27ac'])
ES_D10 = potential_enhancers.filter(items=['ES_D10_H3K4me1', 'ES_D10_H3K27ac'])
# get numbers for each
es_d0_k4_only = ES_D0[(ES_D0['ES_D0_H3K4me1'] >=1) & (ES_D0['ES_D0_H3K27ac'] < 1)]
es_d0_both = ES_D0[(ES_D0['ES_D0_H3K4me1'] >=1) & (ES_D0['ES_D0_H3K27ac'] >= 1)]
es_d0_none = ES_D0[(ES_D0['ES_D0_H3K4me1'] < 1) & (ES_D0['ES_D0_H3K27ac'] < 1)]
es_d2_k4_only = ES_D2[(ES_D2['ES_D2_H3K4me1'] >=1) & (ES_D2['ES_D2_H3K27ac'] < 1)]
es_d2_both = ES_D2[(ES_D2['ES_D2_H3K4me1'] >=1) & (ES_D2['ES_D2_H3K27ac'] >= 1)]
es_d2_none = ES_D2[(ES_D2['ES_D2_H3K4me1'] < 1) & (ES_D2['ES_D2_H3K27ac'] < 1)]
es_d5_k4_only = ES_D5[(ES_D5['ES_D5_H3K4me1'] >=1) & (ES_D5['ES_D5_H3K27ac'] < 1)]
es_d5_both = ES_D5[(ES_D5['ES_D5_H3K4me1'] >=1) & (ES_D5['ES_D5_H3K27ac'] >= 1)]
es_d5_none = ES_D5[(ES_D5['ES_D5_H3K4me1'] < 1) & (ES_D5['ES_D5_H3K27ac'] < 1)]
es_d7_k4_only = ES_D7[(ES_D7['ES_D7_H3K4me1'] >=1) & (ES_D7['ES_D7_H3K27ac'] < 1)]
es_d7_both = ES_D7[(ES_D7['ES_D7_H3K4me1'] >=1) & (ES_D7['ES_D7_H3K27ac'] >= 1)]
es_d7_none = ES_D7[(ES_D7['ES_D7_H3K4me1'] < 1) & (ES_D7['ES_D7_H3K27ac'] < 1)]
es_d10_k4_only = ES_D10[(ES_D10['ES_D10_H3K4me1'] >=1) & (ES_D10['ES_D10_H3K27ac'] < 1)]
es_d10_both = ES_D10[(ES_D10['ES_D10_H3K4me1'] >=1) & (ES_D10['ES_D10_H3K27ac'] >= 1)]
es_d10_none = ES_D10[(ES_D10['ES_D10_H3K4me1'] < 1) & (ES_D10['ES_D10_H3K27ac'] < 1)]
# Write out files for each processing
potential_enhancers.to_csv("ES_Histone_universe_enhancers.tsv",sep='\t',header=True,index=False)
es_d0_enhancers = potential_enhancers[(potential_enhancers['ES_D0_H3K4me1'] >=1) & (potential_enhancers['ES_D0_H3K27ac'] >= 1)]
es_d2_enhancers = potential_enhancers[(potential_enhancers['ES_D2_H3K4me1'] >=1) & (potential_enhancers['ES_D2_H3K27ac'] >= 1)]
es_d5_enhancers = potential_enhancers[(potential_enhancers['ES_D5_H3K4me1'] >=1) & (potential_enhancers['ES_D5_H3K27ac'] >= 1)]
es_d7_enhancers = potential_enhancers[(potential_enhancers['ES_D7_H3K4me1'] >=1) & (potential_enhancers['ES_D7_H3K27ac'] >= 1)]
es_d10_enhancers = potential_enhancers[(potential_enhancers['ES_D10_H3K4me1'] >=1) & (potential_enhancers['ES_D10_H3K27ac'] >= 1)]
es_d0_enhancers.to_csv("ES_D0_Histone_enhancers.bed", index_label="gene_id",sep='\t', columns=["Chr", "Start", "End","name"],header=False,index=False)
es_d2_enhancers.to_csv("ES_D2_Histone_enhancers.bed", index_label="gene_id",sep='\t', columns=["Chr", "Start", "End","name"],header=False,index=False)
es_d5_enhancers.to_csv("ES_D5_Histone_enhancers.bed", index_label="gene_id",sep='\t', columns=["Chr", "Start", "End","name"],header=False,index=False)
es_d7_enhancers.to_csv("ES_D7_Histone_enhancers.bed", index_label="gene_id",sep='\t', columns=["Chr", "Start", "End","name"],header=False,index=False)
es_d10_enhancers.to_csv("ES_D10_Histone_enhancers.bed", index_label="gene_id",sep='\t', columns=["Chr", "Start", "End","name"],header=False,index=False)
# Make the Graph
raw_data = {'Cell': ['hES', 'DE', 'GT', 'FG', 'PE'],
'unmarked': [float(len(es_d0_none)),float(len(es_d2_none)),float(len(es_d5_none)),float(len(es_d7_none)),float(len(es_d10_none))],
'H3K4me1 alone': [len(es_d0_k4_only),len(es_d2_k4_only),len(es_d5_k4_only),len(es_d7_k4_only),len(es_d10_k4_only)],
'H3K4me1+, H3K27ac+': [len(es_d0_both),len(es_d2_both),len(es_d5_both),len(es_d7_both),len(es_d10_both)]}
df = pd.DataFrame(raw_data, columns = ['Cell', 'unmarked', 'H3K4me1 alone', 'H3K4me1+, H3K27ac+'])
# Create a figure with a single subplot
f, ax = plt.subplots(1, figsize=(10,5))
# Set bar width at 1
bar_width = 1
# positions of the left bar-boundaries
bar_l = [i for i in range(len(df['unmarked']))]
# positions of the x-axis ticks (center of the bars as bar labels)
tick_pos = [i+(bar_width/2) for i in bar_l]
# Create the total enhancers
totals = [i+j+k for i,j,k in zip(df['unmarked'], df['H3K4me1 alone'], df['H3K4me1+, H3K27ac+'])]
# Create the percentage of the total unmarked enhancers value for each cell was
unmarked_rel = [i / float(j) * 100 for i,j in zip(df['unmarked'], totals)]
# Create the percentage of the total H3K4me1 alone enhancers value for each cell
alone_rel = [i / float(j) * 100 for i,j in zip(df['H3K4me1 alone'], totals)]
# Create the percentage of the total H3K4me1+, H3K27ac+ enhancers value for each cell
both_rel = [i / float(j) * 100 for i,j in zip(df['H3K4me1+, H3K27ac+'], totals)]
N = 5
ind = np.arange(N) # the x locations for the groups
width = 0.5 # the width of the bars: can also be len(x) sequence
p1 = plt.bar(ind, both_rel, width, color='#E62128')
p2 = plt.bar(ind, alone_rel, width,
bottom=both_rel,color='#108DCC')
p3 = plt.bar(ind, unmarked_rel, width,
bottom=[i+j for i,j in zip(both_rel, alone_rel)],color='#D2D5D4')
plt.ylabel('% total enhancers')
plt.title('Stage')
plt.xticks(ind, ('hES', 'DE', 'GT', 'FG', 'PE'))
plt.yticks(np.arange(0, 110, 10))
plt.savefig('Histone_percentages.png')
plt.clf()