Skip to content
Snippets Groups Projects
Code owners
Assign users and groups as approvers for specific file changes. Learn more.
Forked from Venkat Malladi / TFSEE
43 commits behind the upstream repository.
histone_enhancer_plots.py 5.78 KiB
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

rpkm_file = pd.read_csv("Histone_pe_filtered_peaks.tsv", sep='\t')


# Filter for potential enhancers with both H3K27ac and H3K4me1 are >= 1 or active enhancers
potential_enhancers = rpkm_file[((rpkm_file['ES_D0_H3K4me1'] >=1) & (rpkm_file['ES_D0_H3K27ac'] < 1)) | ((rpkm_file['ES_D2_H3K4me1'] >=1) & (rpkm_file['ES_D2_H3K27ac'] < 1))
 | ((rpkm_file['ES_D5_H3K4me1'] >=1) & (rpkm_file['ES_D5_H3K27ac'] < 1))  | ((rpkm_file['ES_D7_H3K4me1'] >=1) & (rpkm_file['ES_D7_H3K27ac'] < 1))  | ((rpkm_file['ES_D10_H3K4me1'] >=1) & (rpkm_file['ES_D10_H3K27ac'] < 1))]


# Filter for each cell type

ES_D0 = potential_enhancers.filter(items=['ES_D0_H3K4me1', 'ES_D0_H3K27ac'])
ES_D2 = potential_enhancers.filter(items=['ES_D2_H3K4me1', 'ES_D2_H3K27ac'])
ES_D5 = potential_enhancers.filter(items=['ES_D5_H3K4me1', 'ES_D5_H3K27ac'])
ES_D7 = potential_enhancers.filter(items=['ES_D7_H3K4me1', 'ES_D7_H3K27ac'])
ES_D10 = potential_enhancers.filter(items=['ES_D10_H3K4me1', 'ES_D10_H3K27ac'])

# get numbers for each
es_d0_k4_only =  ES_D0[(ES_D0['ES_D0_H3K4me1'] >=1) & (ES_D0['ES_D0_H3K27ac'] < 1)]
es_d0_both =  ES_D0[(ES_D0['ES_D0_H3K4me1'] >=1) & (ES_D0['ES_D0_H3K27ac'] >= 1)]
es_d0_none = ES_D0[(ES_D0['ES_D0_H3K4me1'] < 1) & (ES_D0['ES_D0_H3K27ac'] < 1)]

es_d2_k4_only =  ES_D2[(ES_D2['ES_D2_H3K4me1'] >=1) & (ES_D2['ES_D2_H3K27ac'] < 1)]
es_d2_both =  ES_D2[(ES_D2['ES_D2_H3K4me1'] >=1) & (ES_D2['ES_D2_H3K27ac'] >= 1)]
es_d2_none = ES_D2[(ES_D2['ES_D2_H3K4me1'] < 1) & (ES_D2['ES_D2_H3K27ac'] < 1)]

es_d5_k4_only =  ES_D5[(ES_D5['ES_D5_H3K4me1'] >=1) & (ES_D5['ES_D5_H3K27ac'] < 1)]
es_d5_both =  ES_D5[(ES_D5['ES_D5_H3K4me1'] >=1) & (ES_D5['ES_D5_H3K27ac'] >= 1)]
es_d5_none = ES_D5[(ES_D5['ES_D5_H3K4me1'] < 1) & (ES_D5['ES_D5_H3K27ac'] < 1)]

es_d7_k4_only =  ES_D7[(ES_D7['ES_D7_H3K4me1'] >=1) & (ES_D7['ES_D7_H3K27ac'] < 1)]
es_d7_both =  ES_D7[(ES_D7['ES_D7_H3K4me1'] >=1) & (ES_D7['ES_D7_H3K27ac'] >= 1)]
es_d7_none = ES_D7[(ES_D7['ES_D7_H3K4me1'] < 1) & (ES_D7['ES_D7_H3K27ac'] < 1)]

es_d10_k4_only =  ES_D10[(ES_D10['ES_D10_H3K4me1'] >=1) & (ES_D10['ES_D10_H3K27ac'] < 1)]
es_d10_both =  ES_D10[(ES_D10['ES_D10_H3K4me1'] >=1) & (ES_D10['ES_D10_H3K27ac'] >= 1)]
es_d10_none = ES_D10[(ES_D10['ES_D10_H3K4me1'] < 1) & (ES_D10['ES_D10_H3K27ac'] < 1)]


# Write out files for each processing
potential_enhancers.to_csv("ES_Histone_universe_enhancers.tsv",sep='\t',header=True,index=False)
es_d0_enhancers =  potential_enhancers[(potential_enhancers['ES_D0_H3K4me1'] >=1) & (potential_enhancers['ES_D0_H3K27ac'] >= 1)]
es_d2_enhancers =  potential_enhancers[(potential_enhancers['ES_D2_H3K4me1'] >=1) & (potential_enhancers['ES_D2_H3K27ac'] >= 1)]
es_d5_enhancers =  potential_enhancers[(potential_enhancers['ES_D5_H3K4me1'] >=1) & (potential_enhancers['ES_D5_H3K27ac'] >= 1)]
es_d7_enhancers =  potential_enhancers[(potential_enhancers['ES_D7_H3K4me1'] >=1) & (potential_enhancers['ES_D7_H3K27ac'] >= 1)]
es_d10_enhancers =  potential_enhancers[(potential_enhancers['ES_D10_H3K4me1'] >=1) & (potential_enhancers['ES_D10_H3K27ac'] >= 1)]

es_d0_enhancers.to_csv("ES_D0_Histone_enhancers.bed", index_label="gene_id",sep='\t', columns=["Chr", "Start", "End","name"],header=False,index=False)
es_d2_enhancers.to_csv("ES_D2_Histone_enhancers.bed", index_label="gene_id",sep='\t', columns=["Chr", "Start", "End","name"],header=False,index=False)
es_d5_enhancers.to_csv("ES_D5_Histone_enhancers.bed", index_label="gene_id",sep='\t', columns=["Chr", "Start", "End","name"],header=False,index=False)
es_d7_enhancers.to_csv("ES_D7_Histone_enhancers.bed", index_label="gene_id",sep='\t', columns=["Chr", "Start", "End","name"],header=False,index=False)
es_d10_enhancers.to_csv("ES_D10_Histone_enhancers.bed", index_label="gene_id",sep='\t', columns=["Chr", "Start", "End","name"],header=False,index=False)

# Make the Graph
raw_data = {'Cell': ['hES', 'DE', 'GT', 'FG', 'PE'],
        'unmarked': [float(len(es_d0_none)),float(len(es_d2_none)),float(len(es_d5_none)),float(len(es_d7_none)),float(len(es_d10_none))],
        'H3K4me1 alone': [len(es_d0_k4_only),len(es_d2_k4_only),len(es_d5_k4_only),len(es_d7_k4_only),len(es_d10_k4_only)],
        'H3K4me1+, H3K27ac+': [len(es_d0_both),len(es_d2_both),len(es_d5_both),len(es_d7_both),len(es_d10_both)]}

df = pd.DataFrame(raw_data, columns = ['Cell', 'unmarked', 'H3K4me1 alone', 'H3K4me1+, H3K27ac+'])

# Create a figure with a single subplot
f, ax = plt.subplots(1, figsize=(10,5))

# Set bar width at 1
bar_width = 1

# positions of the left bar-boundaries
bar_l = [i for i in range(len(df['unmarked']))]

# positions of the x-axis ticks (center of the bars as bar labels)
tick_pos = [i+(bar_width/2) for i in bar_l]

# Create the total enhancers
totals = [i+j+k for i,j,k in zip(df['unmarked'], df['H3K4me1 alone'], df['H3K4me1+, H3K27ac+'])]

# Create the percentage of the total unmarked enhancers value for each cell was
unmarked_rel = [i / float(j) * 100 for  i,j in zip(df['unmarked'], totals)]

# Create the percentage of the total H3K4me1 alone enhancers value for each cell
alone_rel = [i / float(j) * 100 for  i,j in zip(df['H3K4me1 alone'], totals)]

# Create the percentage of the total H3K4me1+, H3K27ac+ enhancers value for each cell
both_rel = [i / float(j) * 100 for  i,j in zip(df['H3K4me1+, H3K27ac+'], totals)]


N = 5
ind = np.arange(N)    # the x locations for the groups
width = 0.5       # the width of the bars: can also be len(x) sequence

p1 = plt.bar(ind, both_rel, width, color='#E62128')
p2 = plt.bar(ind, alone_rel, width,
             bottom=both_rel,color='#108DCC')
p3 = plt.bar(ind, unmarked_rel, width,
              bottom=[i+j for i,j in zip(both_rel, alone_rel)],color='#D2D5D4')

plt.ylabel('% total enhancers')
plt.title('Stage')
plt.xticks(ind, ('hES', 'DE', 'GT', 'FG', 'PE'))
plt.yticks(np.arange(0, 110, 10))

plt.savefig('Histone_percentages.png')
plt.clf()