Skip to content
Snippets Groups Projects
Commit f1d15a09 authored by Venkat Malladi's avatar Venkat Malladi
Browse files

Added rank-order analysis and analysis for cluster 3.

parent d3461a96
Branches
No related merge requests found
GRO_seq_TFSEE/box_plot_cluster_3_enhancers_rpkm.png

12.7 KiB

GRO_seq_TFSEE/box_plot_cluster_3_tfs_fpkm.png

12.1 KiB

GRO_seq_TFSEE/cluster3_enriched_tfs.png

96.1 KiB

GRO_seq_TFSEE/cluster4_enriched_tfs.png

103 KiB

This diff is collapsed.
symbol,ES_D0,ES_D2,ES_D5,ES_D7,ES_D10
ATF4,212.425,195.03,196.87,163.705,170.855
BACH1,10.78,10.375,12.685,15.295,11.275
CENPB,20.49,15.53,20.99,21.09,25.775
CREB1,33.125,28.415,30.6,30.75,29.89
CTCF,31.91,26.56,32.98,25.825,35.205
E2F4,45.005,31.09,42.27,33.1,39.23
E2F6,12.765,12.05,13.85,15.7,13.92
E2F8,3.76,0.915,4.715,0.98,5.53
ELK4,6.175,5.23,7.985,4.75,5.61
FOXD3,12.855,0.57,0.0,0.0,0.0
FOXJ3,19.505,21.48,34.235,47.38,24.415
FOXK1,2.245,7.215,8.445,6.16,7.32
GABPA,7.11,9.695,9.135,7.24,7.335
GLI2,17.815,4.495,1.7,0.37,1.075
HINFP,17.6,17.74,20.445,22.71,16.795
IRF2,6.345,5.31,6.055,8.94,6.925
KLF1,0.32,0.805,1.18,0.59,0.45
KLF16,17.695,18.915,10.375,6.245,6.515
MAFG,14.79,14.55,9.585,13.625,12.465
NR2C2,11.36,11.285,12.59,10.985,11.54
NRL,0.635,1.2,0.86,0.67,1.13
OTX2,16.925,441.385,30.42,3.66,1.185
PLAG1,3.42,3.9,4.71,6.5,4.775
RARG,13.71,12.405,9.065,4.305,2.855
REST,14.235,18.47,22.54,21.62,17.925
SP1,25.235,42.015,35.205,28.575,32.855
SP2,6.75,6.92,10.8,8.075,12.81
SP4,3.915,1.865,3.535,3.225,3.745
SP8,1.075,2.045,0.515,0.295,0.05
TCF4,21.4,12.385,19.12,26.95,17.01
TFAP4,17.345,11.925,23.92,8.635,8.93
ZIC3,27.06,95.46,4.98,0.695,0.085
ZNF143,24.635,31.6,23.82,23.235,25.555
ZNF263,11.53,10.735,14.55,12.815,15.215
......@@ -302,8 +302,8 @@ for h in headers:
# Note 5 TFs not represented ['TCFE2A', 'RAR', 'ZFP423', 'RXR', 'TCFCP2L1']
tf_cell_lines.columns = new_headers
tf_cell_lines = tf_fpkm[subset]
tf_cell_lines.columns = new_headers
tf_cell_lines = tf_fpkm[subset]
# Log2 scale FPKM
x = tf_cell_lines.stack()
......@@ -393,7 +393,7 @@ from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from sklearn.preprocessing import scale
from sklearn.manifold import TSNE
plt..style.use('classic')
plt.style.use('classic')
# t-SNE
......@@ -663,6 +663,81 @@ scipy.stats.ranksums(cluster4_enhancers['ES_D5'],cluster4_enhancers['ES_D10'])
scipy.stats.ranksums(cluster4_enhancers['ES_D7'],cluster4_enhancers['ES_D10'])
# Look at Cluster 3 for expression of TF's
# Time-point columns plotted and compared throughout this section.
cluster3_time_points = ['ES_D0', 'ES_D2', 'ES_D5', 'ES_D7', 'ES_D10']

# Rows of tf_cell_lines whose index labels are in cell_tf_values_std_cluster_3.
cluster3_tfs = tf_cell_lines.loc[cell_tf_values_std_cluster_3.index.values]
cluster3_tfs.to_csv("cluster_3_tfs.csv")

# return_type='dict' is needed for the box['whiskers'] / box['boxes'] lookups
# below: pandas versions whose default return is a matplotlib Axes would fail
# on the subscript.
# NOTE(review): manage_xticks was renamed manage_ticks in later matplotlib
# releases -- confirm against the version this script is pinned to.
box = cluster3_tfs.boxplot(
    column=cluster3_time_points,
    return_type='dict',
    patch_artist=True,
    showfliers=False,
    manage_xticks=False,
    widths=0.6,
    medianprops=medianprops,
)
plt.setp(box['whiskers'], color='k', linestyle='-', linewidth=3)
plt.setp(box['boxes'], color='k', linestyle='-', linewidth=3)
# One fill color per box, in time-point order (colors is defined earlier in the file).
for patch, color in zip(box['boxes'], colors):
    patch.set_facecolor(color)
plt.tick_params(axis='y', direction='out')
plt.tick_params(axis='x', direction='out')
# Booleans, not the strings 'off': a non-empty string is truthy, so on
# matplotlib versions without the legacy 'on'/'off' handling it enables the ticks.
plt.tick_params(top=False, right=False)
# Positional argument works whether the keyword is spelled b= or visible=.
plt.grid(False)
plt.ylim((-5, 55))
plt.xticks([1, 2, 3, 4, 5], cluster3_time_points)
plt.savefig('box_plot_cluster_3_tfs_fpkm.png')
plt.clf()

# Cluster 3 TFs: pairwise Wilcoxon rank-sum tests between time points.
# Original note: "1 e-3 (NS)" -- presumably the p-value threshold/outcome; confirm.
# Results are only echoed when run interactively; they are not stored.
scipy.stats.ranksums(cluster3_tfs['ES_D0'], cluster3_tfs['ES_D2'])
scipy.stats.ranksums(cluster3_tfs['ES_D0'], cluster3_tfs['ES_D5'])
scipy.stats.ranksums(cluster3_tfs['ES_D0'], cluster3_tfs['ES_D7'])
scipy.stats.ranksums(cluster3_tfs['ES_D0'], cluster3_tfs['ES_D10'])
scipy.stats.ranksums(cluster3_tfs['ES_D2'], cluster3_tfs['ES_D5'])
scipy.stats.ranksums(cluster3_tfs['ES_D2'], cluster3_tfs['ES_D7'])
scipy.stats.ranksums(cluster3_tfs['ES_D2'], cluster3_tfs['ES_D10'])
scipy.stats.ranksums(cluster3_tfs['ES_D5'], cluster3_tfs['ES_D7'])
scipy.stats.ranksums(cluster3_tfs['ES_D5'], cluster3_tfs['ES_D10'])
scipy.stats.ranksums(cluster3_tfs['ES_D7'], cluster3_tfs['ES_D10'])

# Cluster 3 enhancers: keep the columns of motif_enhancers that are non-zero in
# every cluster-3 row, then use those column labels to select rows of
# only_rpkm_values.
cluster3_motifs = motif_enhancers.loc[cell_tf_values_std_cluster_3.index.values]
cluster3_nonzero_columns = (cluster3_motifs != 0).all(axis=0)
cluster3_enhancers = only_rpkm_values.loc[
    cluster3_motifs.loc[:, cluster3_nonzero_columns].columns.values
]
cluster3_enhancers.to_csv("cluster_3_enhancers.csv")

box = cluster3_enhancers.boxplot(
    column=cluster3_time_points,
    return_type='dict',
    patch_artist=True,
    showfliers=False,
    manage_xticks=False,
    widths=0.6,
    medianprops=medianprops,
)
plt.setp(box['whiskers'], color='k', linestyle='-', linewidth=3)
plt.setp(box['boxes'], color='k', linestyle='-', linewidth=3)
for patch, color in zip(box['boxes'], colors):
    patch.set_facecolor(color)
plt.tick_params(axis='y', direction='out')
plt.tick_params(axis='x', direction='out')
plt.tick_params(top=False, right=False)
plt.grid(False)
plt.ylim((-5, 65))
plt.xticks([1, 2, 3, 4, 5], cluster3_time_points)
plt.savefig('box_plot_cluster_3_enhancers_rpkm.png')
plt.clf()

# Cluster 3 enhancers: pairwise Wilcoxon rank-sum tests between time points.
# Original note: "1 e-12" -- presumably the p-value magnitude; confirm.
scipy.stats.ranksums(cluster3_enhancers['ES_D0'], cluster3_enhancers['ES_D2'])
scipy.stats.ranksums(cluster3_enhancers['ES_D0'], cluster3_enhancers['ES_D5'])
scipy.stats.ranksums(cluster3_enhancers['ES_D0'], cluster3_enhancers['ES_D7'])
scipy.stats.ranksums(cluster3_enhancers['ES_D0'], cluster3_enhancers['ES_D10'])
scipy.stats.ranksums(cluster3_enhancers['ES_D2'], cluster3_enhancers['ES_D5'])
scipy.stats.ranksums(cluster3_enhancers['ES_D2'], cluster3_enhancers['ES_D7'])
scipy.stats.ranksums(cluster3_enhancers['ES_D2'], cluster3_enhancers['ES_D10'])
scipy.stats.ranksums(cluster3_enhancers['ES_D5'], cluster3_enhancers['ES_D7'])
scipy.stats.ranksums(cluster3_enhancers['ES_D5'], cluster3_enhancers['ES_D10'])
scipy.stats.ranksums(cluster3_enhancers['ES_D7'], cluster3_enhancers['ES_D10'])
## Analysis of only RNA-seq
# 1. Z-score Standardize for each cell line to see important TF's
......
import numpy as np
import pandas as pd
import csv
import matplotlib.pyplot as plt
def _plot_rank_ordered_delta(cluster_rows, late_minus_early, color, title, out_png):
    """Rank one cluster's rows by early/late mean difference and plot the ranking.

    Parameters
    ----------
    cluster_rows : pandas.DataFrame
        Rows for a single cluster; must contain the columns ES_D0, ES_D2,
        ES_D5, ES_D7 and ES_D10. The frame is copied, never modified in place.
    late_minus_early : bool
        True computes ``late - early``; False computes ``early - late``.
    color : str
        Matplotlib color for the scatter markers.
    title : str
        Figure super-title.
    out_png : str
        Path the figure is written to.

    Returns
    -------
    pandas.DataFrame
        Copy of ``cluster_rows`` with 'early', 'late', 'diff' and 'rank' added.
    """
    # Work on an explicit copy: assigning columns onto a boolean-filtered slice
    # triggers pandas' SettingWithCopyWarning.
    ranked = cluster_rows.copy()
    ranked['early'] = ranked[['ES_D0', 'ES_D2', 'ES_D5']].mean(axis=1)
    ranked['late'] = ranked[['ES_D7', 'ES_D10']].mean(axis=1)
    if late_minus_early:
        ranked['diff'] = ranked['late'] - ranked['early']
    else:
        ranked['diff'] = ranked['early'] - ranked['late']
    ranked['rank'] = ranked['diff'].rank()

    # Cubic polynomial through (rank, diff), used only to draw a smooth trend line.
    trend = np.poly1d(np.polyfit(ranked['rank'], ranked['diff'], 3))
    # Evaluate the fit over the ranks actually present rather than a hard-coded
    # 1..36, so the curve neither extrapolates past the data nor stops short.
    curve_x = np.linspace(ranked['rank'].min(), ranked['rank'].max(),
                          num=len(ranked) * 10)

    fig = plt.figure(figsize=(25, 20))
    plt.plot(curve_x, trend(curve_x), color='k', linewidth=4.0)
    plt.scatter(x=ranked['rank'], y=ranked['diff'], color=color, s=600)
    plt.ylim([-0.5, 3.5])
    plt.suptitle(title, fontsize=8, fontweight='bold')
    plt.xlabel('Rank Order', fontsize=8, fontweight='bold')
    plt.ylabel('delta z', fontsize=8, fontweight='bold')
    plt.savefig(out_png)
    # Close rather than clear, so the figure created above is released.
    plt.close(fig)
    return ranked


tfsee = pd.read_csv('clustering_tfs.csv')

# NOTE(review): the 'cluster' values used here (3 and 2) are one less than the
# cluster numbers in the variable and file names (4 and 3) -- presumably the
# labels in the CSV are zero-based; confirm.
tfsee_cluster4 = _plot_rank_ordered_delta(
    tfsee[tfsee['cluster'] == 3],
    late_minus_early=True,
    color='#3999CA',
    title='Late Differentiation Enriched TFS',
    out_png='cluster4_enriched_tfs.png',
)

tfsee_cluster3 = _plot_rank_ordered_delta(
    tfsee[tfsee['cluster'] == 2],
    late_minus_early=False,
    color='#316A45',
    title='Early Enriched TFS',
    out_png='cluster3_enriched_tfs.png',
)
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment