# NOTE: this section was recovered from a newline-collapsed unified diff of
# GRO_seq_TFSEE/matrix_analysis.py (hunk starting at line 518).  The same
# change set adds the binary files
#   GRO_seq_TFSEE/box_plot_cluster_3_tfs_fpkm.png
#   GRO_seq_TFSEE/box_plot_cluster_3_enhancers_rpkm.png
# which are the two figures written by the savefig calls below.
plt.clf()

# Wilcox rank sum test:

# Columns that are compared against each other and plotted, in plot order.
ES_DAYS = ['ES_D0', 'ES_D2', 'ES_D5', 'ES_D7', 'ES_D10']


def _pairwise_ranksums(frame, label, columns=None):
    """Run ``scipy.stats.ranksums`` on every unordered pair of columns.

    Pairs are visited in the same order the hand-written calls used
    (D0-D2, D0-D5, ..., D7-D10).  Each result is printed so that running
    the file as a script reports the tests instead of discarding them.

    Parameters
    ----------
    frame : object indexable by column name (a DataFrame in this script)
    label : str, prefix used in the printed report
    columns : list of str or None; defaults to ``ES_DAYS``

    Returns
    -------
    dict mapping ``(first, second)`` to ``(statistic, p_value)``
    """
    if columns is None:
        columns = ES_DAYS
    results = {}
    for position, first in enumerate(columns):
        for second in columns[position + 1:]:
            # Tuple unpacking works whether ranksums returns a plain tuple
            # or a named result object.
            statistic, p_value = scipy.stats.ranksums(frame[first],
                                                      frame[second])
            results[(first, second)] = (statistic, p_value)
            print('%s %s vs %s: statistic=%.4g p-value=%.4g'
                  % (label, first, second, statistic, p_value))
    return results


def _save_day_boxplot(frame, y_limits, out_path):
    """Draw the per-day box plot of *frame*, save it to *out_path*, clear.

    Relies on the module-level ``medianprops`` and ``colors`` defined
    earlier in this file.  Returns the dict of matplotlib artists so the
    caller can keep it bound to ``box`` as the original code did.
    """
    # return_type='dict' is required for the box['...'] lookups below;
    # the pandas default ('axes') returns an Axes, which is not subscriptable.
    # NOTE(review): manage_xticks was renamed manage_ticks in newer
    # matplotlib; kept as-is to match the version this script was written
    # against -- confirm before upgrading.
    box = frame.boxplot(column=list(ES_DAYS), patch_artist=True,
                        showfliers=False, manage_xticks=False, widths=0.6,
                        medianprops=medianprops, return_type='dict')
    plt.setp(box['whiskers'], color='k', linestyle='-', linewidth=3)
    plt.setp(box['boxes'], color='k', linestyle='-', linewidth=3)

    for patch, color in zip(box['boxes'], colors):
        patch.set_facecolor(color)

    plt.tick_params(axis='y', direction='out')
    plt.tick_params(axis='x', direction='out')
    # Booleans rather than the strings 'off': a non-empty string is truthy,
    # so releases without the legacy string handling would turn ticks ON.
    plt.tick_params(top=False, right=False)
    # Positional flag works both before and after the b -> visible rename.
    plt.grid(False)
    plt.ylim(y_limits)
    plt.xticks(list(range(1, len(ES_DAYS) + 1)), ES_DAYS)
    plt.savefig(out_path)
    plt.clf()
    return box


# Clusters 1-4.  The original notes read "1 e-5" for every cluster
# (presumably the significance level or observed p-value -- confirm).
cluster_ranksum_results = {
    'cluster_1': _pairwise_ranksums(cell_tf_values_std_cluster_1, 'cluster_1'),
    'cluster_2': _pairwise_ranksums(cell_tf_values_std_cluster_2, 'cluster_2'),
    'cluster_3': _pairwise_ranksums(cell_tf_values_std_cluster_3, 'cluster_3'),
    'cluster_4': _pairwise_ranksums(cell_tf_values_std_cluster_4, 'cluster_4'),
}


# Look at Cluster 3 for expression of TF's
cluster3_tfs = tf_cell_lines.loc[cell_tf_values_std_cluster_3.index.values]
box = _save_day_boxplot(cluster3_tfs, (-5, 65),
                        'box_plot_cluster_3_tfs_fpkm.png')

# Cluster 3 TFs (original note: "1 e-4" -- meaning not shown here; confirm)
cluster3_tfs_ranksums = _pairwise_ranksums(cluster3_tfs, 'cluster_3_tfs')


# Rows of motif_enhancers for the cluster 3 index, then only the columns
# that are non-zero in every one of those rows; those column labels select
# the rows of only_rpkm_values.
cluster3_motifs = motif_enhancers.loc[cell_tf_values_std_cluster_3.index.values]
cluster3_enhancers = only_rpkm_values.loc[
    cluster3_motifs.loc[:, (cluster3_motifs != 0).all(axis=0)].columns.values
]
box = _save_day_boxplot(cluster3_enhancers, (-5, 105),
                        'box_plot_cluster_3_enhancers_rpkm.png')

# Cluster 3 enhancers (the original comment here said "Cluster tfs 1 e-4",
# copied from the TF section above)
cluster3_enhancers_ranksums = _pairwise_ranksums(cluster3_enhancers,
                                                 'cluster_3_enhancers')


## Analysis of only RNA-seq
# 1. Z-score Standardize for each cell line to see important TF's