diff --git a/GRO_seq_TFSEE/closest_genes.py b/GRO_seq_TFSEE/closest_genes.py index a91da84b0f47a24cee2e6c7247e791cbfc5d5fcb..320acf93953a44243610b00994d91eda2c2c9751 100644 --- a/GRO_seq_TFSEE/closest_genes.py +++ b/GRO_seq_TFSEE/closest_genes.py @@ -11,7 +11,7 @@ import scipy fpkm = pd.read_table("rna.tsv") gene_names_mapping = pd.read_csv("../gencode.v19.annotation_protein_coding_ids.txt",names=['gene_id', 'symbol']) fpkm_symbol = fpkm.merge(gene_names_mapping) -fpkm_symbol = fpkm_symbol.set_index(['gene_id']) +fpkm_symbol = fpkm.set_index(['gene_id']) # Enhancers enhancers_universe = pd.DataFrame.from_csv("GRO-seq_enhancers.bed", sep="\t", header=None, index_col=3) @@ -52,7 +52,7 @@ plt.xticks([1,2,3,4,5], ['ES_D0', 'ES_D2', 'ES_D5', 'ES_D7', 'ES_D10']) plt.savefig('box_plot_cluster_4_genes_fpkm.png') plt.clf() -# Cluster tfs 1 e-4 +# Cluster tfs 0.05 scipy.stats.ranksums(cluster4_genes_expressed['ES_D0'],cluster4_genes_expressed['ES_D2']) scipy.stats.ranksums(cluster4_genes_expressed['ES_D0'],cluster4_genes_expressed['ES_D5']) scipy.stats.ranksums(cluster4_genes_expressed['ES_D0'],cluster4_genes_expressed['ES_D7']) @@ -78,17 +78,17 @@ enhancers_universe_cluster_3.to_csv("cluster_3_enhancers_locations.bed", sep="\t # Read in nearest genes -genes_id = pd.DataFrame.from_csv("cluster_4_genes.txt", sep="\t", header=None, index_col=None) +genes_id = pd.DataFrame.from_csv("cluster_3_genes.txt", sep="\t", header=None, index_col=None) needed_rows = [row for row in fpkm_symbol.index if row in genes_id[0].values] -cluster4_genes_expressed = fpkm_symbol.loc[needed_rows] +cluster3_genes_expressed = fpkm_symbol.loc[needed_rows] # col_colors plt.style.use('classic') colors = ["#FFD66F","#2E6A44","#862743", "#4FA6C7", "#3398CC"] medianprops = dict(linestyle='-', linewidth=2, color='black') -box = cluster4_genes_expressed.boxplot(column=['ES_D0', 'ES_D2', 'ES_D5', 'ES_D7', 'ES_D10'],patch_artist=True,showfliers=False,manage_xticks=False,widths = 0.6, medianprops = medianprops) +box = cluster3_genes_expressed.boxplot(column=['ES_D0', 'ES_D2', 'ES_D5', 'ES_D7', 'ES_D10'],patch_artist=True,showfliers=False,manage_xticks=False,widths = 0.6, medianprops = medianprops) plt.setp(box['whiskers'], color='k', linestyle='-', linewidth = 5) plt.setp(box['boxes'], color='k', linestyle='-', linewidth = 5) @@ -99,23 +99,23 @@ plt.tick_params(axis='y', direction='out') plt.tick_params(axis='x', direction='out') plt.tick_params(top='off', right='off') plt.grid(b=False) -plt.ylim((-5,60)) +plt.ylim((-5,50)) plt.xticks([1,2,3,4,5], ['ES_D0', 'ES_D2', 'ES_D5', 'ES_D7', 'ES_D10']) -plt.savefig('box_plot_cluster_4_genes_fpkm.png') +plt.savefig('box_plot_cluster_3_genes_fpkm.png') plt.clf() # Cluster tfs 1 e-4 -scipy.stats.ranksums(cluster4_genes_expressed['ES_D0'],cluster4_genes_expressed['ES_D2']) -scipy.stats.ranksums(cluster4_genes_expressed['ES_D0'],cluster4_genes_expressed['ES_D5']) -scipy.stats.ranksums(cluster4_genes_expressed['ES_D0'],cluster4_genes_expressed['ES_D7']) -scipy.stats.ranksums(cluster4_genes_expressed['ES_D0'],cluster4_genes_expressed['ES_D10']) +scipy.stats.ranksums(cluster3_genes_expressed['ES_D0'],cluster3_genes_expressed['ES_D2']) +scipy.stats.ranksums(cluster3_genes_expressed['ES_D0'],cluster3_genes_expressed['ES_D5']) +scipy.stats.ranksums(cluster3_genes_expressed['ES_D0'],cluster3_genes_expressed['ES_D7']) +scipy.stats.ranksums(cluster3_genes_expressed['ES_D0'],cluster3_genes_expressed['ES_D10']) -scipy.stats.ranksums(cluster4_genes_expressed['ES_D2'],cluster4_genes_expressed['ES_D5']) -scipy.stats.ranksums(cluster4_genes_expressed['ES_D2'],cluster4_genes_expressed['ES_D7']) -scipy.stats.ranksums(cluster4_genes_expressed['ES_D2'],cluster4_genes_expressed['ES_D10']) +scipy.stats.ranksums(cluster3_genes_expressed['ES_D2'],cluster3_genes_expressed['ES_D5']) +scipy.stats.ranksums(cluster3_genes_expressed['ES_D2'],cluster3_genes_expressed['ES_D7']) +scipy.stats.ranksums(cluster3_genes_expressed['ES_D2'],cluster3_genes_expressed['ES_D10']) -scipy.stats.ranksums(cluster4_genes_expressed['ES_D5'],cluster4_genes_expressed['ES_D7']) -scipy.stats.ranksums(cluster4_genes_expressed['ES_D5'],cluster4_genes_expressed['ES_D10']) +scipy.stats.ranksums(cluster3_genes_expressed['ES_D5'],cluster3_genes_expressed['ES_D7']) +scipy.stats.ranksums(cluster3_genes_expressed['ES_D5'],cluster3_genes_expressed['ES_D10']) -scipy.stats.ranksums(cluster4_genes_expressed['ES_D7'],cluster4_genes_expressed['ES_D10']) +scipy.stats.ranksums(cluster3_genes_expressed['ES_D7'],cluster3_genes_expressed['ES_D10'])