Skip to content
Snippets Groups Projects
Commit 0e8e488c authored by Venkat Malladi's avatar Venkat Malladi
Browse files

Update Histone analysis.

parent 81b2a391
Branches
No related merge requests found
......@@ -59,3 +59,54 @@ plt.xlabel('Rank Order',fontsize=8, fontweight='bold')
plt.ylabel('delta z',fontsize=8, fontweight='bold')
plt.savefig('cluster3_enriched_tfs.png')
plt.clf()
tfsee_cluster2 = tfsee[tfsee['cluster'] == 1]
tfsee_cluster2['early'] = tfsee_cluster2[['ES_D0','ES_D2','ES_D7','ES_D10']].mean(axis=1)
tfsee_cluster2['late'] = tfsee_cluster2[['ES_D5']].mean(axis=1)
tfsee_cluster2['diff'] = tfsee_cluster2['late'] - tfsee_cluster2['early']
tfsee_cluster2['rank'] = tfsee_cluster2['diff'].rank()
x = list(tfsee_cluster2['rank'])
z = np.polyfit(tfsee_cluster2['rank'], tfsee_cluster2['diff'], 3)
f = np.poly1d(z)
x_new = np.linspace(1, 24, num=len(x)*10)
plt.figure(figsize=(25,20))
plt.plot(x_new, f(x_new), color = 'k', linewidth=4.0)
plt.scatter(x=tfsee_cluster2['rank'], y=tfsee_cluster2['diff'], color='#C42555', s=600)
plt.ylim([-0.5,3.0])
plt.suptitle('GT TFS', fontsize=8, fontweight='bold')
plt.xlabel('Rank Order',fontsize=8, fontweight='bold')
plt.ylabel('delta z',fontsize=8, fontweight='bold')
plt.savefig('cluster2_enriched_tfs.png')
plt.clf()
tfsee_cluster1 = tfsee[tfsee['cluster'] == 0]
tfsee_cluster1['early'] = tfsee_cluster1[['ES_D0','ES_D2','ES_D7','ES_D10']].mean(axis=1)
tfsee_cluster1['late'] = tfsee_cluster1[['ES_D5']].mean(axis=1)
tfsee_cluster1['diff'] = tfsee_cluster1['early'] - tfsee_cluster1['late']
tfsee_cluster1['rank'] = tfsee_cluster1['diff'].rank()
x = list(tfsee_cluster1['rank'])
z = np.polyfit(tfsee_cluster1['rank'], tfsee_cluster1['diff'], 3)
f = np.poly1d(z)
x_new = np.linspace(1, 64, num=len(x)*10)
plt.figure(figsize=(25,20))
plt.plot(x_new, f(x_new), color = 'k', linewidth=4.0)
plt.scatter(x=tfsee_cluster1['rank'], y=tfsee_cluster1['diff'], color='#DACF5D', s=600)
plt.ylim([-1.0,1.5])
plt.suptitle('Not GT TFS', fontsize=8, fontweight='bold')
plt.xlabel('Rank Order',fontsize=8, fontweight='bold')
plt.ylabel('delta z',fontsize=8, fontweight='bold')
plt.savefig('cluster1_enriched_tfs.png')
plt.clf()
Histone_TFSEE/box_plot_cluster_2_genes_fpkm.png

35.2 KiB

Histone_TFSEE/box_plot_cluster_3_genes_fpkm.png

35.2 KiB

import pandas as pd
import numpy as np
import csv
import matplotlib.pyplot as plt
import seaborn as sns
import scipy
# Find nearest genes
# Grab TF FPKM levels
fpkm = pd.read_table("rna.tsv")
gene_names_mapping = pd.read_csv("../gencode.v19.annotation_protein_coding_ids.txt",names=['gene_id', 'symbol'])
fpkm_symbol = fpkm.merge(gene_names_mapping)
fpkm_symbol = fpkm.set_index(['gene_id'])
# Enhancers
enhancers_universe = pd.DataFrame.from_csv("Histone_pe_filtered_peaks.bed", sep="\t", header=None, index_col=3)
# Read in cluster 3 enhancers
cluster_3 = pd.DataFrame.from_csv("cluster_3_enhancers.csv", sep=",", header=0, index_col=0)
# Choose enhacners exprssed in cluster 3
enhancers_universe_cluster_3 = enhancers_universe.loc[cluster_3.index.values]
enhancers_universe_cluster_3.to_csv("cluster_3_enhancers_locations.bed", sep="\t",header=None, index=False)
# Read in nearest genes
genes_id = pd.DataFrame.from_csv("cluster_3_genes.txt", sep="\t", header=None, index_col=None)
needed_rows = [row for row in fpkm_symbol.index if row in genes_id[0].values]
cluster3_genes_expressed = fpkm_symbol.loc[needed_rows]
# col_colors
plt.style.use('classic')
colors = ["#FFD66F","#2E6A44","#862743", "#4FA6C7", "#3398CC"]
medianprops = dict(linestyle='-', linewidth=12, color='black')
box = cluster3_genes_expressed.boxplot(column=['ES_D0', 'ES_D2', 'ES_D5', 'ES_D7', 'ES_D10'],patch_artist=True,showfliers=False,manage_xticks=False,widths = 0.6, medianprops = medianprops)
plt.setp(box['whiskers'], color='k', linestyle='-', linewidth = 12)
plt.setp(box['boxes'], color='k', linestyle='-', linewidth = 12)
for patch, color in zip(box['boxes'], colors):
patch.set_facecolor(color)
plt.tick_params(axis='y', direction='out')
plt.tick_params(axis='x', direction='out')
plt.tick_params(top='off', right='off')
plt.grid(b=False)
plt.ylim((-5,70))
plt.xticks([1,2,3,4,5], ['ES_D0', 'ES_D2', 'ES_D5', 'ES_D7', 'ES_D10'])
plt.savefig('box_plot_cluster_3_genes_fpkm.png')
plt.clf()
# Cluster tfs 1 0.05
scipy.stats.ranksums(cluster3_genes_expressed['ES_D0'],cluster3_genes_expressed['ES_D2'])
scipy.stats.ranksums(cluster3_genes_expressed['ES_D0'],cluster3_genes_expressed['ES_D5'])
scipy.stats.ranksums(cluster3_genes_expressed['ES_D0'],cluster3_genes_expressed['ES_D7'])
scipy.stats.ranksums(cluster3_genes_expressed['ES_D0'],cluster3_genes_expressed['ES_D10'])
scipy.stats.ranksums(cluster3_genes_expressed['ES_D2'],cluster3_genes_expressed['ES_D5'])
scipy.stats.ranksums(cluster3_genes_expressed['ES_D2'],cluster3_genes_expressed['ES_D7'])
scipy.stats.ranksums(cluster3_genes_expressed['ES_D2'],cluster3_genes_expressed['ES_D10'])
scipy.stats.ranksums(cluster3_genes_expressed['ES_D5'],cluster3_genes_expressed['ES_D7'])
scipy.stats.ranksums(cluster3_genes_expressed['ES_D5'],cluster3_genes_expressed['ES_D10'])
scipy.stats.ranksums(cluster3_genes_expressed['ES_D7'],cluster3_genes_expressed['ES_D10'])
# Read in cluster 2 enhancers
cluster_2 = pd.DataFrame.from_csv("cluster_2_enhancers.csv", sep=",", header=0, index_col=0)
# Choose enhacners exprssed in cluster 2
enhancers_universe_cluster_2 = enhancers_universe.loc[cluster_2.index.values]
enhancers_universe_cluster_2.to_csv("cluster_2_enhancers_locations.bed", sep="\t",header=None, index=False)
# Read in nearest genes
genes_id = pd.DataFrame.from_csv("cluster_2_genes.txt", sep="\t", header=None, index_col=None)
needed_rows = [row for row in fpkm_symbol.index if row in genes_id[0].values]
cluster2_genes_expressed = fpkm_symbol.loc[needed_rows]
# col_colors
plt.style.use('classic')
colors = ["#FFD66F","#2E6A44","#862743", "#4FA6C7", "#3398CC"]
medianprops = dict(linestyle='-', linewidth=12, color='black')
box = cluster2_genes_expressed.boxplot(column=['ES_D0', 'ES_D2', 'ES_D5', 'ES_D7', 'ES_D10'],patch_artist=True,showfliers=False,manage_xticks=False,widths = 0.6, medianprops = medianprops)
plt.setp(box['whiskers'], color='k', linestyle='-', linewidth = 12)
plt.setp(box['boxes'], color='k', linestyle='-', linewidth = 12)
for patch, color in zip(box['boxes'], colors):
patch.set_facecolor(color)
plt.tick_params(axis='y', direction='out')
plt.tick_params(axis='x', direction='out')
plt.tick_params(top='off', right='off')
plt.grid(b=False)
plt.ylim((-5,70))
plt.xticks([1,2,3,4,5], ['ES_D0', 'ES_D2', 'ES_D5', 'ES_D7', 'ES_D10'])
plt.savefig('box_plot_cluster_2_genes_fpkm.png')
plt.clf()
# Cluster tfs 1 e-4
scipy.stats.ranksums(cluster2_genes_expressed['ES_D0'],cluster2_genes_expressed['ES_D2'])
scipy.stats.ranksums(cluster2_genes_expressed['ES_D0'],cluster2_genes_expressed['ES_D5'])
scipy.stats.ranksums(cluster2_genes_expressed['ES_D0'],cluster2_genes_expressed['ES_D7'])
scipy.stats.ranksums(cluster2_genes_expressed['ES_D0'],cluster2_genes_expressed['ES_D10'])
scipy.stats.ranksums(cluster2_genes_expressed['ES_D2'],cluster2_genes_expressed['ES_D5'])
scipy.stats.ranksums(cluster2_genes_expressed['ES_D2'],cluster2_genes_expressed['ES_D7'])
scipy.stats.ranksums(cluster2_genes_expressed['ES_D2'],cluster2_genes_expressed['ES_D10'])
scipy.stats.ranksums(cluster2_genes_expressed['ES_D5'],cluster2_genes_expressed['ES_D7'])
scipy.stats.ranksums(cluster2_genes_expressed['ES_D5'],cluster2_genes_expressed['ES_D10'])
scipy.stats.ranksums(cluster2_genes_expressed['ES_D7'],cluster2_genes_expressed['ES_D10'])
#Closest genes Cluster 3
bedtools sort -i cluster_3_enhancers_locations.bed | bedtools closest -a - -b gencode.v19.annotation_protein_coding_sorted.gtf | cut -f12 | cut -f1 -d ';' | cut -f2 -d ' ' | sort | uniq | sed 's/"//g' > cluster_3_genes.txt
#Closest genes Cluster 2
bedtools sort -i cluster_2_enhancers_locations.bed | bedtools closest -a - -b gencode.v19.annotation_protein_coding_sorted.gtf | cut -f12 | cut -f1 -d ';' | cut -f2 -d ' ' | sort | uniq | sed 's/"//g' > cluster_2_genes.txt
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment