Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • venkat.malladi/tfsee
  • gcrb/tfsee
Show changes
Showing
with 365 additions and 0 deletions
analysis/GRO_seq_TFSEE/box_plot_cluster_1_tfs_fpkm.png

11.6 KiB

analysis/GRO_seq_TFSEE/box_plot_cluster_2.png

13.9 KiB

analysis/GRO_seq_TFSEE/box_plot_cluster_2_enhancers_rpkm.png

14.1 KiB

analysis/GRO_seq_TFSEE/box_plot_cluster_2_genes_fpkm.png

11.4 KiB

analysis/GRO_seq_TFSEE/box_plot_cluster_2_tfs_fpkm.png

12 KiB

analysis/GRO_seq_TFSEE/box_plot_cluster_3.png

9.87 KiB

analysis/GRO_seq_TFSEE/box_plot_cluster_3_enhancers_rpkm.png

11.4 KiB

analysis/GRO_seq_TFSEE/box_plot_cluster_3_genes_fpkm.png

11.1 KiB

analysis/GRO_seq_TFSEE/box_plot_cluster_3_tfs_fpkm.png

12 KiB

analysis/GRO_seq_TFSEE/box_plot_cluster_4.png

10.1 KiB

analysis/GRO_seq_TFSEE/box_plot_cluster_4_enhancers_rpkm.png

12.9 KiB

analysis/GRO_seq_TFSEE/box_plot_cluster_4_genes_fpkm.png

12 KiB

analysis/GRO_seq_TFSEE/box_plot_cluster_4_tfs_fpkm.png

12.6 KiB

import csv
import itertools

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy
import scipy.stats
import seaborn as sns
# Box plots of nearest-gene expression (FPKM) for each GRO-seq enhancer cluster.
#
# For every cluster the script
#   1. writes the cluster's enhancer rows to cluster_<N>_enhancers_locations.bed,
#   2. reads cluster_<N>_genes.txt (no header; column 0 holds gene ids),
#   3. draws a box plot of those genes' FPKM across the ES time points, and
#   4. runs a Wilcoxon rank-sum test between every pair of time points.
#
# cluster_<N>_genes.txt is not produced by this script; it is generated
# externally from the BED files written in step 1. All BED files are therefore
# written before any gene list is read, so one run yields every BED file even
# when the gene lists do not exist yet.

# Expression columns of rna.tsv, in plotting order.
TIME_POINTS = ('ES_D0', 'ES_D2', 'ES_D5', 'ES_D7', 'ES_D10')
# Fill colour of each box, matched positionally to TIME_POINTS.
BOX_COLORS = ("#FFD66F", "#2E6A44", "#862743", "#4FA6C7", "#3398CC")
# (cluster number, upper y-axis limit), in the order the clusters are processed.
CLUSTER_Y_MAX = ((4, 60), (3, 50), (1, 55), (2, 50))


def select_expressed_genes(expression, gene_ids):
    """Return the rows of *expression* whose index label is in *gene_ids*.

    Rows keep the order they have in *expression*; ids that are absent from
    the index are ignored. A vectorised membership test replaces the original
    per-row scan of the gene-id array.
    """
    return expression.loc[expression.index.isin(list(gene_ids))]


def pairwise_ranksums(expression, columns=TIME_POINTS):
    """Run ``scipy.stats.ranksums`` on every unordered pair of *columns*.

    Returns a list of ``(first, second, statistic, pvalue)`` tuples, with the
    pairs in ``itertools.combinations`` order (D0-D2, D0-D5, ..., D7-D10).
    """
    results = []
    for first, second in itertools.combinations(columns, 2):
        statistic, pvalue = scipy.stats.ranksums(expression[first],
                                                 expression[second])
        results.append((first, second, float(statistic), float(pvalue)))
    return results


def plot_expression_boxes(expression, y_max, out_png, columns=TIME_POINTS):
    """Draw one box per column of *expression* and save the figure to *out_png*.

    The plot is drawn with matplotlib's own ``boxplot`` because it returns the
    dict of artists ('boxes', 'whiskers', ...) that is restyled below, whereas
    ``DataFrame.boxplot`` returns an Axes object by default.
    """
    plt.style.use('classic')
    # Missing values are dropped per column before plotting.
    samples = [expression[name].dropna().values for name in columns]
    medianprops = dict(linestyle='-', linewidth=2, color='black')
    box = plt.boxplot(samples, patch_artist=True, showfliers=False,
                      manage_ticks=False, widths=0.6, medianprops=medianprops)
    plt.setp(box['whiskers'], color='k', linestyle='-', linewidth=5)
    plt.setp(box['boxes'], color='k', linestyle='-', linewidth=5)
    for patch, color in zip(box['boxes'], BOX_COLORS):
        patch.set_facecolor(color)
    plt.tick_params(axis='y', direction='out')
    plt.tick_params(axis='x', direction='out')
    # Booleans, not the string 'off': a non-empty string is truthy.
    plt.tick_params(top=False, right=False)
    plt.grid(False)
    plt.ylim((-5, y_max))
    # Boxes sit at positions 1..N; label them explicitly because automatic
    # tick management is switched off above.
    plt.xticks(list(range(1, len(columns) + 1)), list(columns))
    plt.savefig(out_png)
    plt.clf()


def export_cluster_enhancers(cluster, enhancers_universe):
    """Write the enhancer rows of one cluster to its locations BED file."""
    cluster_enhancers = pd.read_csv(
        "cluster_{}_enhancers.csv".format(cluster),
        sep=",", header=0, index_col=0)
    locations = enhancers_universe.loc[cluster_enhancers.index.values]
    # The index (column 3 of the input BED) is not written back out.
    locations.to_csv("cluster_{}_enhancers_locations.bed".format(cluster),
                     sep="\t", header=False, index=False)


def analyse_cluster(cluster, y_max, expression):
    """Plot and test the expression of the genes listed for one cluster.

    Reads cluster_<N>_genes.txt, saves box_plot_cluster_<N>_genes_fpkm.png,
    prints the pairwise rank-sum results and returns the selected rows.
    """
    gene_table = pd.read_csv("cluster_{}_genes.txt".format(cluster),
                             sep="\t", header=None, index_col=None)
    expressed = select_expressed_genes(expression, gene_table[0].values)
    plot_expression_boxes(
        expressed, y_max, 'box_plot_cluster_{}_genes_fpkm.png'.format(cluster))
    # The original computed these tests and discarded the results.
    for first, second, statistic, pvalue in pairwise_ranksums(expressed):
        print("cluster {}: {} vs {}: statistic={:.4g} p={:.4g}".format(
            cluster, first, second, statistic, pvalue))
    return expressed


def main():
    """Run the export, plotting and testing steps for every cluster.

    Returns a dict mapping cluster number to the expression rows that were
    plotted for it.
    """
    fpkm = pd.read_table("rna.tsv")
    # NOTE(review): the original also merged `fpkm` with the id/symbol table
    # in ../gencode.v19.annotation_protein_coding_ids.txt, but overwrote the
    # merged frame on the next line, so the merge never affected any output.
    # It is left out here; restore it if gene symbols are needed.
    expression = fpkm.set_index(['gene_id'])
    enhancers_universe = pd.read_csv("GRO-seq_enhancers.bed", sep="\t",
                                     header=None, index_col=3)

    for cluster, _ in CLUSTER_Y_MAX:
        export_cluster_enhancers(cluster, enhancers_universe)

    return {cluster: analyse_cluster(cluster, y_max, expression)
            for cluster, y_max in CLUSTER_Y_MAX}


if __name__ == "__main__":
    main()
# Closest protein-coding gene for every enhancer, one cluster at a time.
# Each pass sorts the cluster's BED file, pairs every enhancer with its
# nearest GTF record, takes field 12 of the joined line, keeps the value of
# the first ';'-separated attribute, de-duplicates, strips the double quotes
# and writes the result to cluster_<N>_genes.txt.
# NOTE(review): only the exit status of the final sed is reported, so a
# failing bedtools step still leaves an (empty) output file behind.
for cluster in 4 3 1 2; do
    bedtools sort -i "cluster_${cluster}_enhancers_locations.bed" \
        | bedtools closest -a - -b gencode.v19.annotation_protein_coding_sorted.gtf \
        | cut -f12 \
        | cut -f1 -d ';' \
        | cut -f2 -d ' ' \
        | sort \
        | uniq \
        | sed 's/"//g' > "cluster_${cluster}_genes.txt"
done
This diff is collapsed.
analysis/GRO_seq_TFSEE/cluster1_enriched_tfs.png

96.2 KiB

This diff is collapsed.
analysis/GRO_seq_TFSEE/cluster2_enriched_tfs.png

85.6 KiB

This diff is collapsed.