Skip to content
Snippets Groups Projects
Commit cd3784ba authored by Venkat Malladi's avatar Venkat Malladi
Browse files

get 1kb center of enhancer ans figures.

parent 7010f0ea
Branches
Tags
No related merge requests found
Showing
with 13052 additions and 462 deletions
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Gro_percentages.png

15.4 KiB

This diff is collapsed.
This diff is collapsed.
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
ssp_file = pd.read_csv("SSP_sum.tsv", sep='\t')
ssp_file_filtered = pd.read_csv("SSP_filtered_peaks.tsv" , sep='\t')
sunp_potential_enhancers = pd.read_csv("SUNP_filtered_peaks.tsv" , sep='\t')
named_enhancers = list(ssp_file_filtered['name'])
ssp_potential_enhancers = ssp_file[ssp_file['name'].isin(named_enhancers)]
ssp_potential_enhancers.to_csv('SSP_filtered_peaks_sum.tsv', header=True, index=True, sep='\t')
# get numbers for each
es_d0_none = len(ssp_potential_enhancers[ssp_potential_enhancers['ES_D0'] < 0.5 ]) + len(sunp_potential_enhancers[sunp_potential_enhancers['ES_D0'] < 1])
es_d0_enhancers = len(ssp_potential_enhancers[ssp_potential_enhancers['ES_D0'] >= 0.5 ]) + len(sunp_potential_enhancers[sunp_potential_enhancers['ES_D0'] >= 1])
es_d2_none = len(ssp_potential_enhancers[ssp_potential_enhancers['ES_D2'] < 0.5 ]) + len(sunp_potential_enhancers[sunp_potential_enhancers['ES_D2'] < 1])
es_d2_enhancers = len(ssp_potential_enhancers[ssp_potential_enhancers['ES_D2'] >= 0.5 ]) + len(sunp_potential_enhancers[sunp_potential_enhancers['ES_D2'] >= 1])
es_d5_none = len(ssp_potential_enhancers[ssp_potential_enhancers['ES_D5'] < 0.5 ]) + len(sunp_potential_enhancers[sunp_potential_enhancers['ES_D5'] < 1])
es_d5_enhancers = len(ssp_potential_enhancers[ssp_potential_enhancers['ES_D5'] >= 0.5 ]) + len(sunp_potential_enhancers[sunp_potential_enhancers['ES_D5'] >= 1])
es_d7_none = len(ssp_potential_enhancers[ssp_potential_enhancers['ES_D7'] < 0.5 ]) + len(sunp_potential_enhancers[sunp_potential_enhancers['ES_D7'] < 1])
es_d7_enhancers = len(ssp_potential_enhancers[ssp_potential_enhancers['ES_D7'] >= 0.5 ]) + len(sunp_potential_enhancers[sunp_potential_enhancers['ES_D7'] >= 1])
es_d10_none = len(ssp_potential_enhancers[ssp_potential_enhancers['ES_D10'] < 0.5 ]) + len(sunp_potential_enhancers[sunp_potential_enhancers['ES_D10'] < 1])
es_d10_enhancers = len(ssp_potential_enhancers[ssp_potential_enhancers['ES_D10'] >= 0.5 ]) + len(sunp_potential_enhancers[sunp_potential_enhancers['ES_D10'] >= 1])
raw_data = {'Cell': ['hES', 'DE', 'GT', 'FG', 'PE'],
'unmarked': [float(es_d0_none),float(es_d2_none),float(es_d5_none),float(es_d7_none),float(es_d10_none)],
'GRO-seq': [es_d0_enhancers,es_d2_enhancers ,es_d5_enhancers,es_d7_enhancers,es_d10_enhancers]}
df = pd.DataFrame(raw_data, columns = ['Cell', 'unmarked', 'GRO-seq'])
# Create a figure with a single subplot
f, ax = plt.subplots(1, figsize=(10,5))
# Set bar width at 1
bar_width = 1
# positions of the left bar-boundaries
bar_l = [i for i in range(len(df['unmarked']))]
# positions of the x-axis ticks (center of the bars as bar labels)
tick_pos = [i+(bar_width/2) for i in bar_l]
# Create the total enhancers
totals = [i+j for i,j in zip(df['unmarked'], df['GRO-seq''])]
# Create the percentage of the total unmarked enhancers value for each cell was
unmarked_rel = [i / float(j) * 100 for i,j in zip(df['unmarked'], totals)]
# Create the percentage of the total H3K4me1 alone enhancers value for each cell
alone_rel = [i / float(j) * 100 for i,j in zip(df['GRO-seq'], totals)]
N = 5
ind = np.arange(N) # the x locations for the groups
width = 0.5 # the width of the bars: can also be len(x) sequence
p1 = plt.bar(ind, alone_rel, width, color='#E62128')
p2 = plt.bar(ind, unmarked_rel, width,
bottom=alone_rel,color='#D2D5D4')
plt.ylabel('% total enhancers')
plt.title('Stage')
plt.xticks(ind, ('hES', 'DE', 'GT', 'FG', 'PE'))
plt.yticks(np.arange(0, 110, 10))
plt.savefig('Gro_percentages.png')
plt.clf()
# Get 1kb centered files
short_short = pd.read_csv("short_short_paired_1kb.bed", sep='\t', header=None, names= ['chr', 'start', 'end', 'name', 'score', 'strand'])
short_unpaired = pd.read_csv("short_unpaired_1kb.bed", sep='\t', header=None, names= ['chr', 'start', 'end', 'name', 'score', 'strand'])
universe_enhancers = pd.concat([short_short,short_unpaired])
es_d0_names = list(ssp_potential_enhancers[ssp_potential_enhancers['ES_D0'] >= 0.5 ]['name']) + list(sunp_potential_enhancers[sunp_potential_enhancers['ES_D0'] >= 1 ]['name'])
es_d2_names = list(ssp_potential_enhancers[ssp_potential_enhancers['ES_D2'] >= 0.5 ]['name']) + list(sunp_potential_enhancers[sunp_potential_enhancers['ES_D2'] >= 1 ]['name'])
es_d5_names = list(ssp_potential_enhancers[ssp_potential_enhancers['ES_D5'] >= 0.5 ]['name']) + list(sunp_potential_enhancers[sunp_potential_enhancers['ES_D5'] >= 1 ]['name'])
es_d7_names = list(ssp_potential_enhancers[ssp_potential_enhancers['ES_D7'] >= 0.5 ]['name']) + list(sunp_potential_enhancers[sunp_potential_enhancers['ES_D7'] >= 1 ]['name'])
es_d10_names = list(ssp_potential_enhancers[ssp_potential_enhancers['ES_D10'] >= 0.5 ]['name']) + list(sunp_potential_enhancers[sunp_potential_enhancers['ES_D10'] >= 1 ]['name'])
es_d0_enhancers = universe_enhancers[universe_enhancers['name'].isin(es_d0_names)]
es_d2_enhancers = universe_enhancers[universe_enhancers['name'].isin(es_d2_names)]
es_d5_enhancers = universe_enhancers[universe_enhancers['name'].isin(es_d5_names)]
es_d7_enhancers = universe_enhancers[universe_enhancers['name'].isin(es_d7_names)]
es_d10_enhancers = universe_enhancers[universe_enhancers['name'].isin(es_d10_names)]
es_d0_enhancers.to_csv("ES_D0_gro-seq_enhancers.bed", index_label="gene_id",sep='\t', columns=["chr", "start", "end","name"],header=False,index=False)
es_d2_enhancers.to_csv("ES_D2_gro-seq_enhancers.bed", index_label="gene_id",sep='\t', columns=["chr", "start", "end","name"],header=False,index=False)
es_d5_enhancers.to_csv("ES_D5_gro-seq_enhancers.bed", index_label="gene_id",sep='\t', columns=["chr", "start", "end","name"],header=False,index=False)
es_d7_enhancers.to_csv("ES_D7_gro-seq_enhancers.bed", index_label="gene_id",sep='\t', columns=["chr", "start", "end","name"],header=False,index=False)
es_d10_enhancers.to_csv("ES_D10_gro-seq_enhancers.bed", index_label="gene_id",sep='\t', columns=["chr", "start", "end","name"],header=False,index=False)
......@@ -70,8 +70,37 @@ bedtools intersect -a /Volumes/project/GCRB/Lee_Lab/s163035/Matrix_analysis_PMIT
./cutoff_analysis_gro.py --rpkm SUNP_sum.tsv --color '#FF7C21' --factor SUNP -l 1
# Filter SSP Paired and merge
cut -f4 SSP_filtered_peaks.bed | sort -u > t
for i in $(cat t); do grep $i short_short_paired_1kb.bed >> gro_seq_potential_enhancers.bed; done
cut -f4 SUNP_filtered_peaks.bed | sort -u > t
for i in $(cat t); do grep $i short_unpaired_1kb.bed >> gro_seq_potential_enhancers.bed; done
for i in $(cat t); do grep $i SSP_filtered_peaks.bed > tt; bedtools merge -c 4 -o distinct -i tt >> SSP_filtered_peaks_merged.bed; done
# Acutal Processing
python gro-seq_enhancer_plots.py
bedtools sort -i ES_D0_gro-seq_enhancers.bed > ES_D0_gro-seq_enhancers_1kb.bed
bedtools sort -i ES_D2_gro-seq_enhancers.bed > ES_D2_gro-seq_enhancers_1kb.bed
bedtools sort -i ES_D5_gro-seq_enhancers.bed > ES_D5_gro-seq_enhancers_1kb.bed
bedtools sort -i ES_D7_gro-seq_enhancers.bed > ES_D7_gro-seq_enhancers_1kb.bed
bedtools sort -i ES_D10_gro-seq_enhancers.bed > ES_D10_gro-seq_enhancers_1kb.bed
# Meme processing
#!/bin/bash
#SBATCH --job-name=fasta_processing
#SBATCH --partition=super
#SBATCH --nodes=2
#SBATCH --ntasks=64
#SBATCH --time=0-36:00:00
#SBATCH --output=fasta_processing.%j.out
#SBATCH --error=fasta_processing.%j.err
#SBATCH --mail-user=venkat.malladi@utsouthwestern.edu
#SBATCH --mail-type=ALL
# Fasta file processing
module load bedtools
bedtools getfasta -fi /project/apps_database/iGenomes/Homo_sapiens/UCSC/hg19/Sequence/WholeGenomeFasta/genome.fa -bed ES_D0_gro-seq_enhancers_1kb.bed -fo ES_D0_gro-seq_enhancers.fasta -name
bedtools getfasta -fi /project/apps_database/iGenomes/Homo_sapiens/UCSC/hg19/Sequence/WholeGenomeFasta/genome.fa -bed ES_D2_gro-seq_enhancers_1kb.bed -fo ES_D2_gro-seq_enhancers.fasta -name
bedtools getfasta -fi /project/apps_database/iGenomes/Homo_sapiens/UCSC/hg19/Sequence/WholeGenomeFasta/genome.fa -bed ES_D5_gro-seq_enhancers_1kb.bed -fo ES_D5_gro-seq_enhancers.fasta -name
bedtools getfasta -fi /project/apps_database/iGenomes/Homo_sapiens/UCSC/hg19/Sequence/WholeGenomeFasta/genome.fa -bed ES_D7_gro-seq_enhancers_1kb.bed -fo ES_D7_gro-seq_enhancers.fasta -name
bedtools getfasta -fi /project/apps_database/iGenomes/Homo_sapiens/UCSC/hg19/Sequence/WholeGenomeFasta/genome.fa -bed ES_D10_gro-seq_enhancers_1kb.bed -fo ES_D10_gro-seq_enhancers.fasta -name
This diff is collapsed.
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment