-
Venkat Malladi authored6e3b1de3
Code owners
Assign users and groups as approvers for specific file changes. Learn more.
ChIP.Rmd 5.79 KiB
Analysis of ChIP-seq Enhancer Prediction
Setup and Imports
source("http://bioconductor.org/biocLite.R")
biocLite("GenomicFeatures")
biocLite("org.Hs.eg.db")
library(groHMM)
library(org.Hs.eg.db)
library(GenomicAlignments)
library(GenomicFeatures)
Alignments
ES_D10_R1 <- as(readGAlignments("/Volumes/project/GCRB/Lee_Lab/s163035/Matrix_analysis_PMIT_25842977/ChIP-seq/ES_D10/H3K27ac/remove-duplicates.sh-1.1.0/SRR1145823.fastq.gz.sorted.bam.filtered.no_dups.bam"), "GRanges")
ES_D10_R2 <- as(readGAlignments("/Volumes/project/GCRB/Lee_Lab/s163035/Matrix_analysis_PMIT_25842977/ChIP-seq/ES_D10/H3K27ac/remove-duplicates.sh-1.1.0/SRR2130154.fastq.gz.sorted.bam.filtered.no_dups.bam"), "GRanges")
ES_D2_R1 <- as(readGAlignments("/Volumes/project/GCRB/Lee_Lab/s163035/Matrix_analysis_PMIT_25842977/ChIP-seq/ES_D2/H3K27ac/remove-duplicates.sh-1.1.0/SRR1145802.fastq.gz.sorted.bam.filtered.no_dups.bam"), "GRanges")
ES_D2_R2 <- as(readGAlignments("/Volumes/project/GCRB/Lee_Lab/s163035/Matrix_analysis_PMIT_25842977/ChIP-seq/ES_D2/H3K27ac/remove-duplicates.sh-1.1.0/SRR1145803.fastq.gz.sorted.bam.filtered.no_dups.bam"), "GRanges")
ES_D0_R1 <- as(readGAlignments("/Volumes/project/GCRB/Lee_Lab/s163035/Matrix_analysis_PMIT_25842977/ChIP-seq/ES_D0/H3K27ac/remove-duplicates.sh-1.1.0/SRR1145796.fastq.gz.sorted.bam.filtered.no_dups.bam"), "GRanges")
ES_D0_R2 <- as(readGAlignments("/Volumes/project/GCRB/Lee_Lab/s163035/Matrix_analysis_PMIT_25842977/ChIP-seq/ES_D0/H3K27ac/remove-duplicates.sh-1.1.0/SRR1145795.fastq.gz.sorted.bam.filtered.no_dups.bam"), "GRanges")
ES_D7_R1 <- as(readGAlignments("/Volumes/project/GCRB/Lee_Lab/s163035/Matrix_analysis_PMIT_25842977/ChIP-seq/ES_D7/H3K27ac/remove-duplicates.sh-1.1.0/SRR1145817.fastq.gz.sorted.bam.filtered.no_dups.bam"), "GRanges")
ES_D7_R2 <- as(readGAlignments("/Volumes/project/GCRB/Lee_Lab/s163035/Matrix_analysis_PMIT_25842977/ChIP-seq/ES_D7/H3K27ac/remove-duplicates.sh-1.1.0/SRR1145816.fastq.gz.sorted.bam.filtered.no_dups.bam"), "GRanges")
ES_D5_R1 <- as(readGAlignments("/Volumes/project/GCRB/Lee_Lab/s163035/Matrix_analysis_PMIT_25842977/ChIP-seq/ES_D5/H3K27ac/remove-duplicates.sh-1.1.0/SRR1145810.fastq.gz.sorted.bam.filtered.no_dups.bam"), "GRanges")
ES_D5_R2 <- as(readGAlignments("/Volumes/project/GCRB/Lee_Lab/s163035/Matrix_analysis_PMIT_25842977/ChIP-seq/ES_D5/H3K27ac/remove-duplicates.sh-1.1.0/SRR1145809.fastq.gz.sorted.bam.filtered.no_dups.bam"), "GRanges")
# Combine replicates
ES_D10 <- c(ES_D10_R1, ES_D10_R2)
ES_D2 <- c(ES_D2_R1, ES_D2_R2)
ES_D0 <- c(ES_D0_R1, ES_D0_R1)
ES_D7 <- c(ES_D7_R1, ES_D7_R2)
ES_D5 <- c(ES_D5_R1, ES_D5_R2)
# Library
library_ES_D10 <- NROW(ES_D10)
library_ES_D2 <- NROW(ES_D2)
library_ES_D0 <- NROW(ES_D0)
library_ES_D7 <- NROW(ES_D7)
library_ES_D5 <- NROW(ES_D5)
Read in bed files
ES_D0_true <- import("/Volumes/project/GCRB/Lee_Lab/s163035/Matrix_analysis_PMIT_25842977/ChIP-seq/universe_h3k27ac/ES_D0_true.bed", format = "BED")
ES_D10_true <-import("/Volumes/project/GCRB/Lee_Lab/s163035/Matrix_analysis_PMIT_25842977/ChIP-seq/universe_h3k27ac/ES_D10_true.bed", format = "BED")
ES_D2_true <-import("/Volumes/project/GCRB/Lee_Lab/s163035/Matrix_analysis_PMIT_25842977/ChIP-seq/universe_h3k27ac/ES_D2_true.bed", format = "BED")
ES_D5_true <-import("/Volumes/project/GCRB/Lee_Lab/s163035/Matrix_analysis_PMIT_25842977/ChIP-seq/universe_h3k27ac/ES_D5_true.bed", format = "BED")
ES_D7_true <-import("/Volumes/project/GCRB/Lee_Lab/s163035/Matrix_analysis_PMIT_25842977/ChIP-seq/universe_h3k27ac/ES_D7_true.bed", format = "BED")
ES_D0_false <- import("/Volumes/project/GCRB/Lee_Lab/s163035/Matrix_analysis_PMIT_25842977/ChIP-seq/universe_h3k27ac/ES_D0_false.bed", format = "BED")
ES_D10_false <- import("/Volumes/project/GCRB/Lee_Lab/s163035/Matrix_analysis_PMIT_25842977/ChIP-seq/universe_h3k27ac/ES_D10_false.bed", format = "BED")
ES_D2_false <- import("/Volumes/project/GCRB/Lee_Lab/s163035/Matrix_analysis_PMIT_25842977/ChIP-seq/universe_h3k27ac/ES_D2_false.bed", format = "BED")
ES_D5_false <- import("/Volumes/project/GCRB/Lee_Lab/s163035/Matrix_analysis_PMIT_25842977/ChIP-seq/universe_h3k27ac/ES_D5_false.bed", format = "BED")
ES_D7_false <- import("/Volumes/project/GCRB/Lee_Lab/s163035/Matrix_analysis_PMIT_25842977/ChIP-seq/universe_h3k27ac/ES_D7_false.bed", format = "BED")
RPKM = numReads / ( geneLength/1000 * totalNumReads/1,000,000 )
# True
rpkm_ES_D10_true <- countOverlaps(ES_D10_true,ES_D10)/ (width(ES_D10_true)/1000 * library_ES_D10/1000000)
rpkm_ES_D0_true <- countOverlaps(ES_D0_true,ES_D0)/ (width(ES_D0_true)/1000 * library_ES_D0/1000000)
rpkm_ES_D2_true <- countOverlaps(ES_D2_true,ES_D2)/ (width(ES_D2_true)/1000 * library_ES_D2/1000000)
rpkm_ES_D5_true <- countOverlaps(ES_D5_true,ES_D5)/ (width(ES_D5_true)/1000 * library_ES_D5/1000000)
rpkm_ES_D7_true <- countOverlaps(ES_D7_true,ES_D7)/ (width(ES_D7_true)/1000 * library_ES_D7/1000000)
# FALSE
rpkm_ES_D10_false <- countOverlaps(ES_D10_false,ES_D10)/ (width(ES_D10_false)/1000 * library_ES_D10/1000000)
rpkm_ES_D0_false <- countOverlaps(ES_D0_false,ES_D0)/ (width(ES_D0_false)/1000 * library_ES_D0/1000000)
rpkm_ES_D2_false <- countOverlaps(ES_D2_false,ES_D2)/ (width(ES_D2_false)/1000 * library_ES_D2/1000000)
rpkm_ES_D5_false <- countOverlaps(ES_D5_false,ES_D5)/ (width(ES_D5_false)/1000 * library_ES_D5/1000000)
rpkm_ES_D7_false <- countOverlaps(ES_D7_false,ES_D7)/ (width(ES_D7_false)/1000 * library_ES_D7/1000000)
# Combine and plot
rpkm_true <- c(rpkm_ES_D10_true,rpkm_ES_D0_true,rpkm_ES_D2_true,rpkm_ES_D5_true,rpkm_ES_D7_true)
rpkm_false <- c(rpkm_ES_D10_false,rpkm_ES_D0_false,rpkm_ES_D2_false,rpkm_ES_D5_false,rpkm_ES_D7_false)
hist(rpkm_true, col="red")
hist(rpkm_false, add=T, col=rgb(0, 1, 0, 0.5) )
summary(rpkm_true)
summary(rpkm_false)
# Leads to an RPKM cutoff of 1