Commit 80b1e5df authored by Venkat Malladi

Add in recount data.

parent 87282494
# Recount TCGA and GTEX data sets
#
# Package setup. Packages are installed only when they are missing, so
# re-running the analysis neither reinstalls them nor needs network access.
bioc_pkgs <- c("recount", "org.Hs.eg.db")
bioc_missing <- bioc_pkgs[
  !vapply(bioc_pkgs, requireNamespace, logical(1), quietly = TRUE)
]
if (length(bioc_missing) > 0) {
  # source() executes the downloaded script as-is, so fetch it over HTTPS.
  # NOTE(review): on Bioconductor >= 3.8 biocLite() has been superseded by
  # BiocManager::install() -- confirm which installer this environment needs.
  source("https://bioconductor.org/biocLite.R")
  biocLite(bioc_missing)
}
if (!requireNamespace("Rmisc", quietly = TRUE)) {
  install.packages("Rmisc")
}
library('recount')
library('org.Hs.eg.db')
library(ggplot2)
library(stringr)
library(reshape2)
library(Rmisc)
# GTEX breast: counts are scaled by mapped reads and converted to RPKM.
# `rse_gene` is not assigned in this script; presumably it is restored by
# the load() call -- TODO confirm.
load('recount/rse_gene_breast_GTEX.Rdata')
rpkm_breast_gtex <- getRPKM(
  scale_counts(rse_gene, by = 'mapped_reads')
)
# TCGA breast
load('recount/rse_gene_TCGA.Rdata')
# The scaling / RPKM steps are kept for reference but disabled:
#rse_tcga <- scale_counts(rse_gene)
#rpkm_tcga <- getRPKM(scale_counts(rse_gene))
# `rse_tcga` and `rpkm_tcga` are used further down but never assigned here;
# presumably these two files provide them -- TODO confirm.
load('recount/rse_TCGA.Rdata')
load('recount/rpkm_TCGA.Rdata')
# TCGAbiolinksGUI metadata
load('recount/TCGA-BRCA_clinical.rda')
load('recount/TCGA-BRCA_Gene_expression_Gene_expression_quantification_hg19.rda')
# Keep only the two host genes of interest (GTEX).
# SNGH3 ENSG00000242125.3
# SNGH4 ENSG00000281398.2
# NOTE(review): the gene symbols are probably meant to be SNHG3 / SNHG4; the
# "SNGH" spelling is kept because later column names, plot titles and output
# file names use it.
#
# Rows are selected by ID, in the order listed, so the column labels assigned
# below always belong to the right gene regardless of the matrix row order.
# An ID missing from the matrix stops with "subscript out of bounds" instead
# of silently yielding a differently shaped table.
gtex_gene_ids <- c(SNGH3 = "ENSG00000242125.3", SNGH4 = "ENSG00000281398.2")
host_gtex <- data.frame(t(rpkm_breast_gtex[gtex_gene_ids, , drop = FALSE]))
colnames(host_gtex) <- names(gtex_gene_ids)
# Per-sample TCGA phenotype table built from fields of `rse_tcga`.
phen <- data.frame(
  Experiment = rse_tcga$bigwig_file,
  Site = rse_tcga$gdc_cases.project.primary_site,
  Type = rse_tcga$cgc_sample_sample_type,
  Stage = rse_tcga$cgc_case_pathologic_stage,
  ER = rse_tcga$xml_breast_carcinoma_estrogen_receptor_status,
  PR = rse_tcga$xml_breast_carcinoma_progesterone_receptor_status,
  sample = rse_tcga$gdc_cases.samples.submitter_id
)
# Strip only a literal trailing ".bw" extension. The previous pattern ".bw"
# was an unanchored regex in which "." matches any character, so it could
# also remove text from the middle of a name.
phen$Experiment <- sub("\\.bw$", "", as.character(phen$Experiment))
rownames(phen) <- phen$Experiment
# Keep only the two host genes of interest (TCGA), one row per experiment.
# Rows are selected by ID, in the order listed, so the SNGH3 / SNGH4 labels
# cannot be swapped by the matrix row order; a missing ID stops with
# "subscript out of bounds".
tcga_gene_ids <- c(SNGH3 = "ENSG00000242125.3", SNGH4 = "ENSG00000281398.2")
host_tcga <- data.frame(t(rpkm_tcga[tcga_gene_ids, , drop = FALSE]))
colnames(host_tcga) <- names(tcga_gene_ids)
host_tcga['Experiment'] <- rownames(host_tcga)
# Attach the phenotype columns; the join key is stated explicitly rather than
# left to merge()'s "all shared column names" default.
tt <- merge(phen, host_tcga, by = 'Experiment')
# Restrict to samples whose primary site is breast.
host_tcga_breast <- tt[tt$Site %in% 'Breast', ]
# Sample-level subtype annotation taken from the column data of `data`
# (`data` is not assigned in this script; presumably it comes from the
# TCGAbiolinksGUI .rda loaded earlier -- TODO confirm).
meta_cols <- c('sample', 'subtype_PAM50.mRNA')
tcga_meta_data <- colData(data)[meta_cols]
colnames(tcga_meta_data) <- c('sample', 'PAM')
# Join the subtype onto the breast expression table by sample identifier.
host_tcga_breast_pam <- merge(host_tcga_breast, tcga_meta_data, by = "sample")
# Merge TCGA and GTEX into one table with columns SNGH3, SNGH4, PAM, Type.
df_pam <- data.frame(host_tcga_breast_pam[, c('SNGH3', 'SNGH4', 'PAM', 'Type')])
# GTEX samples carry the literal label "GTEX" in both grouping columns.
host_gtex[['PAM']] <- "GTEX"
host_gtex[['Type']] <- "GTEX"
df_tcag_gtex <- rbind(df_pam, host_gtex)
# Relabel metastatic samples as primary tumours.
df_tcag_gtex$Type[which(df_tcag_gtex$Type == 'Metastatic')] <- 'Primary Tumor'
# One table per grouping, each without rows missing that grouping value.
df_tcag_gtex_pam <- df_tcag_gtex[!is.na(df_tcag_gtex[['PAM']]), ]
df_tcag_gtex_type <- df_tcag_gtex[!is.na(df_tcag_gtex[['Type']]), ]
# Box plots of SNGH3 and SNGH4 expression, grouped by Type and by PAM.

# Draw one box plot of log2(RPKM + 1) for `gene`, grouped by `group_col`, and
# write it to `out_file` as a JPEG. Returns the ggplot object invisibly.
#
#   df         data frame with a numeric `gene` column and a `group_col` column
#   gene       name of the expression column; also used as the plot title
#   group_col  name of the grouping column; also used as the legend title
#   y_max      upper end of the visible y range, on the log2(RPKM + 1) scale
#   out_file   path of the JPEG to write; its directory is created if missing
#   x_label    x-axis label; defaults to the grouping column name
save_expression_boxplot <- function(df, gene, group_col, y_max, out_file,
                                    x_label = group_col) {
  plot_df <- data.frame(
    expr = log2(df[[gene]] + 1),
    grp = df[[group_col]]
  )
  p <- ggplot(plot_df, aes(x = grp, y = expr, fill = grp)) +
    stat_boxplot(geom = 'errorbar', lwd = 1.5) +
    geom_boxplot(lwd = 1.5, outlier.size = 1.5, outlier.shape = NA) +
    labs(y = "log2(RPKM+1)", x = x_label, title = gene, fill = group_col) +
    theme_bw() +
    theme(
      panel.grid.major = element_blank(),
      panel.grid.minor = element_blank(),
      axis.ticks = element_line(size = 1)
    ) +
    # Zoom the view instead of setting scale limits: scale limits discard
    # values above y_max before the box statistics are computed, which
    # shifts the medians and quartiles that get drawn.
    coord_cartesian(ylim = c(0, y_max))

  dir.create(dirname(out_file), showWarnings = FALSE, recursive = TRUE)
  jpeg(out_file)
  # Close the device even if rendering fails, so no device is left open.
  on.exit(dev.off(), add = TRUE)
  # Explicit print(): a bare `p` is not auto-printed when the script is run
  # through source() or from inside a function, leaving an empty image.
  print(p)
  invisible(p)
}

# Plot based SNGH3
save_expression_boxplot(df_tcag_gtex_type, 'SNGH3', 'Type', y_max = 6,
                        out_file = 'figures/SNGH3_breast_correlation_type.jpg')
save_expression_boxplot(df_tcag_gtex_pam, 'SNGH3', 'PAM', y_max = 6,
                        out_file = 'figures/SNGH3_breast_correlation_pam.jpg')
# Plot based SNGH4
save_expression_boxplot(df_tcag_gtex_type, 'SNGH4', 'Type', y_max = 3,
                        out_file = 'figures/SNGH4_breast_correlation_type.jpg')
save_expression_boxplot(df_tcag_gtex_pam, 'SNGH4', 'PAM', y_max = 3,
                        out_file = 'figures/SNGH4_breast_correlation_pam.jpg')
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment