tcga_expression.Rmd 5.2 KB
Newer Older
1 2
source("https://bioconductor.org/biocLite.R")
biocLite("SummarizedExperiment")
3
install.packages("Rmisc")
4
library(SummarizedExperiment)
5 6
library(reshape2)
library(ggplot2)
7
library(Rmisc)
8 9 10 11 12

load('/Users/venkatmalladi/TCGAbiolinksGUI/TCGA-BRCA_Gene_expression_Gene_expression_quantification_hg19.rda')
load('/Users/venkatmalladi/TCGAbiolinksGUI/TCGA-BRCA_clinical.rda')

# Analysis # Read in host gene
13
bound <- read.csv("./mcf-7-basal-snoRNA_bound_overlap_host.txt", header=F, sep='\t')
14 15 16 17 18

bound$V1 <- as.character(bound$V1)

bound_gene_expression = data[rowRanges(data)$ensembl_gene_id %in% list(bound$V1)[[1]],]

19
bound_SNGH3 = data[rowRanges(data)$ensembl_gene_id %in% ('ENSG00000242125'),]
20

21
unbound <- read.csv("./mcf-7-basal-snoRNA_unbound_overlap_host.txt", header=F, sep='\t')
22 23 24 25 26 27

unbound$V1 <- as.character(unbound$V1)

unbound_gene_expression = data[rowRanges(data)$ensembl_gene_id %in% list(unbound$V1)[[1]],]


28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51
bound_rpkm <- data.frame(t(assays(bound_gene_expression)$normalized_count))
bound_rpkm['barcode'] <- rownames(bound_rpkm)
columna_data_bound <- colData(bound_gene_expression)
unbound_rpkm <- data.frame(t(assays(unbound_gene_expression)$normalized_count))
columna_data_unbound <- colData(unbound_gene_expression)
unbound_rpkm['barcode'] <- rownames(unbound_rpkm)

columna_data_bound_subest <- columna_data_bound[c('barcode',"subtype_PAM50.mRNA")]
bound_rpkm_patients <- data.frame(merge(bound_rpkm,columna_data_bound_subest,by="barcode"))

columna_data_unbound_subest <- columna_data_unbound[c('barcode',"subtype_PAM50.mRNA")]
unbound_rpkm_patients <- data.frame(merge(unbound_rpkm,columna_data_unbound_subest,by="barcode"))

bound_rpkm_patients.m <- melt(na.omit(within(bound_rpkm_patients, rm("barcode"))), id=c("subtype_PAM50.mRNA"),na.rm = TRUE)
bound_rpkm_patients.m$Parp1 <- "bound"

unbound_rpkm_patients.m <- melt(na.omit(within(unbound_rpkm_patients, rm("barcode"))), id=c("subtype_PAM50.mRNA"),na.rm = TRUE)
unbound_rpkm_patients.m$Parp1 <- "unbound"

rpkm_patients <- rbind(bound_rpkm_patients.m,unbound_rpkm_patients.m)

jpeg('boxplot-bound.jpg')
ggplot(rpkm_patients, aes(x =subtype_PAM50.mRNA, y = log10(value+1), fill=Parp1)) + geom_boxplot() + labs(y="FPKM",x="Subtype")
dev.off()
52 53

jpeg('boxplot-bound_unbound.jpg')
54
boxplot(log10(unbound_rpkm+1),log10(bound_rpkm+1),col=(c("orange","blue")),yaxt="n", cex.axis=1,las=2,lwd=4,lty=1)
55 56 57 58
axis(2, at=seq(-10,25,5))
dev.off()


59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87
# SNHG3
bound_rpkm <- data.frame(t(assays(bound_SNGH3)$normalized_count))
bound_rpkm['barcode'] <- rownames(bound_rpkm)
columna_data_bound <- colData(bound_SNGH3)


columna_data_bound_subest <- columna_data_bound[c('barcode',"subtype_PAM50.mRNA")]
bound_rpkm_patients <- data.frame(merge(bound_rpkm,columna_data_bound_subest,by="barcode"))

columna_data_unbound_subest <- columna_data_unbound[c('barcode',"subtype_PAM50.mRNA")]
unbound_rpkm_patients <- data.frame(merge(unbound_rpkm,columna_data_unbound_subest,by="barcode"))

bound_rpkm_patients.m <- melt(na.omit(within(bound_rpkm_patients, rm("barcode"))), id=c("subtype_PAM50.mRNA"),na.rm = TRUE)

tgc <- summarySE(bound_rpkm_patients.m, measurevar="value", groupvars=c("subtype_PAM50.mRNA"))

jpeg('barplot_SNGH3.jpg')
ggplot(tgc, aes(x=subtype_PAM50.mRNA, y=value)) +
    geom_bar(position=position_dodge(), stat="identity",
             colour="black", # Use black outlines,
             size=.3) +      # Thinner lines
    geom_errorbar(aes(ymin=value-se, ymax=value+se),
                  size=.3,    # Thinner lines
                  width=.2,
                  position=position_dodge(.9)) +
    xlab("Subtype") +
    ylab("FPKM") +
    theme_bw()
dev.off()
88 89 90 91 92 93 94

# Seperate into High ESR1,FOXA1, DRAIC

genes <- c("ENSG00000245750",'ENSG00000091831','ENSG00000129514')

expression_152 <- data[rowRanges(data)$ensembl_gene_id %in% genes,]

95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125
data_expression_152 <- data.frame(t(data.frame(assays(expression_152)$normalized_count)))
columna_data_expression_152 <- colData(expression_152)

summary(as.factor( columna_data_expression_152$subtype_PAM50.mRNA))

t = gsub('\\.','-',rownames(data_expression_152))
rownames(data_expression_152) <- t
data_expression_152['barcode'] <- t

columna_data_expression_152_subest <- columna_data_expression_152[c('barcode',"subtype_PAM50.mRNA")]

data_patients <- data.frame(merge(data_expression_152,columna_data_expression_152_subest,by="barcode"))

data_patients_na_removed <- na.omit(data_patients)

data_subset <- data.frame(data_patients_na_removed[c("FOXA1","RP11.279F6.1","subtype_PAM50.mRNA")])
data_subset.m <- melt(data_subset, id=c("subtype_PAM50.mRNA"),na.rm = TRUE)




 pdf('boxplot-DRAIC.pdf')
 ggplot(data_subset.m, aes(x =variable, y = log10(value+1), fill=subtype_PAM50.mRNA))  +
     geom_violin(trim=FALSE)
 dev.off()



for (id in tbl$submitter_id) {
    print (grep(id,t)[1])
    }
126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142

# Filter

data_expression_152_low_ESR1 <- data_expression_152[data_expression_152[,1] <= quantile(data_expression_152[1,],c(.10)),]
data_expression_152_high_ESR1 <- data_expression_152[data_expression_152[,1] > quantile(data_expression_152[1,],c(.10)),]

draic <- data.frame()

draic$low <- data_expression_152_low_ESR1[3]

jpeg('boxplot-DRAIC.jpg')
boxplot(log2(unlistdata_expression_152_low_ESR1[3]+1),log2(data_expression_152_high_ESR1[3]+1),col=(c("orange","blue")), cex.axis=1,las=2,lwd=4,lty=1)

dev.off()


TCGA-BRCA_clinical.rda