Commit 87282494 authored by Venkat Malladi's avatar Venkat Malladi

First pass at RNA-seq analysis.

parent 6806c4a7
DDX21 RNA-seq Analsysis
=====================================
## Setup and Imports
```{r init}
source("http://bioconductor.org/biocLite.R")
biocLite("cummeRbund")
install.packages("stringr")
install.packages("ggplot2")
install.packages("VennDiagram")
install.packages("reshape")
install.packages("eulerr")
library(cummeRbund)
library(stringr)
library(ggplot2)
library(VennDiagram)
library(reshape)
library(eulerr)
```
## Initialize cuffData.db and establish connection
```{r cuff}
cuff_diff_path="/Volumes/project/GCRB/Lee_Lab/s163035/MCF7_DDX21_inducibile_kd_Daeseok/compare-expression-cufflinks.sh-2.0.0"
gtf_path="/Volumes/project/GCRB/Lee_Lab/s163035/MCF7_DDX21_inducibile_kd_Daeseok/transcripts/merge-transcripts-cufflinks.sh-1.0.0/merged.gtf"
genome_path="/Volumes/project/apps_database/iGenomes/Homo_sapiens/UCSC/hg19/Sequence/WholeGenomeFasta/genome.fa"
cuff <- readCufflinks(dir=cuff_diff_path,gtfFile=gtf_path,genome=genome_path)
```
### Significant gene expression differences (FDR <5%)
```{r sigGenes}
alpha<-0.05
sigGeneIDs<-getSig(cuff,alpha=alpha)
sigGenes<-getGenes(cuff,sigGeneIDs)
```
### Get the table of the significant differentially expressed genes
```{r sigTable}
mySigMat<-sigMatrix(cuff,level='genes',alpha=alpha)
jpeg('figures/sig_matrix.jpg')
mySigMat
dev.off()
mySigTable<-getSigTable(cuff,alpha=alpha,level='genes')
mySigFrame <- as.data.frame(mySigTable)
geneNames<-unique(featureNames(sigGenes))
rownames(geneNames)<-geneNames[,1]
mySigFrameGenes<-merge(mySigFrame,geneNames, by=0)
sigFPKMs<-fpkmMatrix(sigGenes)
mysigFPKMs<-merge(sigFPKMs,geneNames, by=0)
```
### Look at the set of differentially expressed genes
```{r diff}
# LUC vs KD
diffgenes_kd<-mySigFrameGenes[which(mySigFrameGenes$lucvskd==1),]
diffgenes_kd_FPKM<-mysigFPKMs[which(mySigFrameGenes$lucvskd==1),]
diffgenes_kd_FPKM$fc <- log(diffgenes_kd_FPKM$kd/diffgenes_kd_FPKM$luc,2)
kd_up_genes <- diffgenes_kd_FPKM[which(diffgenes_kd_FPKM$fc>1),]
kd_down_genes <- diffgenes_kd_FPKM[which(diffgenes_kd_FPKM$fc< -1),]
kd_genes <- diffgenes_kd_FPKM[which(diffgenes_kd_FPKM$fc>1 | diffgenes_kd_FPKM$fc< -1 ),]
# Genes
cat("up: ", NROW(unique(kd_up_genes$gene_short_name)), "\n")
cat("down: ", NROW(unique(kd_down_genes$gene_short_name)), "\n")
write.table(kd_up_genes$gene_short_name, "tables/kd_up_genes.txt",sep="\t", row.names = FALSE)
write.table(kd_down_genes$gene_short_name, "tables/kd_down_genes.txt",sep="\t", row.names = FALSE)
# LUC vs WT
diffgenes_wt<-mySigFrameGenes[which(mySigFrameGenes$lucvswt==1),]
diffgenes_wt_FPKM<-mysigFPKMs[which(mySigFrameGenes$lucvswt==1),]
diffgenes_wt_FPKM$fc <- log(diffgenes_wt_FPKM$wt/diffgenes_wt_FPKM$luc,2)
wt_up_genes <- diffgenes_wt_FPKM[which(diffgenes_wt_FPKM$fc>1),]
wt_down_genes <- diffgenes_wt_FPKM[which(diffgenes_wt_FPKM$fc< -1),]
wt_genes <- diffgenes_wt_FPKM[which(diffgenes_wt_FPKM$fc>1 | diffgenes_wt_FPKM$fc< -1 ),]
# Genes
cat("up: ", NROW(unique(wt_up_genes$gene_short_name)), "\n")
cat("down: ", NROW(unique(wt_down_genes$gene_short_name)), "\n")
write.table(wt_up_genes$gene_short_name, "tables/wt_up_genes.txt",sep="\t", row.names = FALSE)
write.table(wt_down_genes$gene_short_name, "tables/wt_down_genes.txt",sep="\t", row.names = FALSE)
# LUC vs MT
diffgenes_mt<-mySigFrameGenes[which(mySigFrameGenes$lucvsmt==1),]
diffgenes_mt_FPKM<-mysigFPKMs[which(mySigFrameGenes$lucvsmt==1),]
diffgenes_mt_FPKM$fc <- log(diffgenes_mt_FPKM$mt/diffgenes_mt_FPKM$luc,2)
mt_up_genes <- diffgenes_mt_FPKM[which(diffgenes_mt_FPKM$fc>1),]
mt_down_genes <- diffgenes_mt_FPKM[which(diffgenes_mt_FPKM$fc< -1),]
mt_genes <- diffgenes_mt_FPKM[which(diffgenes_mt_FPKM$fc>1 | diffgenes_mt_FPKM$fc< -1 ),]
# Genes
cat("up: ", NROW(unique(mt_up_genes$gene_short_name)), "\n")
cat("down: ", NROW(unique(mt_down_genes$gene_short_name)), "\n")
write.table(mt_up_genes$gene_short_name, "tables/mt_up_genes.txt",sep="\t", row.names = FALSE)
write.table(mt_down_genes$gene_short_name, "tables/mt_down_genes.txt",sep="\t", row.names = FALSE)
# VennDiagram of genes
overlap_genes <- euler(list("KD" = kd_genes$gene_short_name,"WT" = wt_genes$gene_short_name, 'MT' = mt_genes$gene_short_name))
jpeg('figures/venndigram_genes_ddx21.jpg')
plot(overlap_genes,counts=T,labels=c("KD","WT","MT"))
dev.off()
"x"
"PHACTR4,RNU6ATAC27P"
"C1orf228"
"PDE4B"
"KCNK2"
"DDX21"
"CALML5"
"AP002348.1,GRIK4"
"PDE3A"
"RPL41"
"ASCL1"
"GFAP"
"CBLN2"
"ZNF804A"
"CXCR4"
"TFF1"
"TNFSF10"
"FAM53A,Y_RNA"
"LEF1"
"DUSP4"
"x"
"PTPRC"
"RP11-495P10.9"
"CHORDC1"
"PPP2R1B"
"LHFP"
"LCP1,RN7SL288P"
"STYX"
"KIF26A"
"SMG1,snoU13"
"AC105030.1,IGF2BP1,RNU6-826P,RP11-501C14.7"
"RP11-509E16.1"
"AC079395.1,FAM178B,RNA5SP101,snoU13"
"POLR2D"
"SDC4"
"RRP1B"
"LDOC1L"
"UBE2E2,Y_RNA"
"PCDHB16"
"SAP30L"
"AC020606.1,FOXP2,MIR3666"
"WDYHV1"
"KLHL9"
"FAM127A"
"x"
"C1orf228"
"PDE4B"
"KCNK2"
"PADI2"
"CALML5"
"ASCL1"
"KRT81"
"SLCO3A1"
"GFAP"
"ZNF804A"
"IGFBP5"
"TFF1"
"RN7SL703P,RNU6-4P,SOX2,SOX2-OT"
"TM4SF1"
"DGKG,ETV5"
"CLDN1"
"FAM53A,Y_RNA"
"NEURL1B"
"MAN1A1"
"IGFBP3"
"NRCAM"
"DUSP4"
"RUNX1T1"
"ENPP2,RP11-99I9.2"
"MAGEA10,MAGEA5,RP11-1007I13.4"
"x"
"AC015691.13,AC087380.14,AC104389.28,HBE1,HBG2,HNRNPA1P53,OR51A10P,OR51AB1P,OR51B2,OR51B4,OR51B5,OR51B8P,OR51I1,OR51K1P,OR52H1,OR52H2P,OR52T1P,OR52V1P,UBQLN3,UBQLNL"
"CHORDC1"
"LHFP"
"LCP1,RN7SL288P"
"STYX"
"KIF26A"
"RP11-77H9.6"
"CNTNAP4,RP11-58C22.1,RP11-96P7.1"
"RAB34"
"CSH2"
"CSH1"
"SMCHD1,Y_RNA,snoU13"
"RP11-509E16.1"
"AC104135.4"
"POLR2D"
"SDC4"
"RRP1B"
"CACNA1I"
"LDOC1L"
"CSTA"
"SPINK2"
"PCDHB8"
"SAP30L"
"GS1-124K5.3"
"WDYHV1"
"LCNL1,PTGDS"
"FAM127A"
"x"
"PHACTR4,RNU6ATAC27P"
"PCSK9"
"PDE4B"
"KCNK2"
"CALML5"
"AP002348.1,GRIK4"
"RCOR2"
"ASCL1"
"RP11-669N7.2"
"KRT81"
"TNFRSF19"
"SLITRK6"
"RGMA"
"SBK1"
"GFAP"
"CTD-2200P10.1,RNU1-108P,RNU1-52P,RNU1-60P,RNU1-85P,TEX14,U3"
"CBLN2"
"B3GNT8"
"ZNF804A"
"AC007271.3"
"CXCR4"
"MIR1258,ZNF385B"
"IGFBP5"
"FAM83D"
"UBE2C"
"MAFB"
"TFF1"
"RN7SL703P,RNU6-4P,SOX2,SOX2-OT"
"TM4SF1"
"TNFSF10"
"FAM53A,Y_RNA"
"LEF1"
"FBXL7"
"NEURL1B"
"FYN"
"MAN1A1"
"AMPH,RN7SL83P"
"IGFBP3"
"CALCR"
"DUSP4"
"VCX3B"
"CXorf67,RP11-348F1.2"
"SH3BGRL"
"FAM133A"
"GABRE"
"x"
"HPX"
"CHORDC1"
"ALDH1L2"
"LHFP"
"LCP1,RN7SL288P"
"STYX"
"CHAC1"
"NDRG4,RNU6-103P"
"SLC7A5"
"ST6GALNAC1"
"POLR2D"
"MIR5001,TIGD1"
"SDC4"
"RRP1B"
"ADM2"
"LDOC1L"
"Y_RNA,ZNF589"
"ILDR1"
"SLC7A11"
"SAP30L"
"ULBP1"
"ASNS"
"WDYHV1"
"PSAT1"
"KLHL9"
"FAM127A"
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment