RNA-seq.Rmd 4.15 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104
DDX21 RNA-seq Analsysis
=====================================

## Setup and Imports
```{r init}
source("http://bioconductor.org/biocLite.R")
biocLite("cummeRbund")
install.packages("stringr")
install.packages("ggplot2")
install.packages("VennDiagram")
install.packages("reshape")
install.packages("eulerr")
library(cummeRbund)
library(stringr)
library(ggplot2)
library(VennDiagram)
library(reshape)
library(eulerr)
```

## Initialize cuffData.db and establish connection
```{r cuff}
cuff_diff_path="/Volumes/project/GCRB/Lee_Lab/s163035/MCF7_DDX21_inducibile_kd_Daeseok/compare-expression-cufflinks.sh-2.0.0"
gtf_path="/Volumes/project/GCRB/Lee_Lab/s163035/MCF7_DDX21_inducibile_kd_Daeseok/transcripts/merge-transcripts-cufflinks.sh-1.0.0/merged.gtf"
genome_path="/Volumes/project/apps_database/iGenomes/Homo_sapiens/UCSC/hg19/Sequence/WholeGenomeFasta/genome.fa"

cuff <- readCufflinks(dir=cuff_diff_path,gtfFile=gtf_path,genome=genome_path)
```

### Significant gene expression differences (FDR <5%)
```{r sigGenes}
alpha<-0.05
sigGeneIDs<-getSig(cuff,alpha=alpha)
sigGenes<-getGenes(cuff,sigGeneIDs)
```

### Get the table of the significant differentially expressed genes
```{r sigTable}
mySigMat<-sigMatrix(cuff,level='genes',alpha=alpha)

jpeg('figures/sig_matrix.jpg')
mySigMat
dev.off()

mySigTable<-getSigTable(cuff,alpha=alpha,level='genes')
mySigFrame <- as.data.frame(mySigTable)
geneNames<-unique(featureNames(sigGenes))
rownames(geneNames)<-geneNames[,1]
mySigFrameGenes<-merge(mySigFrame,geneNames, by=0)
sigFPKMs<-fpkmMatrix(sigGenes)
mysigFPKMs<-merge(sigFPKMs,geneNames, by=0)
```

### Look at the set of differentially expressed genes
```{r diff}

# LUC vs KD
diffgenes_kd<-mySigFrameGenes[which(mySigFrameGenes$lucvskd==1),]
diffgenes_kd_FPKM<-mysigFPKMs[which(mySigFrameGenes$lucvskd==1),]
diffgenes_kd_FPKM$fc <- log(diffgenes_kd_FPKM$kd/diffgenes_kd_FPKM$luc,2)
kd_up_genes <- diffgenes_kd_FPKM[which(diffgenes_kd_FPKM$fc>1),]
kd_down_genes <- diffgenes_kd_FPKM[which(diffgenes_kd_FPKM$fc< -1),]
kd_genes <-  diffgenes_kd_FPKM[which(diffgenes_kd_FPKM$fc>1 | diffgenes_kd_FPKM$fc< -1 ),]

# Genes
cat("up: ",  NROW(unique(kd_up_genes$gene_short_name)), "\n")
cat("down: ", NROW(unique(kd_down_genes$gene_short_name)), "\n")
write.table(kd_up_genes$gene_short_name, "tables/kd_up_genes.txt",sep="\t", row.names = FALSE)
write.table(kd_down_genes$gene_short_name, "tables/kd_down_genes.txt",sep="\t", row.names = FALSE)

# LUC vs WT
diffgenes_wt<-mySigFrameGenes[which(mySigFrameGenes$lucvswt==1),]
diffgenes_wt_FPKM<-mysigFPKMs[which(mySigFrameGenes$lucvswt==1),]
diffgenes_wt_FPKM$fc <- log(diffgenes_wt_FPKM$wt/diffgenes_wt_FPKM$luc,2)
wt_up_genes <- diffgenes_wt_FPKM[which(diffgenes_wt_FPKM$fc>1),]
wt_down_genes <- diffgenes_wt_FPKM[which(diffgenes_wt_FPKM$fc< -1),]
wt_genes <-  diffgenes_wt_FPKM[which(diffgenes_wt_FPKM$fc>1 | diffgenes_wt_FPKM$fc< -1 ),]

# Genes
cat("up: ",  NROW(unique(wt_up_genes$gene_short_name)), "\n")
cat("down: ", NROW(unique(wt_down_genes$gene_short_name)), "\n")
write.table(wt_up_genes$gene_short_name, "tables/wt_up_genes.txt",sep="\t", row.names = FALSE)
write.table(wt_down_genes$gene_short_name, "tables/wt_down_genes.txt",sep="\t", row.names = FALSE)

# LUC vs MT
diffgenes_mt<-mySigFrameGenes[which(mySigFrameGenes$lucvsmt==1),]
diffgenes_mt_FPKM<-mysigFPKMs[which(mySigFrameGenes$lucvsmt==1),]
diffgenes_mt_FPKM$fc <- log(diffgenes_mt_FPKM$mt/diffgenes_mt_FPKM$luc,2)
mt_up_genes <- diffgenes_mt_FPKM[which(diffgenes_mt_FPKM$fc>1),]
mt_down_genes <- diffgenes_mt_FPKM[which(diffgenes_mt_FPKM$fc< -1),]
mt_genes <-  diffgenes_mt_FPKM[which(diffgenes_mt_FPKM$fc>1 | diffgenes_mt_FPKM$fc< -1 ),]

# Genes
cat("up: ",  NROW(unique(mt_up_genes$gene_short_name)), "\n")
cat("down: ", NROW(unique(mt_down_genes$gene_short_name)), "\n")
write.table(mt_up_genes$gene_short_name, "tables/mt_up_genes.txt",sep="\t", row.names = FALSE)
write.table(mt_down_genes$gene_short_name, "tables/mt_down_genes.txt",sep="\t", row.names = FALSE)


# VennDiagram of genes
overlap_genes <- euler(list("KD" = kd_genes$gene_short_name,"WT" = wt_genes$gene_short_name, 'MT' = mt_genes$gene_short_name))
jpeg('figures/venndigram_genes_ddx21.jpg')
plot(overlap_genes,counts=T,labels=c("KD","WT","MT"))
dev.off()