DDX21 RNA-seq Analsysis ===================================== ## Setup and Imports ```{r init} source("http://bioconductor.org/biocLite.R") biocLite("cummeRbund") install.packages("stringr") install.packages("ggplot2") install.packages("VennDiagram") install.packages("reshape") install.packages("eulerr") library(cummeRbund) library(stringr) library(ggplot2) library(VennDiagram) library(reshape) library(eulerr) ``` ## Initialize cuffData.db and establish connection ```{r cuff} cuff_diff_path="/Volumes/project/GCRB/Lee_Lab/s163035/MCF7_DDX21_inducibile_kd_Daeseok/compare-expression-cufflinks.sh-2.0.0" gtf_path="/Volumes/project/GCRB/Lee_Lab/s163035/MCF7_DDX21_inducibile_kd_Daeseok/transcripts/merge-transcripts-cufflinks.sh-1.0.0/merged.gtf" genome_path="/Volumes/project/apps_database/iGenomes/Homo_sapiens/UCSC/hg19/Sequence/WholeGenomeFasta/genome.fa" cuff <- readCufflinks(dir=cuff_diff_path,gtfFile=gtf_path,genome=genome_path) ``` ### Significant gene expression differences (FDR <5%) ```{r sigGenes} alpha<-0.05 sigGeneIDs<-getSig(cuff,alpha=alpha) sigGenes<-getGenes(cuff,sigGeneIDs) ``` ### Get the table of the significant differentially expressed genes ```{r sigTable} mySigMat<-sigMatrix(cuff,level='genes',alpha=alpha) jpeg('figures/sig_matrix.jpg') mySigMat dev.off() mySigTable<-getSigTable(cuff,alpha=alpha,level='genes') mySigFrame <- as.data.frame(mySigTable) geneNames<-unique(featureNames(sigGenes)) rownames(geneNames)<-geneNames[,1] mySigFrameGenes<-merge(mySigFrame,geneNames, by=0) sigFPKMs<-fpkmMatrix(sigGenes) mysigFPKMs<-merge(sigFPKMs,geneNames, by=0) ``` ### Look at the set of differentially expressed genes ```{r diff} # LUC vs KD diffgenes_kd<-mySigFrameGenes[which(mySigFrameGenes$lucvskd==1),] diffgenes_kd_FPKM<-mysigFPKMs[which(mySigFrameGenes$lucvskd==1),] diffgenes_kd_FPKM$fc <- log(diffgenes_kd_FPKM$kd/diffgenes_kd_FPKM$luc,2) kd_up_genes <- diffgenes_kd_FPKM[which(diffgenes_kd_FPKM$fc>1),] kd_down_genes <- diffgenes_kd_FPKM[which(diffgenes_kd_FPKM$fc< -1),] kd_genes <- diffgenes_kd_FPKM[which(diffgenes_kd_FPKM$fc>1 | diffgenes_kd_FPKM$fc< -1 ),] # Genes cat("up: ", NROW(unique(kd_up_genes$gene_short_name)), "\n") cat("down: ", NROW(unique(kd_down_genes$gene_short_name)), "\n") write.table(kd_up_genes$gene_short_name, "tables/kd_up_genes.txt",sep="\t", row.names = FALSE) write.table(kd_down_genes$gene_short_name, "tables/kd_down_genes.txt",sep="\t", row.names = FALSE) # LUC vs WT diffgenes_wt<-mySigFrameGenes[which(mySigFrameGenes$lucvswt==1),] diffgenes_wt_FPKM<-mysigFPKMs[which(mySigFrameGenes$lucvswt==1),] diffgenes_wt_FPKM$fc <- log(diffgenes_wt_FPKM$wt/diffgenes_wt_FPKM$luc,2) wt_up_genes <- diffgenes_wt_FPKM[which(diffgenes_wt_FPKM$fc>1),] wt_down_genes <- diffgenes_wt_FPKM[which(diffgenes_wt_FPKM$fc< -1),] wt_genes <- diffgenes_wt_FPKM[which(diffgenes_wt_FPKM$fc>1 | diffgenes_wt_FPKM$fc< -1 ),] # Genes cat("up: ", NROW(unique(wt_up_genes$gene_short_name)), "\n") cat("down: ", NROW(unique(wt_down_genes$gene_short_name)), "\n") write.table(wt_up_genes$gene_short_name, "tables/wt_up_genes.txt",sep="\t", row.names = FALSE) write.table(wt_down_genes$gene_short_name, "tables/wt_down_genes.txt",sep="\t", row.names = FALSE) # LUC vs MT diffgenes_mt<-mySigFrameGenes[which(mySigFrameGenes$lucvsmt==1),] diffgenes_mt_FPKM<-mysigFPKMs[which(mySigFrameGenes$lucvsmt==1),] diffgenes_mt_FPKM$fc <- log(diffgenes_mt_FPKM$mt/diffgenes_mt_FPKM$luc,2) mt_up_genes <- diffgenes_mt_FPKM[which(diffgenes_mt_FPKM$fc>1),] mt_down_genes <- diffgenes_mt_FPKM[which(diffgenes_mt_FPKM$fc< -1),] mt_genes <- diffgenes_mt_FPKM[which(diffgenes_mt_FPKM$fc>1 | diffgenes_mt_FPKM$fc< -1 ),] # Genes cat("up: ", NROW(unique(mt_up_genes$gene_short_name)), "\n") cat("down: ", NROW(unique(mt_down_genes$gene_short_name)), "\n") write.table(mt_up_genes$gene_short_name, "tables/mt_up_genes.txt",sep="\t", row.names = FALSE) write.table(mt_down_genes$gene_short_name, "tables/mt_down_genes.txt",sep="\t", row.names = FALSE) # VennDiagram of genes overlap_genes <- euler(list("KD" = kd_genes$gene_short_name,"WT" = wt_genes$gene_short_name, 'MT' = mt_genes$gene_short_name)) jpeg('figures/venndigram_genes_ddx21.jpg') plot(overlap_genes,counts=T,labels=c("KD","WT","MT")) dev.off()