diff --git a/.gitignore b/.gitignore index 74eacbfdda55345c05d3f29b0c55db02b45babc6..09396370d55024e1bbffaac11cf0789212434eb9 100755 --- a/.gitignore +++ b/.gitignore @@ -1,7 +1,7 @@ analysis/* -!.gitkeep .vscode/ WR/ +stress/* *.err *.out *.Rhistory @@ -10,3 +10,4 @@ WR/ *~ temp_png.png Rplots.pdf +!.gitkeep diff --git a/r.scripts/StressCompare.R b/r.scripts/StressCompare.R index b42fe8ae22c12fa6f30c65d36e213f10f6760b69..cf0825ffa39d99b11f165951a01ff68b5b8c736c 100644 --- a/r.scripts/StressCompare.R +++ b/r.scripts/StressCompare.R @@ -6,23 +6,119 @@ library(fBasics) library(pastecs) library(qusage) library(RColorBrewer) +library(VennDiagram) +library(Cairo) -load("./analysis/sc10x.Stress.Rda") +load("../analysis/sc10x.Stress.Rda") sc10x.Stress <- SetAllIdent(object=sc10x.Stress,id="ALL") gene.exp <- as.data.frame(as.matrix(sc10x.Stress@raw.data[,colnames(sc10x.Stress@data)])) -genes.stress <- read_csv("./genesets/DEG_C2.CGP.M10970.txt") -genes.stress <- genes.stress[2:nrow(genes.stress),] -genes.stress <- as.list(genes.stress) -gene.stress.pct <- apply(gene.exp,2,function(x) sum(x[rownames(gene.exp) %in% unlist(genes.stress)])/sum(x)) +genes.stress.go <- read_csv("../genesets/DEG_C2.CGP.M10970.txt") +genes.stress.go <- genes.stress[2:nrow(genes.stress.go),] +genes.stress.go <- as.list(genes.stress.go) -histo <- hist(gene.stress.pct,breaks=100,prob=TRUE,plot=TRUE,main="Distribution of Stress Gene Expression in Transcriptome",xlab="% of transcriptome associated to stress genes") -abline(v=.00575,col="red") +genes.stress.dws <- read_delim("../genesets/genes.deg.Stress.csv",",",escape_double=FALSE,trim_ws=TRUE,col_names=TRUE) +genes.stress.dws <- genes.stress.dws[1] +colnames(genes.stress.dws) <- "scDWS.Stress" +genes.stress.dws <- as.list(genes.stress.dws) +genes.stress.vdb <- read_delim("./vanderBrink.Stress.mus.tsv","\t",escape_double=FALSE,trim_ws=TRUE,col_names=FALSE) +colnames(genes.stress.vdb) <- "vdBrink.Stress" +genes.homolog <- read_delim("./Ensemble.mus-hum.txt","\t",escape_double=FALSE,trim_ws=TRUE,col_names=TRUE) +genes.stress.vdb <- list("vdBrink.Stress"=merge(genes.stress.vdb,genes.homolog[,c(2,4)],by.x="vdBrink.Stress",by.y="Gene name")[,2]) -sc10x.Stress <- SetAllIdent(object=sc10x.Stress,id="ALL+Stress") -cell.index.stress <- names(sc10x.Stress@ident[sc10x.Stress@ident=="Stress"]) -cell.index.stress <- which(colnames(gene.exp) %in% cell.index.stress) -cell.index.ntstress <- names(sc10x.Stress@ident[sc10x.Stress@ident=="ALL"]) -cell.index.ntstress <- which(colnames(gene.exp) %in% cell.index.ntstress) +venn.diagram(list(go=unlist(genes.stress.go),dws=unlist(genes.stress.dws),vdb=unlist(genes.stress.vdb)),"./Venn.tiff") +for (i in c("go","dws","vdb")){ + genes.stress=get(paste0("genes.stress.",i)) + + gene.stress.pct <- apply(gene.exp,2,function(x) sum(x[rownames(gene.exp) %in% unlist(genes.stress)])/sum(x)) + + #histo <- hist(gene.stress.pct,breaks=100,prob=TRUE,plot=TRUE,main="Distribution of Stress Gene Expression in Transcriptome",xlab="% of transcriptome associated to stress genes") + #abline(v=.0575,col="red") + + sc10x.Stress <- SetAllIdent(object=sc10x.Stress,id="ALL+Stress") + cell.index.stress <- names(sc10x.Stress@ident[sc10x.Stress@ident=="Stress"]) + cell.index.stress <- which(colnames(gene.exp) %in% cell.index.stress) + cell.index.ntstress <- names(sc10x.Stress@ident[sc10x.Stress@ident=="ALL"]) + cell.index.ntstress <- which(colnames(gene.exp) %in% cell.index.ntstress) + + histo.stress <- hist(gene.stress.pct[cell.index.stress],breaks=seq(0,ceiling(max(gene.stress.pct)*100)/100,0.005),main="Distribution of Stress Gene Expression in Transcriptome",xlab="% of transcriptome associated to stress genes",plot=FALSE) + #abline(v=.0575,col="red") + histo.ntstress <- hist(gene.stress.pct[cell.index.ntstress],breaks=seq(0,ceiling(max(gene.stress.pct)*100)/100,0.005),main="Distribution of Stress Gene Expression in Transcriptome",xlab="% of transcriptome associated to stress genes",plot=FALSE) + #abline(v=.0575,col="red") + + Cairo(paste0("./Hist.Stress.",i,".png"),width=1000,height=500,bg="white") + plot(histo.ntstress$mids,histo.ntstress$density,col=rgb(0,1,0,1/5),xlim=c(0,ceiling(max(gene.stress.pct)*100)/100),ylim=c(0,ceiling(max(histo.stress$density,histo.ntstress$density)/10)*10),type="h",lend="square",lwd=10,xlab="Fraction of transcriptome associated to stress genes",ylab="Density") + par(new=TRUE) + plot(histo.stress$mids,histo.stress$density,col=rgb(1,0,0,1/5),xlim=c(0,ceiling(max(gene.stress.pct)*100)/100),ylim=c(0,ceiling(max(histo.stress$density,histo.ntstress$density)/10)*10),type="h",lend="square",lwd=10,axes=FALSE,xlab="",ylab="") + abline(v=0.0575,col="red") + text(0.0575+0.01,(ceiling(max(histo.stress$density,histo.ntstress$density)/10)*10)/4*3+2.5,paste0(round(sum(histo.ntstress$counts[histo.ntstress$breaks>0.035][!is.na(histo.ntstress$counts[histo.ntstress$breaks>0.035])])/sum(histo.ntstress$counts[!is.na(histo.ntstress$counts)])*100),"%"),col="green") + text(0.0575-0.01,(ceiling(max(histo.stress$density,histo.ntstress$density)/10)*10)/4*3+2.5,paste0(round(sum(histo.ntstress$counts[histo.ntstress$breaks<=0.035][!is.na(histo.ntstress$counts[histo.ntstress$breaks<=0.035])])/sum(histo.ntstress$counts[!is.na(histo.ntstress$counts)])*100),"%"),col="green") + text(0.0575+0.01,(ceiling(max(histo.stress$density,histo.ntstress$density)/10)*10)/4*3-2.5,paste0(round(sum(histo.stress$counts[histo.stress$breaks>0.035][!is.na(histo.stress$counts[histo.stress$breaks>0.035])])/sum(histo.stress$counts[!is.na(histo.stress$counts)])*100),"%"),col="red") + text(0.0575-0.01,(ceiling(max(histo.stress$density,histo.ntstress$density)/10)*10)/4*3-2.5,paste0(round(sum(histo.stress$counts[histo.stress$breaks<=0.035][!is.na(histo.stress$counts[histo.stress$breaks<=0.035])])/sum(histo.stress$counts[!is.na(histo.stress$counts)])*100),"%"),col="red") + text(0.0575+0.05,(ceiling(max(histo.stress$density,histo.ntstress$density)/10)*10)/4*3+2.5,"Not Stressed",col="green") + text(0.0575+0.05,(ceiling(max(histo.stress$density,histo.ntstress$density)/10)*10)/4*3-2.5,"Stressed",col="red") + dev.off() +} +rm(i) +rm(genes.stress) +rm(gene.stress.pct) +rm(cell.index.stress) +rm(cell.index.ntstress) +rm(histo.stress) +rm(histo.ntstress) + +for (cut in c(0.9,0.95)){ + for (i in c("go","dws","vdb")){ + genes.stress=get(paste0("genes.stress.",i)) + sc10x.Stress <- SetAllIdent(object=sc10x.Stress,id="ALL") + sc10x.Stress.dta <- as.data.frame(as.matrix(sc10x.Stress@data[rownames(sc10x.Stress@scale.data) %in% unlist(genes.stress),])) + + #Run PCA of subsetted stress genes + sc10x.Stress.dta <- t(sc10x.Stress.dta) + sc10x.Stress.dta <- sc10x.Stress.dta[,apply(sc10x.Stress.dta,2,var)!=0] + sc10x.Stress.dta.pca <- prcomp(sc10x.Stress.dta,center=TRUE,scale.=TRUE) + sc10x.Stress.dta.pca <- sc10x.Stress.dta.pca$x[,1:2] + colnames(x=sc10x.Stress.dta.pca) <- paste0("Stress",1:2) + if (skewness(sc10x.Stress.dta.pca[,1])<0){ + sc10x.Stress.dta.pca[,1] <- (-sc10x.Stress.dta.pca[,1]) + } + if (skewness(sc10x.Stress.dta.pca[,2])<0){ + sc10x.Stress.dta.pca[,2] <- (-sc10x.Stress.dta.pca[,2]) + } + sc10x.dta <- sc10x.Stress + sc10x.dta <- SetDimReduction(object=sc10x.dta,reduction.type="Stress",slot="cell.embeddings",new.data=sc10x.Stress.dta.pca) + sc10x.dta <- SetDimReduction(object=sc10x.dta,reduction.type="Stress",slot="key",new.data="Stress") + + #CDS + cdf <- ecdf(GetCellEmbeddings(object=sc10x.dta,reduction.type="Stress",dims.use=1)) + cut.x <- quantile(cdf,probs=cut) + + assign(i,GetCellEmbeddings(object=sc10x.dta,reduction.type="Stress",dims.use=1)) + assign(paste0("cut.",i,".",cut),cut.x) +}} +rm(cut) +rm(i) +rm(genes.stress) +rm(sc10x.Stress.dta) +rm(sc10x.Stress.dta.pca) +rm(sc10x.dta) +rm(cdf) +rm(cut.x) + +comp <- cbind(vdb,go[match(rownames(vdb),rownames(go))],dws[match(rownames(vdb),rownames(dws))]) +colnames(comp) <- c("vdb","go","dws") +comp <- data.frame(comp) + +Cairo(paste0("./Stress.vdb.vs.go.png"),width=1000,height=500,bg="white") +ggplot(comp,aes(x=vdb,y=go))+geom_point(size=0.01)+geom_vline(xintercept=cut.vdb.0.9)+geom_hline(yintercept=cut.go.0.9)+annotate("text",x=max(comp$vdb)/4*3,y=max(comp$go)/4*3,col="red",size=10,fontface=2,label=paste0(round(nrow(comp[comp$vdb>cut.vdb.0.9,][comp[comp$vdb>cut.vdb.0.9,"go"]>cut.go.0.9,])/nrow(comp[comp$vdb>cut.vdb.0.9,])*100,1),"%"))+annotate("text",x=max(comp$vdb)/100,y=max(comp$go)/100,col="green",size=10,fontface=2,label=paste0(round(nrow(comp[comp$vdb<=cut.vdb.0.9,][comp[comp$vdb<=cut.vdb.0.9,"go"]<=cut.go.0.9,])/nrow(comp[comp$vdb<=cut.vdb.0.9,])*100,1),"%")) +dev.off() +Cairo(paste0("./Stress.vdb.vs.dws.png"),width=1000,height=500,bg="white") +ggplot(comp,aes(x=vdb,y=dws))+geom_point(size=0.01)+geom_vline(xintercept=cut.vdb.0.9)+geom_hline(yintercept=cut.dws.0.9)+annotate("text",x=max(comp$vdb)/4*3,y=max(comp$dws)/4*3,col="red",size=10,fontface=2,label=paste0(round(nrow(comp[comp$vdb>cut.vdb.0.9,][comp[comp$vdb>cut.vdb.0.9,"dws"]>cut.dws.0.9,])/nrow(comp[comp$vdb>cut.vdb.0.9,])*100,1),"%"))+annotate("text",x=max(comp$vdb)/100,y=max(comp$dws)/100,col="green",size=10,fontface=2,label=paste0(round(nrow(comp[comp$vdb<=cut.vdb.0.9,][comp[comp$vdb<=cut.vdb.0.9,"dws"]<=cut.dws.0.9,])/nrow(comp[comp$vdb<=cut.vdb.0.9,])*100,1),"%")) +dev.off() +Cairo(paste0("./Stress.go.vs.dws.png"),width=1000,height=500,bg="white") +ggplot(comp,aes(x=go,y=dws))+geom_point(size=0.01)+geom_vline(xintercept=cut.go.0.9)+geom_hline(yintercept=cut.dws.0.9)+annotate("text",x=max(comp$go)/4*3,y=max(comp$dws)/4*3,col="red",size=10,fontface=2,label=paste0(round(nrow(comp[comp$go>cut.go.0.9,][comp[comp$go>cut.go.0.9,"dws"]>cut.dws.0.9,])/nrow(comp[comp$go>cut.go.0.9,])*100,1),"%"))+annotate("text",x=max(comp$go)/100,y=max(comp$dws)/100,col="green",size=10,fontface=2,label=paste0(round(nrow(comp[comp$go<=cut.go.0.9,][comp[comp$go<=cut.go.0.9,"dws"]<=cut.dws.0.9,])/nrow(comp[comp$go<=cut.go.0.9,])*100,1),"%")) +dev.off() + +rm(comp) \ No newline at end of file diff --git a/stress/.gitkeep b/stress/.gitkeep new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391