diff --git a/genesets/40425_2017_215_MOESM1_ESM.xlsx b/genesets/40425_2017_215_MOESM1_ESM.xlsx new file mode 100755 index 0000000000000000000000000000000000000000..5a813a3b5b13a22e578c267e8269b26ab51aca94 Binary files /dev/null and b/genesets/40425_2017_215_MOESM1_ESM.xlsx differ diff --git a/r.scripts/sc-TissueMapper.R b/r.scripts/sc-TissueMapper.R index 24d1548724bffda0afa7c350e49b8e66d512f316..5d140421d36cb0d62711788a8edf0923384207d1 100644 --- a/r.scripts/sc-TissueMapper.R +++ b/r.scripts/sc-TissueMapper.R @@ -1257,6 +1257,51 @@ sctSNECustCol <- function(sc10x,i,bl,rd,file){ dev.off() } +sctSNE3CustCol <- function(sc10x,i,bl,rd,gn,file){ + if (gn!=""){ + gn.col <- NULL + if (length(sc10x@ident[sc10x@ident=="B-cells"])>0){ + gn.col <- c(gn.col,"#BAE4B3") + } + if (length(sc10x@ident[sc10x@ident=="T-cells"])>0){ + gn.col <- c(gn.col,"#74C476") + } + if (length(sc10x@ident[sc10x@ident=="Macrophages"])>0){ + gn.col <- c(gn.col,"#31A354") + } + if (length(sc10x@ident[sc10x@ident=="Mast cells"])>0){ + gn.col <- c(gn.col,"#006D2C") + } + } + sc10x <- SetAllIdent(object=sc10x,id=i) + if (any(sc10x@ident == "Unknown")){ + sc10x@ident <- factor(sc10x@ident,levels=c(bl,rd,gn)) + } else { + sc10x@ident <- factor(sc10x@ident,levels=c(bl,rd,gn,"Unknown")) + } + postscript(paste0("./analysis/diy/tSNE_",file,".",i,".CustCol.eps")) + if (length(bl)==1 & length(rd)==1 & length(gn)==1){ + if (length(sc10x@ident[sc10x@ident=="Unknown"])>0){ + plot <- TSNEPlot(object=sc10x,pt.size=2.5,do.label=FALSE,label.size=10,do.return=TRUE,vector.friendly=FALSE,colors.use=c(brewer.pal(5,"Blues")[5],brewer.pal(5,"Reds")[5],brewer.pal(5,"Greens")[5],"grey50")) + } else { + plot <- TSNEPlot(object=sc10x,pt.size=2.5,do.label=FALSE,label.size=10,do.return=TRUE,vector.friendly=FALSE,colors.use=c(brewer.pal(5,"Blues")[5],brewer.pal(5,"Reds")[5],brewer.pal(5,"Greens")[5])) + } + plot <- plot+theme(axis.text.x=element_text(size=20),axis.text.y=element_text(size=20),axis.title.x=element_text(size=20),axis.title.y=element_text(size=20),legend.text=element_text(size=20)) + plot <- plot+guides(colour=guide_legend(override.aes=list(size=10))) + plot(plot) + } else { + if (bl!=""){ + plot <- TSNEPlot(object=sc10x,pt.size=2.5,do.label=FALSE,label.size=10,do.return=TRUE,vector.friendly=FALSE,colors.use=c(brewer.pal((length(bl)+1),"Blues")[2:(length(bl)+1)],brewer.pal((length(rd)+1),"Reds")[2:(length(rd)+1)],gn.col)) + } else { + plot <- TSNEPlot(object=sc10x,pt.size=2.5,do.label=FALSE,label.size=10,do.return=TRUE,vector.friendly=FALSE,colors.use=brewer.pal((length(rd)+1),"Reds")[2:(length(rd)+1)]) + } + plot <- plot+theme(axis.text.x=element_text(size=20),axis.text.y=element_text(size=20),axis.title.x=element_text(size=20),axis.title.y=element_text(size=20),legend.text=element_text(size=20)) + plot <- plot+guides(colour=guide_legend(override.aes=list(size=10))) + plot(plot) + } + dev.off() +} + sctSNEbwCol <- function(sc10x,i,file,files){ sc10x <- SetAllIdent(object=sc10x,id=i) sc10x@ident <- factor(sc10x@ident,levels=c("Epi","BE","LE","OE1","OE_SCGB","OE2","OE_KRT13","St","Fib","SM","Endo","Leu","Unknown",1:50)) diff --git a/r.scripts/sc-TissueMapper_RUN.PdPbPc.R b/r.scripts/sc-TissueMapper_RUN.PdPbPc.R new file mode 100755 index 0000000000000000000000000000000000000000..43fabef62b04029db1cbc74f1d9c32fe53d9974c --- /dev/null +++ b/r.scripts/sc-TissueMapper_RUN.PdPbPc.R @@ -0,0 +1,342 @@ +gc() +library(methods) +library(optparse) +library(Seurat) +library(readr) +library(fBasics) +library(pastecs) +library(qusage) +library(RColorBrewer) +library(monocle) +library(dplyr) +library(viridis) +library(readxl) + +source("../r.scripts/sc-TissueMapper.R") + +#Create folder structure +setwd("../") +if (!dir.exists("./analysis")){ + dir.create("./analysis") +} +if (!dir.exists("./analysis/qc")){ + dir.create("./analysis/qc") +} +if (!dir.exists("./analysis/qc/cc")){ + dir.create("./analysis/qc/cc") +} +if (!dir.exists("./analysis/tSNE")){ + dir.create("./analysis/tSNE") +} +if (!dir.exists("./analysis/tSNE/pre.stress")){ + dir.create("./analysis/tSNE/pre.stress") +} +if (!dir.exists("./analysis/pca")){ + dir.create("./analysis/pca") +} +if (!dir.exists("./analysis/pca/stress")){ + dir.create("./analysis/pca/stress") +} +if (!dir.exists("./analysis/violin")){ + dir.create("./analysis/violin") +} +if (!dir.exists("./analysis/violin/stress")){ + dir.create("./analysis/violin/stress") +} +if (!dir.exists("./analysis/table")){ + dir.create("./analysis/table") +} +if (!dir.exists("./analysis/tSNE/post.stress")){ + dir.create("./analysis/tSNE/post.stress") +} +if (!dir.exists("./analysis/cor")){ + dir.create("./analysis/cor") +} +if (!dir.exists("./analysis/tSNE/lin")){ + dir.create("./analysis/tSNE/lin") +} +if (!dir.exists("./analysis/tSNE/epi")){ + dir.create("./analysis/tSNE/epi") +} +if (!dir.exists("./analysis/tSNE/st")){ + dir.create("./analysis/tSNE/st") +} +if (!dir.exists("./analysis/tSNE/leu")){ + dir.create("./analysis/tSNE/leu") +} +if (!dir.exists("./analysis/tSNE/merge")){ + dir.create("./analysis/tSNE/merge") +} +if (!dir.exists("./analysis/pca/ne")){ + dir.create("./analysis/pca/ne") +} +if (!dir.exists("./analysis/tSNE/ne")){ + dir.create("./analysis/tSNE/ne") +} +if (!dir.exists("./analysis/violin/ne")){ + dir.create("./analysis/violin/ne") +} +if (!dir.exists("./analysis/tSNE/FINAL")){ + dir.create("./analysis/tSNE/FINAL") +} +if (!dir.exists("./analysis/deg")){ + dir.create("./analysis/deg") +} +if (!dir.exists("./analysis/cca")){ + dir.create("./analysis/cca") +} +if (!dir.exists("./analysis/diy")){ + dir.create("./analysis/diy") +} +if (!dir.exists("./analysis/pseudotime")){ + dir.create("./analysis/pseudotime") +} + +#Retrieve command-line options +option_list=list( + make_option("--p",action="store",default="DPrF",type='character',help="Project Name"), + make_option("--g",action="store",default="ALL",type='character',help="Group To analyze"), + make_option("--lg",action="store",default=500,type='integer',help="Threshold for cells with minimum genes"), + make_option("--hg",action="store",default=3000,type='integer',help="Threshold for cells with maximum genes"), + make_option("--lm",action="store",default=0,type='numeric',help="Threshold for cells with minimum %mito genes"), + make_option("--hm",action="store",default=0.1,type='numeric',help="Threshold for cells with maximum %mito genes"), + make_option("--lx",action="store",default=0.2,type='numeric',help="x low threshold for hvg selection"), + make_option("--hx",action="store",default=5,type='numeric',help="x high threshold for hvg selection"), + make_option("--ly",action="store",default=1,type='numeric',help="y low threshold for hvg selection"), + make_option("--cc",action="store",default=TRUE,type='logical',help="Scale cell cycle?"), + make_option("--cca",action="store",default=50,type='integer',help="Number of CCAs to cacluate"), + make_option("--acca",action="store",default=30,type='integer',help="Number of CCAs to align"), + make_option("--pc",action="store",default=50,type='integer',help="Number of PCs to cacluate"), + make_option("--res.prestress",action="store",default=1,type='numeric',help="Resolution to cluster, pre-stress"), + make_option("--st",action="store",default=TRUE,type='logical',help="Remove stressed cells?"), + make_option("--stg",action="store",default="dws",type='character',help="Geneset to use for stress ID"), + make_option("--cut.stress",action="store",default=0.9,type='numeric',help="Cutoff for stress score"), + make_option("--res.poststress",action="store",default=0.5,type='numeric',help="Resolution to cluster, post-stress"), + make_option("--cut.ne",action="store",default=0.999,type='numeric',help="Cutoff for NE score") +) +opt=parse_args(OptionParser(option_list=option_list)) +rm(option_list) +if (opt$lm==0){opt$lm=-Inf} + +load("./analysis/sc10x.Rda") + +for (i in c("Pb","Pc","Pd")){ + if (!dir.exists(paste0("./analysis/tSNE/post.stress/",i))){ + dir.create(paste0("./analysis/tSNE/post.stress/",i)) + } + if (!dir.exists(paste0("./analysis/tSNE/lin/",i))){ + dir.create(paste0("./analysis/tSNE/lin/",i)) + } + if (!dir.exists(paste0("./analysis/tSNE/epi/",i))){ + dir.create(paste0("./analysis/tSNE/epi/",i)) + } + if (!dir.exists(paste0("./analysis/tSNE/st/",i))){ + dir.create(paste0("./analysis/tSNE/st/",i)) + } + if (!dir.exists(paste0("./analysis/tSNE/leu/",i))){ + dir.create(paste0("./analysis/tSNE/leu/",i)) + } + + sc10x <- get(paste0("sc10x.",i)) + + sc10x <- scCluster(sc10x,pc.use=opt$acca,res.use=0.5,folder=paste0("post.stress/",i),red="cca.aligned") + + gene.set1 <- read_delim("./genesets/DEG_Epi_5FC.txt","\t",escape_double=FALSE,trim_ws=TRUE,col_names=FALSE) + gene.set1 <- as.list(gene.set1) + names(gene.set1) <- "Epi" + gene.set <- c(gene.set1) + gene.set1 <- read_delim("./genesets/DEG_FMSt_5FC.txt","\t",escape_double=FALSE,trim_ws=TRUE,col_names=FALSE) + gene.set1 <- as.list(gene.set1) + names(gene.set1) <- "St" + gene.set <- c(gene.set,gene.set1) + rm(gene.set1) + gc() + min.all <- min(table(sc10x@meta.data[,paste0("res",0.5)])) + results <- scQuSAGE(sc10x,gs=gene.set,res.use=opt$res.poststress,ds=min.all,nm="Lin",folder=paste0("lin/",i)) + sc10x <- results[[1]] + results.cor.Lin <- results[[2]] + results.clust.Lin.id <- results[[3]] + rm(results) + rm(gene.set) + + sc10x <- SetAllIdent(object=sc10x,id="Lin") + sc10x.Epi <- scSubset(sc10x,i="Lin",g="Epi") + if (any(levels(sc10x@ident)=="Unknown")){ + sc10x.St <- scSubset(sc10x,i="Lin",g=c("St","Unknown")) + } else { + sc10x.St <- scSubset(sc10x,i="Lin",g="St") + } + + sc10x.Epi <- scCluster(sc10x.Epi,pc.use=opt$acca,res.use=opt$res.poststress,folder=paste0("epi/",i),red="cca.aligned") + + #sc10x.Epi <- SetAllIdent(object=sc10x.Epi,id=paste0("res",opt$res.poststress)) + #sc10x.Epi <- BuildClusterTree(sc10x.Epi,do.reorder=TRUE,reorder.numeric=TRUE,do.plot=FALSE) + sc10x.Epi <- StashIdent(object=sc10x.Epi,save.name=paste0("res",opt$res.poststress)) + + sc10x.Epi <- RunTSNE(object=sc10x.Epi,reduction.use="cca.aligned",dims.use=1:opt$acca,do.fast=TRUE) + postscript(paste0("./analysis/tSNE/epi/",i,"/tSNE_Sample.eps")) + plot <- TSNEPlot(object=sc10x.Epi,group.by="samples",pt.size=2.5,do.return=TRUE,vector.friendly=FALSE) + plot <- plot+theme(axis.text.x=element_text(size=20),axis.text.y=element_text(size=20),axis.title.x=element_text(size=20),axis.title.y=element_text(size=20),legend.text=element_text(size=20)) + plot <- plot+guides(colour=guide_legend(override.aes=list(size=10))) + plot(plot) + dev.off() + postscript(paste0("./analysis/tSNE/epi/",i,"/tSNE_res",opt$res.poststress,".eps")) + plot <- TSNEPlot(object=sc10x.Epi,pt.size=5,do.label=TRUE,label.size=10,do.return=TRUE,vector.friendly=FALSE) + plot <- plot+theme(axis.text.x=element_text(size=20),axis.text.y=element_text(size=20),axis.title.x=element_text(size=20),axis.title.y=element_text(size=20),legend.text=element_text(size=20)) + plot <- plot+guides(colour=guide_legend(override.aes=list(size=10))) + plot(plot) + dev.off() + rm(plot) + + gene.set1 <- read_delim("./genesets/genes.deg.BE.csv",",",escape_double=FALSE,trim_ws=TRUE,col_names=TRUE) + gene.set1 <- gene.set1[1] + gene.set1 <- as.list(gene.set1) + names(gene.set1) <- "BE" + gene.set <- c(gene.set1) + gene.set1 <- read_delim("./genesets/genes.deg.LE.csv",",",escape_double=FALSE,trim_ws=TRUE,col_names=TRUE) + gene.set1 <- gene.set1[1] + gene.set1 <- as.list(gene.set1) + names(gene.set1) <- "LE" + gene.set <- c(gene.set,gene.set1) + gene.set1 <- read_delim("./genesets/genes.deg.OE1.csv",",",escape_double=FALSE,trim_ws=TRUE,col_names=TRUE) + gene.set1 <- gene.set1[1] + gene.set1 <- as.list(gene.set1) + names(gene.set1) <- "Club" + gene.set <- c(gene.set,gene.set1) + gene.set1 <- read_delim("./genesets/genes.deg.OE2.csv",",",escape_double=FALSE,trim_ws=TRUE,col_names=TRUE) + gene.set1 <- gene.set1[1] + gene.set1 <- as.list(gene.set1) + names(gene.set1) <- "Hillock" + gene.set <- c(gene.set,gene.set1) + rm(gene.set1) + gc() + min.epi <- min(table(sc10x.Epi@meta.data[,paste0("res",opt$res.poststress)])) + results <- scQuSAGE(sc10x.Epi,gs=gene.set,res.use=opt$res.poststress,ds=min.epi,nm="Epi.dws.sc",folder=paste0("epi/",i)) + sc10x.Epi <- results[[1]] + results.cor.Epi <- results[[2]] + results.clust.Epi.id <- results[[3]] + rm(results) + rm(gene.set) + + + sc10x.St <- scCluster(sc10x.St,pc.use=opt$acca,res.use=opt$res.poststress,folder=paste0("st/",i),red="cca.aligned") + + #sc10x.St <- SetAllIdent(object=sc10x.St,id=paste0("res",opt$res.poststress)) + #sc10x.St <- BuildClusterTree(sc10x.St,do.reorder=TRUE,reorder.numeric=TRUE,do.plot=FALSE) + sc10x.St <- StashIdent(object=sc10x.St,save.name=paste0("res",opt$res.poststress)) + + sc10x.St <- RunTSNE(object=sc10x.St,reduction.use="cca.aligned",dims.use=1:opt$acca,do.fast=TRUE) + postscript(paste0("./analysis/tSNE/st/",i,"/tSNE_Sample.eps")) + plot <- TSNEPlot(object=sc10x.St,group.by="samples",pt.size=2.5,do.return=TRUE,vector.friendly=FALSE) + plot <- plot+theme(axis.text.x=element_text(size=20),axis.text.y=element_text(size=20),axis.title.x=element_text(size=20),axis.title.y=element_text(size=20),legend.text=element_text(size=20)) + plot <- plot+guides(colour=guide_legend(override.aes=list(size=10))) + plot(plot) + dev.off() + postscript(paste0("./analysis/tSNE/st/",i,"/tSNE_res",opt$res.poststress,".eps")) + plot <- TSNEPlot(object=sc10x.St,pt.size=5,do.label=TRUE,label.size=10,do.return=TRUE,vector.friendly=FALSE) + plot <- plot+theme(axis.text.x=element_text(size=20),axis.text.y=element_text(size=20),axis.title.x=element_text(size=20),axis.title.y=element_text(size=20),legend.text=element_text(size=20)) + plot <- plot+guides(colour=guide_legend(override.aes=list(size=10))) + plot(plot) + dev.off() + rm(plot) + + gene.set1 <- read_delim("./genesets/genes.deg.Endo.csv",",",escape_double=FALSE,trim_ws=TRUE,col_names=TRUE) + gene.set1 <- gene.set1[1] + gene.set1 <- as.list(gene.set1) + names(gene.set1) <- "Endo" + gene.set <- c(gene.set1) + gene.set1 <- read_delim("./genesets/genes.deg.SM.csv",",",escape_double=FALSE,trim_ws=TRUE,col_names=TRUE) + gene.set1 <- gene.set1[1] + gene.set1 <- as.list(gene.set1) + names(gene.set1) <- "SM" + gene.set <- c(gene.set,gene.set1) + gene.set1 <- read_delim("./genesets/genes.deg.Fib.csv",",",escape_double=FALSE,trim_ws=TRUE,col_names=TRUE) + gene.set1 <- gene.set1[1] + gene.set1 <- as.list(gene.set1) + names(gene.set1) <- "Fib" + gene.set <- c(gene.set,gene.set1) + + genes.leu <- read_excel("genesets/40425_2017_215_MOESM1_ESM.xlsx",sheet="S3. Candidate markers") + leu <- as.list(unique(genes.leu[,2]))$Cell + leu <- leu[-c(1,3,4,7:9,14,15,17:18,20:21,23:30)] + genes.leu <- genes.leu[genes.leu$Selected==1,] + genes.leu <- genes.leu[unlist(genes.leu[,2]) %in% unlist(leu),] + gene.set1 <- split(genes.leu[,1], genes.leu[,2]) + gene.set1 <- lapply(gene.set1,unname) + gene.set1 <- lapply(gene.set1,unlist) + gene.set <- c(gene.set,gene.set1) + + rm(gene.set1) + gc() + min.st <- min(table(sc10x.St@meta.data[,paste0("res",opt$res.poststress)])) + results <- scQuSAGE(sc10x.St,gs=gene.set,res.use=opt$res.poststress,ds=min.st,nm="St.dws.sc",folder=paste0("st/",i)) + sc10x.St <- results[[1]] + results.cor.St <- results[[2]] + results.clust.St.id <- results[[3]] + rm(results) + rm(gene.set) + + sc10x.St <- SetAllIdent(object=sc10x.St,id="St.dws.sc") + sc10x.Leu <- scSubset(sc10x.St,i="St.dws.sc",g=leu[leu %in% unique(sc10x.St@ident)]) + + sc10x.Leu <- RunTSNE(object=sc10x.Leu,reduction.use="cca.aligned",dims.use=1:opt$acca,do.fast=TRUE) + postscript(paste0("./analysis/tSNE/leu/",i,"/tSNE_Sample.eps")) + plot <- TSNEPlot(object=sc10x.Leu,group.by="samples",pt.size=2.5,do.return=TRUE,vector.friendly=FALSE) + plot <- plot+theme(axis.text.x=element_text(size=20),axis.text.y=element_text(size=20),axis.title.x=element_text(size=20),axis.title.y=element_text(size=20),legend.text=element_text(size=20)) + plot <- plot+guides(colour=guide_legend(override.aes=list(size=10))) + plot(plot) + dev.off() + postscript(paste0("./analysis/tSNE/leu/",i,"/tSNE_LeuLin.eps")) + plot <- TSNEPlot(object=sc10x.Leu,pt.size=5,do.label=TRUE,label.size=10,do.return=TRUE,vector.friendly=FALSE) + plot <- plot+theme(axis.text.x=element_text(size=20),axis.text.y=element_text(size=20),axis.title.x=element_text(size=20),axis.title.y=element_text(size=20),legend.text=element_text(size=20)) + plot <- plot+guides(colour=guide_legend(override.aes=list(size=10))) + plot(plot) + dev.off() + rm(plot) + + sc10x <- scMerge(sc10x,sc10x.Epi,sc10x.St,i.1="Epi.dws.sc",i.2="St.dws.sc",nm="Merge") + + sc10x@ident <- factor(sc10x@ident,levels=c("BE","LE","Hillock","Club","Fib","SM","Endo","B-cells","T-cells","Macrophages","Mast cells")) + sctSNE3CustCol(sc10x,i="Merge",bl=c("BE","LE","Hillock","Club"),rd=c("Fib","SM","Endo"),gn=c("B-cells","T-cells","Macrophages","Mast cells"),file=i) + + if (i=="Pd"){ + for (j in list(c("D17PrPzF_Via","D27PrPzF_Via","D35PrPzF_Via"),c("D17PrTzF_Via","D27PrTzF_Via","D35PrTzF_Via"))){ + sc10x.sub <- scSubset(sc10x,"samples",j) + sc10x.sub <- SetAllIdent(object=sc10x.sub,id="Merge") + sc10x.sub@ident <- factor(sc10x.sub@ident,levels=c("BE","LE","Hillock","Club","Fib","SM","Endo","B-cells","T-cells","Macrophages","Mast cells")) + sctSNE3CustCol(sc10x.sub,i="Merge",bl=c("BE","LE","Hillock","Club"),rd=c("Fib","SM","Endo"),gn=c("B-cells","T-cells","Macrophages","Mast cells"),file=paste0(i,".",substr(j[1],6,7))) + } + rm(sc10x.sub) + } + + write.table(table(sc10x@meta.data[,"samples"],sc10x@meta.data[,"Merge"]),file=paste0("./analysis/table/Table_samples_Merge",i,".csv"),row.names=TRUE,col.names=NA,append=FALSE,sep=",") + write.table(round(prop.table(table(sc10x@meta.data[,"samples"],sc10x@meta.data[,"Merge"]),1)*100,1),file=paste0("./analysis/table/ProbTable_samples_Merge",i,".csv"),row.names=TRUE,col.names=NA,append=FALSE,sep=",") + + assign(paste0("sc10x.",i,".analized"),sc10x) + assign(paste0("sc10x.Epi.",i,".analized"),sc10x.Epi) + assign(paste0("sc10x.St.",i,".analized"),sc10x.St) + assign(paste0("sc10x.Leu.",i,".analized"),sc10x.Leu) + assign(paste0("results.cor.Lin.",i),results.cor.Lin) + assign(paste0("results.clust.Lin.id.",i),results.clust.Lin.id) + assign(paste0("results.cor.Epi.",i),results.cor.Epi) + assign(paste0("results.clust.Epi.id.",i),results.clust.Epi.id) + assign(paste0("results.cor.St.",i),results.cor.St) + assign(paste0("results.clust.St.id.",i),results.clust.St.id) + rm(sc10x.sub) + rm(sc10x) + rm(sc10x.Epi) + rm(sc10x.St) + rm(sc10x.Leu) + rm(results.cor.Lin) + rm(results.cor.Epi) + rm(results.cor.St) + rm(results.clust.Lin.id) + rm(results.clust.Epi.id) + rm(results.clust.St.id) + rm(min.all) + rm(min.epi) + rm(min.st) + rm(i) +} +save.image(file="./analysis/sc10x.analized.RData") +