From e62a7dde0a45119200a4666aadae17e56d54b4b9 Mon Sep 17 00:00:00 2001 From: "Gervaise H. Henry" <gervaise.henry@utsouthwestern.edu> Date: Fri, 7 Dec 2018 15:10:49 -0600 Subject: [PATCH] Get PdPbPc Lymphoid/Myeloid analyis stable... need to get better genelists --- r.scripts/sc-TissueMapper_RUN.PdPbPc.R | 490 --------------------- r.scripts/sc-TissueMapper_RUN.PdPbPc.cca.R | 277 ++++++++++++ 2 files changed, 277 insertions(+), 490 deletions(-) delete mode 100755 r.scripts/sc-TissueMapper_RUN.PdPbPc.R create mode 100755 r.scripts/sc-TissueMapper_RUN.PdPbPc.cca.R diff --git a/r.scripts/sc-TissueMapper_RUN.PdPbPc.R b/r.scripts/sc-TissueMapper_RUN.PdPbPc.R deleted file mode 100755 index aeb3cfb..0000000 --- a/r.scripts/sc-TissueMapper_RUN.PdPbPc.R +++ /dev/null @@ -1,490 +0,0 @@ -gc() -library(methods) -library(optparse) -library(Seurat) -library(readr) -library(fBasics) -library(pastecs) -library(qusage) -library(RColorBrewer) -library(monocle) -library(dplyr) -library(viridis) -library(readxl) - -source("../r.scripts/sc-TissueMapper.R") - -#Create folder structure -setwd("../") -if (!dir.exists("./analysis")){ - dir.create("./analysis") -} -if (!dir.exists("./analysis/qc")){ - dir.create("./analysis/qc") -} -if (!dir.exists("./analysis/qc/cc")){ - dir.create("./analysis/qc/cc") -} -if (!dir.exists("./analysis/tSNE")){ - dir.create("./analysis/tSNE") -} -if (!dir.exists("./analysis/tSNE/pre.stress")){ - dir.create("./analysis/tSNE/pre.stress") -} -if (!dir.exists("./analysis/pca")){ - dir.create("./analysis/pca") -} -if (!dir.exists("./analysis/pca/stress")){ - dir.create("./analysis/pca/stress") -} -if (!dir.exists("./analysis/violin")){ - dir.create("./analysis/violin") -} -if (!dir.exists("./analysis/violin/stress")){ - dir.create("./analysis/violin/stress") -} -if (!dir.exists("./analysis/table")){ - dir.create("./analysis/table") -} -if (!dir.exists("./analysis/tSNE/post.stress")){ - dir.create("./analysis/tSNE/post.stress") -} -if (!dir.exists("./analysis/cor")){ - dir.create("./analysis/cor") -} -if (!dir.exists("./analysis/tSNE/lin")){ - dir.create("./analysis/tSNE/lin") -} -if (!dir.exists("./analysis/tSNE/epi")){ - dir.create("./analysis/tSNE/epi") -} -if (!dir.exists("./analysis/tSNE/n-epi")){ - dir.create("./analysis/tSNE/n-epi") -} -if (!dir.exists("./analysis/tSNE/st")){ - dir.create("./analysis/tSNE/st") -} -if (!dir.exists("./analysis/tSNE/leu")){ - dir.create("./analysis/tSNE/leu") -} -if (!dir.exists("./analysis/tSNE/t")){ - dir.create("./analysis/tSNE/t") -} -if (!dir.exists("./analysis/tSNE/merge")){ - dir.create("./analysis/tSNE/merge") -} -if (!dir.exists("./analysis/pca/ne")){ - dir.create("./analysis/pca/ne") -} -if (!dir.exists("./analysis/tSNE/ne")){ - dir.create("./analysis/tSNE/ne") -} -if (!dir.exists("./analysis/violin/ne")){ - dir.create("./analysis/violin/ne") -} -if (!dir.exists("./analysis/tSNE/FINAL")){ - dir.create("./analysis/tSNE/FINAL") -} -if (!dir.exists("./analysis/deg")){ - dir.create("./analysis/deg") -} -if (!dir.exists("./analysis/cca")){ - dir.create("./analysis/cca") -} -if (!dir.exists("./analysis/diy")){ - dir.create("./analysis/diy") -} -if (!dir.exists("./analysis/pseudotime")){ - dir.create("./analysis/pseudotime") -} - -#Retrieve command-line options -option_list=list( - make_option("--p",action="store",default="DPrF",type='character',help="Project Name"), - make_option("--g",action="store",default="ALL",type='character',help="Group To analyze"), - make_option("--lg",action="store",default=500,type='integer',help="Threshold for cells with minimum genes"), - make_option("--hg",action="store",default=3000,type='integer',help="Threshold for cells with maximum genes"), - make_option("--lm",action="store",default=0,type='numeric',help="Threshold for cells with minimum %mito genes"), - make_option("--hm",action="store",default=0.1,type='numeric',help="Threshold for cells with maximum %mito genes"), - make_option("--lx",action="store",default=0.2,type='numeric',help="x low threshold for hvg selection"), - make_option("--hx",action="store",default=5,type='numeric',help="x high threshold for hvg selection"), - make_option("--ly",action="store",default=1,type='numeric',help="y low threshold for hvg selection"), - make_option("--cc",action="store",default=TRUE,type='logical',help="Scale cell cycle?"), - make_option("--cca",action="store",default=50,type='integer',help="Number of CCAs to cacluate"), - make_option("--acca",action="store",default=30,type='integer',help="Number of CCAs to align"), - make_option("--pc",action="store",default=50,type='integer',help="Number of PCs to cacluate"), - make_option("--res.prestress",action="store",default=1,type='numeric',help="Resolution to cluster, pre-stress"), - make_option("--st",action="store",default=TRUE,type='logical',help="Remove stressed cells?"), - make_option("--stg",action="store",default="dws",type='character',help="Geneset to use for stress ID"), - make_option("--cut.stress",action="store",default=0.9,type='numeric',help="Cutoff for stress score"), - make_option("--res.poststress",action="store",default=0.75,type='numeric',help="Resolution to cluster, post-stress"), - make_option("--cut.ne",action="store",default=0.999,type='numeric',help="Cutoff for NE score") -) -opt=parse_args(OptionParser(option_list=option_list)) -rm(option_list) -if (opt$lm==0){opt$lm=-Inf} - -load("./analysis/sc10x.Pd.Rda") -load("./analysis/sc10x.Pb.Rda") -load("./analysis/sc10x.Pc.Rda") - -for (i in c("Pb","Pc","Pd")){ - if (!dir.exists(paste0("./analysis/tSNE/post.stress/",i))){ - dir.create(paste0("./analysis/tSNE/post.stress/",i)) - } - if (!dir.exists(paste0("./analysis/tSNE/lin/",i))){ - dir.create(paste0("./analysis/tSNE/lin/",i)) - } - if (!dir.exists(paste0("./analysis/tSNE/epi/",i))){ - dir.create(paste0("./analysis/tSNE/epi/",i)) - } - if (!dir.exists(paste0("./analysis/tSNE/n-epi/",i))){ - dir.create(paste0("./analysis/tSNE/n-epi/",i)) - } - if (!dir.exists(paste0("./analysis/tSNE/st/",i))){ - dir.create(paste0("./analysis/tSNE/st/",i)) - } - if (!dir.exists(paste0("./analysis/tSNE/leu/",i))){ - dir.create(paste0("./analysis/tSNE/leu/",i)) - } - if (!dir.exists(paste0("./analysis/tSNE/t/",i))){ - dir.create(paste0("./analysis/tSNE/t/",i)) - } - - sc10x <- get(paste0("sc10x.",i)) - - sc10x <- scCluster(sc10x,pc.use=opt$acca,res.use=opt$res.poststress,folder=paste0("post.stress/",i),red="cca.aligned") - - gene.set1 <- read_delim("./genesets/DEG_Epi_5FC.txt","\t",escape_double=FALSE,trim_ws=TRUE,col_names=FALSE) - gene.set1 <- as.list(gene.set1) - names(gene.set1) <- "Epi" - gene.set <- c(gene.set1) - gene.set1 <- read_delim("./genesets/DEG_FMSt_5FC.txt","\t",escape_double=FALSE,trim_ws=TRUE,col_names=FALSE) - gene.set1 <- as.list(gene.set1) - names(gene.set1) <- "St" - gene.set <- c(gene.set,gene.set1) - rm(gene.set1) - gc() - min.all <- min(table(sc10x@meta.data[,paste0("res",opt$res.poststress)])) - results <- scQuSAGE(sc10x,gs=gene.set,res.use=opt$res.poststress,ds=min.all,nm="Lin",folder=paste0("lin/",i)) - sc10x <- results[[1]] - results.cor.Lin <- results[[2]] - results.clust.Lin.id <- results[[3]] - rm(results) - rm(gene.set) - - sc10x <- SetAllIdent(object=sc10x,id="Lin") - sc10x.Epi <- scSubset(sc10x,i="Lin",g="Epi") - if (any(levels(sc10x@ident)=="Unknown")){ - sc10x.nEpi <- scSubset(sc10x,i="Lin",g=c("St","Unknown")) - } else { - sc10x.nEpi <- scSubset(sc10x,i="Lin",g="St") - } - - sc10x.Epi <- scCluster(sc10x.Epi,pc.use=opt$acca,res.use=opt$res.poststress,folder=paste0("epi/",i),red="cca.aligned") - sc10x.Epi <- StashIdent(object=sc10x.Epi,save.name=paste0("res",opt$res.poststress)) - - sc10x.Epi <- RunTSNE(object=sc10x.Epi,reduction.use="cca.aligned",dims.use=1:opt$acca,do.fast=TRUE) - postscript(paste0("./analysis/tSNE/epi/",i,"/tSNE_Sample.eps")) - plot <- TSNEPlot(object=sc10x.Epi,group.by="samples",pt.size=2.5,do.return=TRUE,vector.friendly=FALSE) - plot <- plot+theme(axis.text.x=element_text(size=20),axis.text.y=element_text(size=20),axis.title.x=element_text(size=20),axis.title.y=element_text(size=20),legend.text=element_text(size=20)) - plot <- plot+guides(colour=guide_legend(override.aes=list(size=10))) - plot(plot) - dev.off() - postscript(paste0("./analysis/tSNE/epi/",i,"/tSNE_res",opt$res.poststress,".eps")) - plot <- TSNEPlot(object=sc10x.Epi,pt.size=5,do.label=TRUE,label.size=10,do.return=TRUE,vector.friendly=FALSE) - plot <- plot+theme(axis.text.x=element_text(size=20),axis.text.y=element_text(size=20),axis.title.x=element_text(size=20),axis.title.y=element_text(size=20),legend.text=element_text(size=20)) - plot <- plot+guides(colour=guide_legend(override.aes=list(size=10))) - plot(plot) - dev.off() - rm(plot) - - gene.set1 <- read_delim("./genesets/genes.deg.BE.csv",",",escape_double=FALSE,trim_ws=TRUE,col_names=TRUE) - gene.set1 <- gene.set1[1] - gene.set1 <- as.list(gene.set1) - names(gene.set1) <- "BE" - gene.set <- c(gene.set1) - gene.set1 <- read_delim("./genesets/genes.deg.LE.csv",",",escape_double=FALSE,trim_ws=TRUE,col_names=TRUE) - gene.set1 <- gene.set1[1] - gene.set1 <- as.list(gene.set1) - names(gene.set1) <- "LE" - gene.set <- c(gene.set,gene.set1) - gene.set1 <- read_delim("./genesets/genes.deg.OE1.csv",",",escape_double=FALSE,trim_ws=TRUE,col_names=TRUE) - gene.set1 <- gene.set1[1] - gene.set1 <- as.list(gene.set1) - names(gene.set1) <- "Club" - gene.set <- c(gene.set,gene.set1) - gene.set1 <- read_delim("./genesets/genes.deg.OE2.csv",",",escape_double=FALSE,trim_ws=TRUE,col_names=TRUE) - gene.set1 <- gene.set1[1] - gene.set1 <- as.list(gene.set1) - names(gene.set1) <- "Hillock" - gene.set <- c(gene.set,gene.set1) - rm(gene.set1) - gc() - min.epi <- min(table(sc10x.Epi@meta.data[,paste0("res",opt$res.poststress)])) - results <- scQuSAGE(sc10x.Epi,gs=gene.set,res.use=opt$res.poststress,ds=min.epi,nm="Epi.dws.sc",folder=paste0("epi/",i)) - sc10x.Epi <- results[[1]] - results.cor.Epi <- results[[2]] - results.clust.Epi.id <- results[[3]] - rm(results) - rm(gene.set) - - sc10x.nEpi <- scCluster(sc10x.nEpi,pc.use=opt$acca,res.use=opt$res.poststress,folder=paste0("n-epi/",i),red="cca.aligned") - sc10x.nEpi <- StashIdent(object=sc10x.nEpi,save.name=paste0("res",opt$res.poststress)) - - sc10x.nEpi <- RunTSNE(object=sc10x.nEpi,reduction.use="cca.aligned",dims.use=1:opt$acca,do.fast=TRUE) - postscript(paste0("./analysis/tSNE/n-epi/",i,"/tSNE_Sample.eps")) - plot <- TSNEPlot(object=sc10x.nEpi,group.by="samples",pt.size=2.5,do.return=TRUE,vector.friendly=FALSE) - plot <- plot+theme(axis.text.x=element_text(size=20),axis.text.y=element_text(size=20),axis.title.x=element_text(size=20),axis.title.y=element_text(size=20),legend.text=element_text(size=20)) - plot <- plot+guides(colour=guide_legend(override.aes=list(size=10))) - plot(plot) - dev.off() - postscript(paste0("./analysis/tSNE/n-epi/",i,"/tSNE_res",opt$res.poststress,".eps")) - plot <- TSNEPlot(object=sc10x.nEpi,pt.size=5,do.label=TRUE,label.size=10,do.return=TRUE,vector.friendly=FALSE) - plot <- plot+theme(axis.text.x=element_text(size=20),axis.text.y=element_text(size=20),axis.title.x=element_text(size=20),axis.title.y=element_text(size=20),legend.text=element_text(size=20)) - plot <- plot+guides(colour=guide_legend(override.aes=list(size=10))) - plot(plot) - dev.off() - rm(plot) - - gene.set1 <- read_delim("./genesets/genes.deg.Endo.csv",",",escape_double=FALSE,trim_ws=TRUE,col_names=TRUE) - gene.set1 <- gene.set1[1] - gene.set1 <- as.list(gene.set1) - names(gene.set1) <- "Endo" - gene.set <- c(gene.set1) - gene.set1 <- read_delim("./genesets/genes.deg.SM.csv",",",escape_double=FALSE,trim_ws=TRUE,col_names=TRUE) - gene.set1 <- gene.set1[1] - gene.set1 <- as.list(gene.set1) - names(gene.set1) <- "SM" - gene.set <- c(gene.set,gene.set1) - gene.set1 <- read_delim("./genesets/genes.deg.Fib.csv",",",escape_double=FALSE,trim_ws=TRUE,col_names=TRUE) - gene.set1 <- gene.set1[1] - gene.set1 <- as.list(gene.set1) - names(gene.set1) <- "Fib" - gene.set <- c(gene.set,gene.set1) - rm(gene.set1) - gc() - min.nepi <- min(table(sc10x.nEpi@meta.data[,paste0("res",opt$res.poststress)])) - results <- scQuSAGE(sc10x.nEpi,gs=gene.set,res.use=opt$res.poststress,ds=min.nepi,nm="St.dws.sc",folder=paste0("n-epi/",i)) - sc10x.nEpi <- results[[1]] - results.cor.St <- results[[2]] - results.clust.St.id <- results[[3]] - rm(results) - rm(gene.set) - - sc10x.nEpi <- SetAllIdent(object=sc10x.nEpi,id="St.dws.sc") - sc10x.St <- scSubset(sc10x.nEpi,i="St.dws.sc",g=c("Fib","SM","Endo")[c("Fib","SM","Endo") %in% unique(sc10x.nEpi@ident)]) - sc10x.Leu <- scSubset(sc10x.nEpi,i="St.dws.sc",g="Unknown") - - sc10x.St <- RunTSNE(object=sc10x.St,reduction.use="cca.aligned",dims.use=1:opt$acca,do.fast=TRUE) - postscript(paste0("./analysis/tSNE/st/",i,"/tSNE_Sample.eps")) - plot <- TSNEPlot(object=sc10x.St,group.by="samples",pt.size=2.5,do.return=TRUE,vector.friendly=FALSE) - plot <- plot+theme(axis.text.x=element_text(size=20),axis.text.y=element_text(size=20),axis.title.x=element_text(size=20),axis.title.y=element_text(size=20),legend.text=element_text(size=20)) - plot <- plot+guides(colour=guide_legend(override.aes=list(size=10))) - plot(plot) - dev.off() - postscript(paste0("./analysis/tSNE/st/",i,"/tSNE_St.dws.sc.eps")) - plot <- TSNEPlot(object=sc10x.St,pt.size=5,do.label=TRUE,label.size=10,do.return=TRUE,vector.friendly=FALSE) - plot <- plot+theme(axis.text.x=element_text(size=20),axis.text.y=element_text(size=20),axis.title.x=element_text(size=20),axis.title.y=element_text(size=20),legend.text=element_text(size=20)) - plot <- plot+guides(colour=guide_legend(override.aes=list(size=10))) - plot(plot) - dev.off() - rm(plot) - - opt$res.leu <- 2 - - sc10x.Leu <- scCluster(sc10x.Leu,pc.use=opt$acca,res.use=opt$res.leu,folder=paste0("leu/",i),red="cca.aligned") - sc10x.Leu <- StashIdent(object=sc10x.Leu,save.name=paste0("res",opt$res.leu)) - - sc10x.Leu <- RunTSNE(object=sc10x.Leu,reduction.use="cca.aligned",dims.use=1:opt$acca,do.fast=TRUE) - postscript(paste0("./analysis/tSNE/leu/",i,"/tSNE_Sample.eps")) - plot <- TSNEPlot(object=sc10x.Leu,group.by="samples",pt.size=2.5,do.return=TRUE,vector.friendly=FALSE) - plot <- plot+theme(axis.text.x=element_text(size=20),axis.text.y=element_text(size=20),axis.title.x=element_text(size=20),axis.title.y=element_text(size=20),legend.text=element_text(size=20)) - plot <- plot+guides(colour=guide_legend(override.aes=list(size=10))) - plot(plot) - dev.off() - postscript(paste0("./analysis/tSNE/leu/",i,"/tSNE_res",opt$res.leu,".eps")) - plot <- TSNEPlot(object=sc10x.Leu,pt.size=5,do.label=TRUE,label.size=10,do.return=TRUE,vector.friendly=FALSE) - plot <- plot+theme(axis.text.x=element_text(size=20),axis.text.y=element_text(size=20),axis.title.x=element_text(size=20),axis.title.y=element_text(size=20),legend.text=element_text(size=20)) - plot <- plot+guides(colour=guide_legend(override.aes=list(size=10))) - plot(plot) - dev.off() - rm(plot) - - genes.leu <- read_excel("genesets/40425_2017_215_MOESM1_ESM.xlsx",sheet="S3. Candidate markers") - leu <- as.list(unique(genes.leu[,2]))$Cell - leu.l <- leu[-c(1,3,4,7:9,14,15,17:18,20:21,23:30)] - leu.t <- leu[-c(1,2,5,6,8:13,17:20,22)] -# genes.leu.s <- genes.leu[genes.leu$Selected==1,] -# leu.s <- as.list(unique(genes.leu.s[,2]))$Cell - genes.leu <- genes.leu[unlist(genes.leu[,2]) %in% unlist(leu),] - genes.leu.l <- genes.leu[unlist(genes.leu[,2]) %in% unlist(leu.l),] -# genes.leu.s <- genes.leu.s[unlist(genes.leu.s[,2]) %in% unlist(leu),] -genes.leu.t <- genes.leu[unlist(genes.leu[,2]) %in% unlist(leu.t),] - -# gene.set1 <- split(genes.leu[,1], genes.leu[,2]) -# gene.set1 <- lapply(gene.set1,unname) -# gene.set1 <- lapply(gene.set1,unlist) -# gene.set <- gene.set1 -# rm(gene.set1) -# gc() -# min.leu <- min(table(sc10x.Leu@meta.data[,paste0("res",opt$res.leu)])) -# results <- scQuSAGE(sc10x.Leu,gs=gene.set,res.use=opt$res.leu,ds=0,nm="Leu",folder=paste0("leu/",i)) -# sc10x.Leu <- results[[1]] -# results.cor.Leu <- results[[2]] -# results.clust.Leu.id <- results[[3]] -# rm(results) -# rm(gene.set) -# gene.set1 <- split(genes.leu.s[,1], genes.leu.s[,2]) -# gene.set1 <- lapply(gene.set1,unname) -# gene.set1 <- lapply(gene.set1,unlist) -# gene.set <- gene.set1 -# rm(gene.set1) -# gc() -# sc10x.Leu <- SetAllIdent(object=sc10x.Leu,id=paste0("res",opt$res.leu)) -# results <- scQuSAGE(sc10x.Leu,gs=gene.set,res.use=opt$res.leu,ds=0,nm="Leu.s",folder=paste0("leu/",i)) -# sc10x.Leu <- results[[1]] -# results.cor.Leu.s <- results[[2]] -# results.clust.Leu.s.id <- results[[3]] -# rm(results) -# rm(gene.set) - gene.set1 <- split(genes.leu.l[,1], genes.leu.l[,2]) - gene.set1 <- lapply(gene.set1,unname) - gene.set1 <- lapply(gene.set1,unlist) - gene.set <- gene.set1 - rm(gene.set1) - gc() - sc10x.Leu <- SetAllIdent(object=sc10x.Leu,id=paste0("res",opt$res.leu)) - results <- scQuSAGE(sc10x.Leu,gs=gene.set,res.use=opt$res.leu,ds=0,nm="Leu.l",folder=paste0("leu/",i)) - sc10x.Leu <- results[[1]] - results.cor.Leu.l <- results[[2]] - results.clust.Leu.l.id <- results[[3]] - rm(results) - rm(gene.set) - - sc10x.T <- scSubset(sc10x.Leu,i="Leu.l",g="T-cells") - - sc10x.T <- scCluster(sc10x.T,pc.use=opt$acca,res.use=opt$res.leu,folder=paste0("t/",i),red="cca.aligned") - sc10x.T <- StashIdent(object=sc10x.T,save.name=paste0("res",opt$res.leu)) - - sc10x.T <- RunTSNE(object=sc10x.T,reduction.use="cca.aligned",dims.use=1:opt$acca,do.fast=TRUE) - postscript(paste0("./analysis/tSNE/t/",i,"/tSNE_Sample.eps")) - plot <- TSNEPlot(object=sc10x.T,group.by="samples",pt.size=2.5,do.return=TRUE,vector.friendly=FALSE) - plot <- plot+theme(axis.text.x=element_text(size=20),axis.text.y=element_text(size=20),axis.title.x=element_text(size=20),axis.title.y=element_text(size=20),legend.text=element_text(size=20)) - plot <- plot+guides(colour=guide_legend(override.aes=list(size=10))) - plot(plot) - dev.off() - postscript(paste0("./analysis/tSNE/t/",i,"/tSNE_res",opt$res.leu,".eps")) - plot <- TSNEPlot(object=sc10x.T,pt.size=5,do.label=TRUE,label.size=10,do.return=TRUE,vector.friendly=FALSE) - plot <- plot+theme(axis.text.x=element_text(size=20),axis.text.y=element_text(size=20),axis.title.x=element_text(size=20),axis.title.y=element_text(size=20),legend.text=element_text(size=20)) - plot <- plot+guides(colour=guide_legend(override.aes=list(size=10))) - plot(plot) - dev.off() - rm(plot) - - gene.set1 <- split(genes.leu.t[,1], genes.leu.t[,2]) - gene.set1 <- lapply(gene.set1,unname) - gene.set1 <- lapply(gene.set1,unlist) - gene.set <- gene.set1 - rm(gene.set1) - gc() - sc10x.T <- SetAllIdent(object=sc10x.T,id=paste0("res",opt$res.leu)) - results <- scQuSAGE(sc10x.T,gs=gene.set,res.use=opt$res.leu,ds=0,nm="Leu.t",folder=paste0("t/",i)) - sc10x.Leu <- results[[1]] - results.cor.Leu.l <- results[[2]] - results.clust.Leu.l.id <- results[[3]] - rm(results) - rm(gene.set) - - - -# -# sc10x.Merge <- scMerge(sc10x.nEpi,sc10x.St,sc10x.Leu,i.1="St.dws.sc",i.2="Leu",nm="Merge") -# sc10x.Merge <- scMerge(sc10x,sc10x.Epi,sc10x.Merge,i.1="Epi.dws.sc",i.2="Merge",nm="Merge") -# -# sc10x.Merge@ident <- factor(sc10x.Merge@ident,levels=c("BE","LE","Hillock","Club","Fib","SM","Endo",leu)) -# -# postscript(paste0("./analysis/tSNE/FINAL/tSNE_Merge_",i,".eps")) -# plot <- TSNEPlot(object=sc10x.Merge,pt.size=2.5,do.label=TRUE,label.size=5,do.return=TRUE,vector.friendly=FALSE) -# plot <- plot+theme(axis.text.x=element_text(size=20),axis.text.y=element_text(size=20),axis.title.x=element_text(size=20),axis.title.y=element_text(size=20),legend.text=element_text(size=20)) -# plot <- plot+guides(colour=guide_legend(override.aes=list(size=10))) -# plot(plot) -# dev.off() -# rm(plot) -# -# sctSNE3CustCol(sc10x,i="Merge",bl=c("BE","LE","Hillock","Club"),rd=c("Fib","SM","Endo"),gn=c("B-cells","T-cells","Macrophages","Mast cells"),file=i) -# -# if (i=="Pd"){ -# for (j in list(c("D17PrPzF_Via","D27PrPzF_Via","D35PrPzF_Via"),c("D17PrTzF_Via","D27PrTzF_Via","D35PrTzF_Via"))){ -# sc10x.sub <- scSubset(sc10x,"samples",j) -# sc10x.sub <- SetAllIdent(object=sc10x.sub,id="Merge") -# sc10x.sub@ident <- factor(sc10x.sub@ident,levels=c("BE","LE","Hillock","Club","Fib","SM","Endo","B-cells","T-cells","Macrophages","Mast cells")) -# sctSNE3CustCol(sc10x.sub,i="Merge",bl=c("BE","LE","Hillock","Club"),rd=c("Fib","SM","Endo"),gn=c("B-cells","T-cells","Macrophages","Mast cells"),file=paste0(i,".",substr(j[1],6,7))) -# } -# rm(sc10x.sub) -# } else if (i=="Pb"){ -# sc10x.sub <- scSubset(sc10x,"Glandular","Glandular") -# sc10x.sub <- SetAllIdent(object=sc10x.sub,id="Merge") -# sc10x.sub@ident <- factor(sc10x.sub@ident,levels=c("BE","LE","Hillock","Club","Fib","SM","Endo","B-cells","T-cells","Macrophages","Mast cells")) -# sctSNE3CustCol(sc10x.sub,i="Merge",bl=c("BE","LE","Hillock","Club"),rd=c("Fib","SM","Endo"),gn=c("B-cells","T-cells","Macrophages","Mast cells"),file=paste0(i,".Glandular")) -# -# sc10x.sub <- scSubset(sc10x,"Stromal","Stromal") -# sc10x.sub <- SetAllIdent(object=sc10x.sub,id="Merge") -# sc10x.sub@ident <- factor(sc10x.sub@ident,levels=c("BE","LE","Hillock","Club","Fib","SM","Endo","B-cells","T-cells","Macrophages","Mast cells")) -# sctSNE3CustCol(sc10x.sub,i="Merge",bl=c("BE","LE","Hillock","Club"),rd=c("Fib","SM","Endo"),gn=c("B-cells","T-cells","Macrophages","Mast cells"),file=paste0(i,".Stromal")) -# -# for (j in c("BPH327PrGF_Via","BPH327PrSF_Via")){ -# sc10x.sub <- scSubset(sc10x,"samples",j) -# sc10x.sub <- SetAllIdent(object=sc10x.sub,id="Merge") -# sc10x.sub@ident <- factor(sc10x.sub@ident,levels=c("BE","LE","Hillock","Club","Fib","SM","Endo","B-cells","T-cells","Macrophages","Mast cells")) -# sctSNE3CustCol(sc10x.sub,i="Merge",bl=c("BE","LE","Hillock","Club"),rd=c("Fib","SM","Endo"),gn=c("B-cells","T-cells","Macrophages","Mast cells"),file=paste0(i,".",substr(j,4,6),substr(j,9,9))) -# } -# } -# -# write.table(table(sc10x@meta.data[,"samples"],sc10x@meta.data[,"Merge"]),file=paste0("./analysis/table/Table_samples_Merge",i,".csv"),row.names=TRUE,col.names=NA,append=FALSE,sep=",") -# write.table(round(prop.table(table(sc10x@meta.data[,"samples"],sc10x@meta.data[,"Merge"]),1)*100,1),file=paste0("./analysis/table/ProbTable_samples_Merge",i,".csv"),row.names=TRUE,col.names=NA,append=FALSE,sep=",") - - assign(paste0("sc10x.",i,".analized"),sc10x) - assign(paste0("sc10x.Epi.",i,".analized"),sc10x.Epi) - assign(paste0("sc10x.nEpi.",i,".analized"),sc10x.nEpi) - assign(paste0("sc10x.St.",i,".analized"),sc10x.St) - assign(paste0("sc10x.Leu.",i,".analized"),sc10x.Leu) - assign(paste0("results.cor.Lin.",i),results.cor.Lin) - assign(paste0("results.clust.Lin.id.",i),results.clust.Lin.id) - assign(paste0("results.cor.Epi.",i),results.cor.Epi) - assign(paste0("results.clust.Epi.id.",i),results.clust.Epi.id) - assign(paste0("results.cor.St.",i),results.cor.St) - assign(paste0("results.clust.St.id.",i),results.clust.St.id) - assign(paste0("results.cor.Leu.",i),results.cor.Leu) - assign(paste0("results.clust.Leu.id.",i),results.clust.Leu.id) - assign(paste0("results.cor.Leu.s.",i),results.cor.Leu.s) - assign(paste0("results.clust.Leu.s.id.",i),results.clust.Leu.s.id) - assign(paste0("results.cor.Leu.l.",i),results.cor.Leu.l) - assign(paste0("results.clust.Leu.l.id.",i),results.clust.Leu.l.id) - - rm(sc10x.sub) - rm(sc10x) - rm(sc10x.Epi) - rm(sc10x.St) - rm(sc10x.Leu) - rm(results.cor.Lin) - rm(results.cor.Epi) - rm(results.cor.St) - rm(results.cor.Leu) - rm(results.cor.Leu.s) - rm(results.cor.Leu.l) - rm(results.clust.Lin.id) - rm(results.clust.Epi.id) - rm(results.clust.St.id) - rm(results.clust.Leu.id) - rm(results.clust.Leu.s.id) - rm(results.clust.Leu.l.id) - rm(min.all) - rm(min.epi) - rm(min.nepi) - rm(min.st) - rm(min.leu) -} -rm(i) -save.image(file="./analysis/sc10x.analized.RData") - diff --git a/r.scripts/sc-TissueMapper_RUN.PdPbPc.cca.R b/r.scripts/sc-TissueMapper_RUN.PdPbPc.cca.R new file mode 100755 index 0000000..4fee8b5 --- /dev/null +++ b/r.scripts/sc-TissueMapper_RUN.PdPbPc.cca.R @@ -0,0 +1,277 @@ +gc() +library(methods) +library(optparse) +library(Seurat) +library(readr) +library(fBasics) +library(pastecs) +library(qusage) +library(RColorBrewer) +library(monocle) +library(dplyr) +library(viridis) +library(readxl) + +source("../r.scripts/sc-TissueMapper.R") + +#Create folder structure +setwd("../") +if (!dir.exists("./analysis")){ + dir.create("./analysis") +} +if (!dir.exists("./analysis/qc")){ + dir.create("./analysis/qc") +} +if (!dir.exists("./analysis/qc/cc")){ + dir.create("./analysis/qc/cc") +} +if (!dir.exists("./analysis/tSNE")){ + dir.create("./analysis/tSNE") +} +if (!dir.exists("./analysis/tSNE/pre.stress")){ + dir.create("./analysis/tSNE/pre.stress") +} +if (!dir.exists("./analysis/pca")){ + dir.create("./analysis/pca") +} +if (!dir.exists("./analysis/pca/stress")){ + dir.create("./analysis/pca/stress") +} +if (!dir.exists("./analysis/violin")){ + dir.create("./analysis/violin") +} +if (!dir.exists("./analysis/violin/stress")){ + dir.create("./analysis/violin/stress") +} +if (!dir.exists("./analysis/table")){ + dir.create("./analysis/table") +} +if (!dir.exists("./analysis/tSNE/post.stress")){ + dir.create("./analysis/tSNE/post.stress") +} +if (!dir.exists("./analysis/cor")){ + dir.create("./analysis/cor") +} +if (!dir.exists("./analysis/tSNE/lin")){ + dir.create("./analysis/tSNE/lin") +} +if (!dir.exists("./analysis/tSNE/epi")){ + dir.create("./analysis/tSNE/epi") +} +if (!dir.exists("./analysis/tSNE/n-epi")){ + dir.create("./analysis/tSNE/n-epi") +} +if (!dir.exists("./analysis/tSNE/st")){ + dir.create("./analysis/tSNE/st") +} +if (!dir.exists("./analysis/tSNE/leu")){ + dir.create("./analysis/tSNE/leu") +} +if (!dir.exists("./analysis/tSNE/lymphoid")){ + dir.create("./analysis/tSNE/lymphoid") +} +if (!dir.exists("./analysis/tSNE/myeloid")){ + dir.create("./analysis/tSNE/myeloid") +} +if (!dir.exists("./analysis/tSNE/merge")){ + dir.create("./analysis/tSNE/merge") +} +if (!dir.exists("./analysis/pca/ne")){ + dir.create("./analysis/pca/ne") +} +if (!dir.exists("./analysis/tSNE/ne")){ + dir.create("./analysis/tSNE/ne") +} +if (!dir.exists("./analysis/violin/ne")){ + dir.create("./analysis/violin/ne") +} +if (!dir.exists("./analysis/tSNE/FINAL")){ + dir.create("./analysis/tSNE/FINAL") +} +if (!dir.exists("./analysis/deg")){ + dir.create("./analysis/deg") +} +if (!dir.exists("./analysis/cca")){ + dir.create("./analysis/cca") +} +if (!dir.exists("./analysis/diy")){ + dir.create("./analysis/diy") +} +if (!dir.exists("./analysis/pseudotime")){ + dir.create("./analysis/pseudotime") +} + +#Retrieve command-line options +option_list=list( + make_option("--p",action="store",default="DPrF",type='character',help="Project Name"), + make_option("--g",action="store",default="ALL",type='character',help="Group To analyze"), + make_option("--lg",action="store",default=500,type='integer',help="Threshold for cells with minimum genes"), + make_option("--hg",action="store",default=3000,type='integer',help="Threshold for cells with maximum genes"), + make_option("--lm",action="store",default=0,type='numeric',help="Threshold for cells with minimum %mito genes"), + make_option("--hm",action="store",default=0.1,type='numeric',help="Threshold for cells with maximum %mito genes"), + make_option("--lx",action="store",default=0.2,type='numeric',help="x low threshold for hvg selection"), + make_option("--hx",action="store",default=5,type='numeric',help="x high threshold for hvg selection"), + make_option("--ly",action="store",default=1,type='numeric',help="y low threshold for hvg selection"), + make_option("--cc",action="store",default=TRUE,type='logical',help="Scale cell cycle?"), + make_option("--cca",action="store",default=50,type='integer',help="Number of CCAs to cacluate"), + make_option("--acca",action="store",default=30,type='integer',help="Number of CCAs to align"), + make_option("--pc",action="store",default=50,type='integer',help="Number of PCs to cacluate"), + make_option("--res.prestress",action="store",default=1,type='numeric',help="Resolution to cluster, pre-stress"), + make_option("--st",action="store",default=TRUE,type='logical',help="Remove stressed cells?"), + make_option("--stg",action="store",default="dws",type='character',help="Geneset to use for stress ID"), + make_option("--cut.stress",action="store",default=0.9,type='numeric',help="Cutoff for stress score"), + make_option("--res.poststress",action="store",default=0.25,type='numeric',help="Resolution to cluster, post-stress"), + make_option("--cut.ne",action="store",default=0.999,type='numeric',help="Cutoff for NE score") +) +opt=parse_args(OptionParser(option_list=option_list)) +rm(option_list) +if (opt$lm==0){opt$lm=-Inf} + +load("./analysis/sc10x.Pd.Rda") +load("./analysis/sc10x.Pb.Rda") +load("./analysis/sc10x.Pc.Rda") + +for (i in c("Pb","Pc","Pd")){ + if (!dir.exists(paste0("./analysis/tSNE/post.stress/",i))){ + dir.create(paste0("./analysis/tSNE/post.stress/",i)) + } + if (!dir.exists(paste0("./analysis/tSNE/lin/",i))){ + dir.create(paste0("./analysis/tSNE/lin/",i)) + } + if (!dir.exists(paste0("./analysis/tSNE/epi/",i))){ + dir.create(paste0("./analysis/tSNE/epi/",i)) + } + if (!dir.exists(paste0("./analysis/tSNE/n-epi/",i))){ + dir.create(paste0("./analysis/tSNE/n-epi/",i)) + } + if (!dir.exists(paste0("./analysis/tSNE/st/",i))){ + dir.create(paste0("./analysis/tSNE/st/",i)) + } + if (!dir.exists(paste0("./analysis/tSNE/leu/",i))){ + dir.create(paste0("./analysis/tSNE/leu/",i)) + } + if (!dir.exists(paste0("./analysis/tSNE/lymphoid/",i))){ + dir.create(paste0("./analysis/tSNE/lymphoid/",i)) + } + if (!dir.exists(paste0("./analysis/tSNE/myeloid/",i))){ + dir.create(paste0("./analysis/tSNE/myeloid/",i)) + } + + sc10x <- get(paste0("sc10x.",i)) + + sc10x <- scCluster(sc10x,pc.use=opt$acca,res.use=opt$res.poststress,folder=paste0("post.stress/",i),red="cca.aligned") + +# gene.set1 <- read_delim("./genesets/genes.deg.Epi.csv",",",escape_double=FALSE,trim_ws=TRUE,col_names=TRUE) +# gene.set1 <- gene.set1[1] +# gene.set1 <- as.list(gene.set1) +# names(gene.set1) <- "Epi" +# gene.set <- c(gene.set1) +# gene.set1 <- read_delim("./genesets/genes.deg.St.csv",",",escape_double=FALSE,trim_ws=TRUE,col_names=TRUE) +# gene.set1 <- gene.set1[1] +# gene.set1 <- as.list(gene.set1) +# names(gene.set1) <- "St" +# gene.set <- c(gene.set,gene.set1) + gene.set1 <- read_delim("./genesets/genes.deg.BE.csv",",",escape_double=FALSE,trim_ws=TRUE,col_names=TRUE) + gene.set1 <- gene.set1[1] + gene.set1 <- as.list(gene.set1) + names(gene.set1) <- "Epi" + gene.set <- c(gene.set1) #replaced gene.set <- c(gene.set,gene.set1) so pan-epi and pan-st not correlated + gene.set1 <- read_delim("./genesets/genes.deg.LE.csv",",",escape_double=FALSE,trim_ws=TRUE,col_names=TRUE) + gene.set1 <- gene.set1[1] + gene.set1 <- as.list(gene.set1) + names(gene.set1) <- "Epi" + gene.set <- c(gene.set,gene.set1) + gene.set1 <- read_delim("./genesets/genes.deg.OE1.csv",",",escape_double=FALSE,trim_ws=TRUE,col_names=TRUE) + gene.set1 <- gene.set1[1] + gene.set1 <- as.list(gene.set1) + names(gene.set1) <- "Epi" + gene.set <- c(gene.set,gene.set1) + gene.set1 <- read_delim("./genesets/genes.deg.OE2.csv",",",escape_double=FALSE,trim_ws=TRUE,col_names=TRUE) + gene.set1 <- gene.set1[1] + gene.set1 <- as.list(gene.set1) + names(gene.set1) <- "Epi" + gene.set <- c(gene.set,gene.set1) + gene.set1 <- read_delim("./genesets/genes.deg.Endo.csv",",",escape_double=FALSE,trim_ws=TRUE,col_names=TRUE) + gene.set1 <- gene.set1[1] + gene.set1 <- as.list(gene.set1) + names(gene.set1) <- "Endo" + gene.set <- c(gene.set,gene.set1) + gene.set1 <- read_delim("./genesets/genes.deg.SM.csv",",",escape_double=FALSE,trim_ws=TRUE,col_names=TRUE) + gene.set1 <- gene.set1[1] + gene.set1 <- as.list(gene.set1) + names(gene.set1) <- "FMSt" + gene.set <- c(gene.set,gene.set1) + gene.set1 <- read_delim("./genesets/genes.deg.Fib.csv",",",escape_double=FALSE,trim_ws=TRUE,col_names=TRUE) + gene.set1 <- gene.set1[1] + gene.set1 <- as.list(gene.set1) + names(gene.set1) <- "FMSt" + gene.set <- c(gene.set,gene.set1) + genes.leu <- read_excel("genesets/40425_2017_215_MOESM1_ESM.xlsx",sheet="S3. Candidate markers") + leu <- as.list(unique(genes.leu[,2]))$Cell + leu <- leu[-c(9,17,20)] + leu.l <- leu[-c(1,3:4,7:8,13:18,21,20:30)] + #leu.lin <- c("Myeloid","Lymphoid","Lymphoid","Lymphoid","Myeloid","Myeloid","Lymphoid","Myeloid","Myeloid","Myeloid","Myeloid","Myeloid","Lymphoid","Lymphoid","Lymphoid","Myeloid","Lymphoid","Lymphoid","Lymphoid","Lymphoid","Lymphoid","Lymphoid","Lymphoid","Lymphoid","Lymphoid","Lymphoid","Lymphoid") + leu.lin <- c("Lymphoid","Myeloid","Myeloid","Myeloid","Myeloid","Myeloid","Myeloid","Lymphoid") + #genes.leu <- genes.leu[unlist(genes.leu[,2]) %in% unlist(leu),] + genes.leu.l <- genes.leu[unlist(genes.leu[,2]) %in% unlist(leu.l),] + genes.leu.l[nrow(genes.leu.l)+1,] <- list("MS4A7","Macrophages",0) +genes.leu.l[nrow(genes.leu.l)+1,] <- list("CD14","Macrophages",0) +genes.leu.l[nrow(genes.leu.l)+1,] <- list("CD86","Macrophages",0) +genes.leu.l[nrow(genes.leu.l)+1,] <- list("S100A9","Neutrophils",0) +genes.leu.l[nrow(genes.leu.l)+1,] <- list("EREG","Neutrophils",0) +genes.leu.l[nrow(genes.leu.l)+1,] <- list("S100A8","Neutrophils",0) +genes.leu.l[nrow(genes.leu.l)+1,] <- list("FCN1","Neutrophils",0) + #gene.set1 <- split(genes.leu[,1], genes.leu[,2]) + gene.set1 <- split(genes.leu.l[,1], genes.leu.l[,2]) + gene.set1 <- lapply(gene.set1,unname) + gene.set1 <- lapply(gene.set1,unlist) + names(gene.set1) <- leu.lin + gene.set <- c(gene.set,gene.set1) + rm(gene.set1) + gc() + min.all <- min(table(sc10x@meta.data[,paste0("res",opt$res.poststress)])) + results <- scQuSAGE.nocorplot(sc10x,gs=gene.set,res.use=opt$res.poststress,ds=min.all,nm="Pop",folder=paste0("lin/",i)) + sc10x <- results[[1]] + results.cor.Lin <- results[[2]] + results.clust.Lin.id <- results[[3]] + rm(results) + rm(gene.set) + + res.leu <- 1 + + sc10x <- SetAllIdent(object=sc10x,id="Pop") + try({ + sc10x.Lymphoid <- scSubset(sc10x,i="Pop",g="Lymphoid") + sc10x.Lymphoid <- SetAllIdent(object=sc10x.Lymphoid,id=paste0("res",opt$res.poststress)) + sc10x.Lymphoid <- StashIdent(object=sc10x.Lymphoid,save.name=paste0("old.res",opt$res.poststress)) + sc10x.Lymphoid <- scCluster(sc10x.Lymphoid,pc.use=opt$acca,res.use=res.leu,folder=paste0("lymphoid/",i),red="cca.aligned") + assign(paste0("sc10x.Lymphoid.",i,".analized"),sc10x.Lymphoid) + }) + + try({ + sc10x.Myeloid <- scSubset(sc10x,i="Pop",g="Myeloid") + sc10x.Myeloid <- SetAllIdent(object=sc10x.Myeloid,id=paste0("res",opt$res.poststress)) + sc10x.Myeloid <- StashIdent(object=sc10x.Myeloid,save.name=paste0("old.res",opt$res.poststress)) + sc10x.Myeloid <- scCluster(sc10x.Myeloid,pc.use=opt$acca,res.use=res.leu,folder=paste0("myeloid/",i),red="cca.aligned") + assign(paste0("sc10x.Myeloid.",i,".analized"),sc10x.Myeloid) + }) + + assign(paste0("sc10x.",i,".analized"),sc10x) + assign(paste0("results.cor.Lin.",i),results.cor.Lin) + assign(paste0("results.clust.Lin.id.",i),results.clust.Lin.id) + + rm(sc10x) + rm(sc10x.Lymphoid) + rm(sc10x.Myeloid) + rm(sc10x.Leu) + rm(results.cor.Lin) + rm(results.clust.Lin.id) + rm(min.all) +} +rm(i) + +save(list=ls(pattern="sc10x.*.Pd.analized"),file="./analysis/sc10x.Pd.analized.Rda") +save(list=ls(pattern="sc10x.*.Pb.analized"),file="./analysis/sc10x.Pb.analized.Rda") +save(list=ls(pattern="sc10x.*.Pc.analized"),file="./analysis/sc10x.Pc.analized.Rda") +save.image(file="./analysis/sc10x.analized.RData") + -- GitLab