#sc-TissueMapper #Author: Gervaise H. Henry #Email: gervaise.henry@utsouthwestern.edu #Lab: Strand Lab, Deparment of Urology, University of Texas Southwestern Medical Center scFolders <- function(){ if (!dir.exists("./analysis/qc/")){ dir.create("./analysis/qc/") } if (!dir.exists("./analysis/qc/")){ dir.create("./analysis/qc/") } if (!dir.exists("./analysis/qc/cutoffs/")){ dir.create("./analysis/qc/cutoffs/") } if (!dir.exists("./analysis/qc/cellcycle")){ dir.create("./analysis/qc/cellcycle") } if (!dir.exists("./analysis/vis")){ dir.create("./analysis/vis") } if (!dir.exists("./analysis/score_id")){ dir.create("./analysis/score_id") } if (!dir.exists("./analysis/cor")){ dir.create("./analysis/cor") } } scLoad <- function(p,cellranger=3,aggr=TRUE){ #Load and prefilter filtered_gene_bc_matrices_mex output from cellranger #Inputs: #p = project name #cellranger cellranger version number used for count/aggr, 2 or 3 #aggr = if the samples are already aggregated, TRUE if useing the output of aggr, FALSE if using outputs of each count #Outputs: #sc10x = Seurat object list #sc10x.groups = group labels for each sample sc10x.groups <- read_csv(paste0("./analysis/DATA/",p,"-demultiplex.csv")) #Load filtered_gene_bc_matrices output from cellranger sc10x.data <- list() sc10x <- list() if (aggr==TRUE){ if (cellranger==2){ sc10x.data[aggr] <- Read10X(data.dir=paste0("./analysis/DATA/10x/filtered_gene_bc_matrices_mex/")) } else { sc10x.data[aggr] <- Read10X(data.dir=paste0("./analysis/DATA/10x/filtered_feature_bc_matrix/")) } sc10x[aggr] <- new("seurat",raw.data=sc10x.data[aggr]) } else { for (i in sc10x.groups$Samples){ if (cellranger==2){ sc10x.data[i] <- Read10X(data.dir=paste0("./analysis/DATA/10x/",i,"/filtered_gene_bc_matrices/")) } else { sc10x.data[i] <- Read10X(data.dir=paste0("./analysis/DATA/10x/",i,"/filtered_feature_bc_matrix/")) } sc10x[i] <- CreateSeuratObject(counts=sc10x.data[[i]],project=p) sc10x[[i]]$samples <- i } } # #Label sample names from aggregation_csv.csv # if (sub==FALSE){ # if (cellranger==2){ # sc10x.aggr <- read_csv("./analysis/DATA/10x/aggregation_csv.csv") # } else { # sc10x.aggr <- read_csv("./analysis/DATA/10x/aggregation.csv") # } # } else { # if (cellranger==2){ # sc10x.aggr <- read_csv(paste0("./analysis/DATA/",p,"/10x/aggregation_csv.csv")) # } else { # sc10x.aggr <- read_csv(paste0("./analysis/DATA/",p,"/10x/aggregation.csv")) # } # } # cell.codes <- as.data.frame(sc10x@raw.data@Dimnames[[2]]) # colnames(cell.codes) <- "barcodes" # rownames(cell.codes) <- cell.codes$barcodes # cell.codes$lib.codes <- as.factor(gsub(pattern=".+-",replacement="",cell.codes$barcodes)) # cell.codes$samples <- sc10x.aggr$library_id[match(cell.codes$lib.codes,as.numeric(rownames(sc10x.aggr)))] # sc10x <- CreateSeuratObject(counts=sc10x.data,project=p,assay="RNA",min.cells=mc,min.features=mg,meta.data=cell.codes["samples"]) # # #Create groups found in demultiplex.csv # for (i in 2:ncol(sc10x.demultiplex)){ # Idents(sc10x) <- "samples" # merge.cluster <- apply(sc10x.demultiplex[,i],1,as.character) # merge.cluster[merge.cluster==1] <- colnames(sc10x.demultiplex[,i]) # # Idents(sc10x) <- plyr::mapvalues(x=Idents(sc10x),from=sc10x.demultiplex$Samples,to=merge.cluster) # sc10x@meta.data[,colnames(sc10x.demultiplex[,i])] <- Idents(sc10x) # } results <- list( sc10x=sc10x, sc10x.groups=sc10x.groups ) return(results) } scSubset <- function(sc10x,i="ALL",g="ALL"){ #Subset cells based on an identity #Inputs: #sc10x = seruat object #i = identity to use #g = group to subset by #Outputs: #Seurat object Idents(sc10x) <- i sc10x.sub <- subset(x=sc10x,idents=g) return(sc10x.sub) } scQC <- function(sc10x,sp="hu"){ #QC and filter Seurat object #Inputs: #sc10x = Seruat object #sub = Subfolder to save output files #Outputs: #result[1] = filtered Seurat object #result[2] = raw cell count #result[3] = raw gene count #result[4] = filtered cell count #result[5] = filtered gene count #Calculate percent mitochondrea if (sp=="hu"){ mito.pattern <- "^MT-" } else if (sp=="mu"){ mito.pattern <- "^mt-" } for (i in names(sc10x)){ sc10x.temp <- sc10x[[i]] sc10x.temp[["percent.mito"]] <- PercentageFeatureSet(object=sc10x.temp,pattern=mito.pattern) #sc10x.temp <- subset(sc10x.temp,cell=names(which(is.na(sc10x.temp$percent.mito))),invert=TRUE) sc10x[i] <- sc10x.temp } #Calculate cutoffs thresh <- list() h <- list() cells.remove <- list() sc10x.temp <- list() thresh.l <- list() cutoff.l.mito <- list() for (i in c("nFeature_RNA","percent.mito")){ thresh[[i]] <- scThresh(sc10x,feature=i) if (i == "percent.mito"){ for (j in names(sc10x)){ cutoff.l.mito[[j]] <- NULL h[[i]] <- hist(data.frame(sc10x[[j]][[i]])$percent.mito,breaks=1000,plot=FALSE) cutoff.temp <- mean(c(h[[i]]$mids[which.max(h[[i]]$counts)],h[[i]]$mids[-which.max(h[[i]]$counts)][which.max(h[[i]]$counts[-which.max(h[[i]]$counts)])])) cells.remove[[j]] <- c(cells.remove[[j]],rownames(sc10x[[j]][["percent.mito"]])[sc10x[[j]][[i]][,1] > cutoff.temp]) sc10x.temp[[j]] <- subset(sc10x[[j]],cells=setdiff(colnames(sc10x[[j]]),cells.remove[[j]])) thresh.l[[i]] <- scThresh(sc10x.temp,feature=i,sub="lower") cutoff.l.mito[[j]] <- thresh.l[[i]][[j]]$threshold[thresh.l[[i]][[j]]$method=="Triangle"] } } } #Plot raw stats max.ct <- 0 max.ft <- 0 max.mt <- 0 for (i in names(sc10x)){ if (max.ct < max(sc10x[[i]][["nCount_RNA"]])){ max.ct <- max(sc10x[[i]][["nCount_RNA"]]) } if (max.ft < max(sc10x[[i]][["nFeature_RNA"]])){ max.ft <- max(sc10x[[i]][["nFeature_RNA"]]) } if (max.mt < max(sc10x[[i]][["percent.mito"]])){ max.mt <- max(sc10x[[i]][["percent.mito"]]) } } max.ct <- max.ct*1.1 max.ft <- max.ft*1.1 max.mt <- max.mt*1.1 cells.remove <- list() for (i in c("nCount_RNA","nFeature_RNA","percent.mito")){ max.y <- 0 if (i == "nCount_RNA"){ cells.remove[[j]] <- NULL max.y <- max.ct } else if (i == "nFeature_RNA"){ max.y <- max.ft } else if (i == "percent.mito"){ max.y <- max.mt } plots.v <- list() densities.s <- list() plots.s <- list() for (j in names(sc10x)){ sc10x.temp <- sc10x[[j]] plots.v[[j]] <- VlnPlot(object=sc10x.temp,features=i,pt.size=0.1,)+scale_x_discrete(labels=j)+scale_y_continuous(limits=c(0,max.y))+theme(legend.position="none",axis.text.x=element_text(hjust=0.5,angle=0)) if (i %in% c("nFeature_RNA","percent.mito")){ if (i == "nFeature_RNA"){ cutoff.l <- thresh[[i]][[j]]$threshold[thresh[[i]][[j]]$method=="MinErrorI"] cutoff.h <- thresh[[i]][[j]]$threshold[thresh[[i]][[j]]$method=="RenyiEntropy"] } else { cutoff.l <- cutoff.l.mito[[j]] cutoff.h <- thresh[[i]][[j]]$threshold[thresh[[i]][[j]]$method=="Triangle"] } plots.v[[j]] <- plots.v[[j]]+geom_hline(yintercept=cutoff.l,size=0.5,color="red")+geom_hline(yintercept=cutoff.h,size=0.5,color="red") densities.s[[j]] <- density(sc10x.temp$nCount_RNA,sc10x.temp[[i]][,1],n=1000) plots.s[[j]] <- ggplotGrob(ggplot(data.frame(cbind(sc10x.temp$nCount_RNA,sc10x.temp[[i]][,1])))+geom_point(aes(x=X1,y=X2,color=densities.s[[j]]),size=0.1)+scale_x_continuous(limits=c(0,max.ct))+scale_y_continuous(limits=c(0,max.y))+scale_color_viridis(option="inferno")+labs(x="nCount",y=i,color="Density")+ggtitle(j)+cowplot::theme_cowplot()+theme(plot.title=element_text(size=7.5),axis.title=element_text(size=7.5),axis.text=element_text(size=5,angle=45),legend.position="bottom",legend.title=element_text(size=5,face="bold",vjust=1),legend.text=element_text(size=5,angle=45))+guides(color=guide_colourbar(barwidth=5,barheight=0.5))+geom_hline(yintercept=cutoff.l,size=0.1,color="red")+geom_hline(yintercept=cutoff.h,size=0.1,color="red")) cells.remove[[j]] <- c(cells.remove[[j]],rownames(sc10x[[j]][[i]])[sc10x[[j]][[i]][,1] < cutoff.l | sc10x[[j]][[i]][,1] > cutoff.h]) } ggsave(paste0("./analysis/qc/Violin_qc.raw.",i,".",j,".eps"),plot=plots.v[[j]]) if (i %in% c("nFeature_RNA","percent.mito")){ ggsave(paste0("./analysis/qc/Plot_qc.raw.",i,".",j,".eps"),plot=plots.s[[j]]) } } } #Record cell/gene counts pre and post filtering counts.cell.raw <- list() counts.gene.raw <- list() sc10x.sub <- list() counts.cell.filtered <- list() counts.gene.filtered <- list() for (i in names(sc10x)){ counts.cell.raw[i] <- ncol(GetAssayData(object=sc10x[[i]],slot="counts")) counts.gene.raw[i] <- nrow(GetAssayData(object=sc10x[[i]],slot="counts")) sc10x.sub[[i]] <- subset(sc10x[[i]],cells=setdiff(colnames(sc10x[[i]]),cells.remove[[i]])) counts.cell.filtered[i] <- ncol(GetAssayData(object=sc10x.sub[[i]],slot="counts")) counts.gene.filtered[i] <- nrow(GetAssayData(object=sc10x.sub[[i]],slot="counts")) } #Plot filtered stats max.ct <- 0 max.ft <- 0 max.mt <- 0 for (i in names(sc10x)){ if (max.ct < max(sc10x.sub[[i]][["nCount_RNA"]])){ max.ct <- max(sc10x.sub[[i]][["nCount_RNA"]]) } if (max.ft < max(sc10x.sub[[i]][["nFeature_RNA"]])){ max.ft <- max(sc10x.sub[[i]][["nFeature_RNA"]]) } if (max.mt < max(sc10x.sub[[i]][["percent.mito"]])){ max.mt <- max(sc10x.sub[[i]][["percent.mito"]]) } } max.ct <- max.ct*1.1 max.ft <- max.ft*1.1 max.mt <- max.mt*1.1 for (i in c("nCount_RNA","nFeature_RNA","percent.mito")){ max.y <- 0 if (i == "nCount_RNA"){ max.y <- max.ct } else if (i == "nFeature_RNA"){ max.y <- max.ft } else if (i == "percent.mito"){ max.y <- max.mt } plots.v <- list() densities.s <- list() plots.s <- list() for (j in names(sc10x.sub)){ sc10x.temp <- sc10x.sub[[j]] plots.v[[j]] <- VlnPlot(object=sc10x.temp,features=i,pt.size=0.1,)+scale_x_discrete(labels=j)+scale_y_continuous(limits=c(0,max.y))+theme(legend.position="none",axis.text.x=element_text(hjust=0.5,angle=0)) if (i %in% c("nFeature_RNA","percent.mito")){ densities.s[[j]] <- density(sc10x.temp$nCount_RNA,sc10x.temp[[i]][,1],n=1000) plots.s[[j]] <- ggplotGrob(ggplot(data.frame(cbind(sc10x.temp$nCount_RNA,sc10x.temp[[i]][,1])))+geom_point(aes(x=X1,y=X2,color=densities.s[[j]]),size=0.1)+scale_x_continuous(limits=c(0,max.ct))+scale_y_continuous(limits=c(0,max.y))+scale_color_viridis(option="inferno")+labs(x="nCount",y=i,color="Density")+ggtitle(j)+cowplot::theme_cowplot()+theme(plot.title=element_text(size=7.5),axis.title=element_text(size=7.5),axis.text=element_text(size=5,angle=45),legend.position="bottom",legend.title=element_text(size=5,face="bold",vjust=1),legend.text=element_text(size=5,angle=45))+guides(color=guide_colourbar(barwidth=5,barheight=0.5))) } ggsave(paste0("./analysis/qc/Violin_qc.filtered.",i,".",j,".eps"),plot=plots.v[[j]]) if (i %in% c("nFeature_RNA","percent.mito")){ ggsave(paste0("./analysis/qc/Plot_qc.filtered.",i,".",j,".eps"),plot=plots.s[[j]]) } } } results <- list( sc10x=sc10x.sub, counts.cell.raw=counts.cell.raw, counts.gene.raw=counts.gene.raw, counts.cell.filtered=counts.cell.filtered, counts.gene.filtered=counts.gene.filtered ) return(results) } scThresh <- function(sc10x,feature,sub=FALSE){ #Calculate thresholds and cutoffs #Inputs: #sc10x = Seruat object #feature = feature to threshold #sub = Subfolder to save output files #Outputs: #result = Threshold data #Make folders if (sub==FALSE){ folder <- "./analysis/qc/cutoffs/" } else { folder <- paste0("./analysis/qc/cutoffs/",sub,"/") if (!dir.exists(folder)){ dir.create(folder) } } #Calculate range of histogram based threholding and manually select methods for cutoffs scale <- list() scale.scaled <- list() h <- list() thresh <-list() cutoff.l <- list() cutoff.h <- list() thresh_methods <- c("IJDefault","Huang","Huang2","Intermodes","IsoData","Li","Mean","MinErrorI","Moments","Otsu","Percentile","RenyiEntropy","Shanbhag","Triangle") for (i in names(sc10x)){ scale[[i]] <- data.frame(Score=sc10x[[i]][[feature]]) colnames(scale[[i]]) <- "Score" scale[[i]] <- data.frame(Score=scale[[i]]$Score[!is.na(scale[[i]]$Score)]) scale.scaled[[i]] <- as.integer((scale[[i]]$Score-min(scale[[i]]$Score))/(max(scale[[i]]$Score)-min(scale[[i]]$Score))*360) #scale.scaled[[i]] <- as.integer(scales::rescale(scale[[i]]$Score,to=c(0,1))*360) h[[i]] <- hist(scale[[i]]$Score,breaks=100,plot=FALSE) thresh[[i]] <- purrr::map_chr(thresh_methods, ~auto_thresh(scale.scaled[[i]], .)) %>% tibble(method = thresh_methods, threshold = .) thresh[[i]]$threshold <- as.numeric(thresh[[i]]$threshold) thresh[[i]]$threshold <- ((thresh[[i]]$threshold/360)*(max(scale[[i]]$Score)-min(scale[[i]]$Score)))+min(scale[[i]]$Score) #thresh[[i]] <- thresh[[i]] %>% mutate(threshold=(scales::rescale(as.numeric(threshold)/360,to=range(scale[[i]]$Score)))) postscript(paste0(folder,"Hist_qc.",i,".",feature,".eps")) plot(h[[i]],main=paste0("Histogram of ",feature," of sample ",i),xlab=feature) abline(v=thresh[[i]]$threshold) mtext(thresh[[i]]$method,side=1,line=2,at=thresh[[i]]$threshold,cex=0.5) dev.off() } return(thresh) } scCellCycle <- function(sc10x,sub=FALSE,sp="hu"){ #Runs Seurat based PCA analysis for cell cycle ID #Inputs: #sc10x = Seruat object #sub = Subfolder to save output files #Outputs: #results[1] = Seurat object #results[2] = s genes #results[3] = g2m genes #Make sub-folders if necessary if (sub==FALSE){ folder <- "./analysis/qc/cellcycle/" } else { folder <- paste0("./analysis/qc/cellcycle/",sub,"/") if (!dir.exists(folder)){ dir.create(folder) }} #score cell cycle genes.cc <- readLines(con="./genesets/regev_lab_cell_cycle_genes.txt") genes.s <- genes.cc[1:43] genes.g2m <- genes.cc[44:97] sc10x <- NormalizeData(object=sc10x,verbose=FALSE) sc10x <- ScaleData(object=sc10x,do.par=TRUE,num.cores=45,verbose=FALSE) sc10x <- CellCycleScoring(object=sc10x,s.features=genes.s,g2m.features=genes.g2m,set.ident=TRUE) #plot cell cycle specific genes if (sp=="hu"){ genes=c("PCNA","TOP2A","MCM6","MKI67") postscript(paste0(folder,"Violin_cc.Raw.eps")) plot <- VlnPlot(object=sc10x,features=genes,ncol=2,pt.size=1) plot(plot) dev.off() } # sc10x <- RunPCA(object=sc10x,features=c(genes.s,genes.g2m),npcs=2,verbose=FALSE) # postscript(paste0(folder,"PCA_cc.Raw.eps")) # plot <- DimPlot(object=sc10x,reduction="pca") # plot(plot) # dev.off() # gc() # sc10x <- ScaleData(object=sc10x,vars.to.regress=c("S.Score","G2M.Score"),do.par=TRUE,num.cores=45,verbose=TRUE) # gc() # sc10x <- RunPCA(object=sc10x,features=c(genes.s,genes.g2m),npcs=2,verbose=FALSE) # postscript(paste0(folder,"PCA_cc.Norm.eps")) # plot <- DimPlot(object=sc10x,reduction="pca") # plot(plot) # dev.off() results <- list( sc10x=sc10x, genes.s=genes.s, genes.g2m=genes.g2m ) return(results) } scPC <- function(sc10x,pc=50,hpc=0.9,file="pre.stress",print="tsne"){ #Scale Seurat object & calculate # of PCs to use #Inputs: #sc10x = Seruat object #pc = number of PCs to cacluate #hpc = max variance cutoff for PCs to use" #file = file for output #Outputs: #result[1] = Seurat object #result[2] = # of PCs to use #Run PCA Idents(object=sc10x) <- "ALL" sc10x <- RunPCA(object=sc10x,npcs=pc,verbose=FALSE,assay="integrated") #Calculate PCs to use pc.use <- sc10x[["pca"]]@stdev^2 pc.use <- pc.use/sum(pc.use) pc.use <- cumsum(pc.use) pc.use <- min(which(pc.use>=hpc)) postscript(paste0("./analysis/qc/Plot_PCElbow_",file,".eps")) plot <- ElbowPlot(object=sc10x,ndims=pc) plot <- plot+geom_vline(xintercept=pc.use,size=1,color="red") plot(plot) dev.off() results <- list( sc10x=sc10x, pc.use=pc.use ) return(results) } scCCA <- function(sc10x.l){ for (i in 1:length(sc10x.l)){ #sc10x.l[[i]] <- NormalizeData(sc10x.l[[i]],verbose=FALSE) gc() #sc10x.l[[i]] <- ScaleData(sc10x.l[[i]],vars.to.regress=c("nFeature_RNA","percent.mito"),verbose = FALSE) sc10x.l[[i]] <- SCTransform(sc10x.l[[i]],vars.to.regress=c("nFeature_RNA","percent.mito"),verbose=FALSE,assay="RNA") gc() #sc10x.l[[i]] <- FindVariableFeatures(sc10x.l[[i]],selection.method="vst",nfeatures=2000,verbose=FALSE) } sc10x.features <- SelectIntegrationFeatures(object.list=sc10x.l,nfeatures=3000) sc10x.l <- PrepSCTIntegration(object.list=sc10x.l,anchor.features=sc10x.features,verbose=FALSE) sc10x.anchors <- FindIntegrationAnchors(object.list=sc10x.l, normalization.method="SCT",anchor.features=sc10x.features,verbose=FALSE) sc10x <- IntegrateData(anchorset=sc10x.anchors,normalization.method="SCT",verbose=FALSE) #sc10x <- FindIntegrationAnchors(object.list=sc10x.l,dims=1:30,scale=FALSE) #sc10x <- IntegrateData(anchorset=sc10x,dims=1:30) #gc() #sc10x <- ScaleData(object=sc10x,do.par=TRUE,num.cores=45,verbose=FALSE,assay="integrated") #gc() gc() sc10x <- SCTransform(sc10x,vars.to.regress=c("nFeature_RNA","percent.mito"),verbose=FALSE,return.only.var.genes=FALSE,assay="RNA") gc() return(sc10x) } scCluster <- function(sc10x,res=0.1,red="pca",dim,print="tsne",folder=FALSE){ #Cluster Seurat object and produce visualizations #Inputs: #sc10x = Seruat object #res = resolution to calculate clustering #red = rediction type to use for clustering #dim = number of dimentions to use for display #print = dimentionality reduction to use for display #folder = folder for output #Outputs: #result = Seurat object #Create subfolder if necessary if (folder==FALSE){ sub <- "" } else { if (!dir.exists(paste0("./analysis/vis/",folder))){ dir.create(paste0("./analysis/vis/",folder)) } sub <- paste0(folder,"/") } DefaultAssay(sc10x) <- "integrated" #Calculste Vis sc10x <- RunTSNE(sc10x,dims=1:dim,reduction="pca",assay="integrated") sc10x <- RunUMAP(sc10x,dims=1:dim,reduction="pca",assay="integrated") sc10x <- FindNeighbors(sc10x,reduction=red,verbose=FALSE) for (i in res){ sc10x <- FindClusters(sc10x,resolution=i,verbose=FALSE) plot1 <- DimPlot(sc10x,reduction="pca",label=TRUE,repel=TRUE)+theme(legend.position="none") plot2 <- DimPlot(sc10x,reduction="tsne",label=TRUE,repel=TRUE)+theme(legend.position="none") plot3 <- DimPlot(sc10x,reduction="umap",label=TRUE,repel=TRUE)+theme(legend.position="none") if (print=="tsne"){ postscript(paste0("./analysis/vis/",sub,"tSNE_",i,".eps")) print(print2) dev.off() } else if (print=="umap"){ postscript(paste0("./analysis/vis/",sub,"UMAP_",i,".eps")) print(print3) dev.off() } else if (print=="2"){ plot2 <- plot2+theme(legend.position="none") plot3 <- plot3+theme(legend.position="none") postscript(paste0("./analysis/vis/",sub,"2Vis_",i,".eps")) grid.arrange(plot2,plot3,ncol=1) dev.off() } else if (print=="3"){ plot1 <- plot1+theme(legend.position="none") plot2 <- plot2+theme(legend.position="none") plot3 <- plot3+theme(legend.position="none") postscript(paste0("./analysis/vis/",sub,"3Vis_",i,".eps")) grid.arrange(plot1,plot2,plot3,ncol=1) dev.off() }} plot1 <- DimPlot(sc10x,reduction="pca",group.by="samples") plot2 <- DimPlot(sc10x,reduction="tsne",group.by="samples") plot3 <- DimPlot(sc10x,reduction="umap",group.by="samples") legend <- cowplot::get_legend(plot1) if (print=="tsne"){ postscript(paste0("./analysis/vis/",sub,"tSNE_samples.eps")) grid.arrange(plot2,legend,ncol=1) dev.off() } else if (print=="umap"){ postscript(paste0("./analysis/vis/",sub,"UMAP_samples.eps")) grid.arrange(plot3,legend,ncol=1) dev.off() } else if (print=="2"){ plot2 <- plot2+theme(legend.position="none") plot3 <- plot3+theme(legend.position="none") postscript(paste0("./analysis/vis/",sub,"2Vis_samples.eps")) grid.arrange(plot2,plot3,legend,ncol=1) dev.off() } else if (print=="3"){ plot1 <- plot1+theme(legend.position="none") plot2 <- plot2+theme(legend.position="none") plot3 <- plot3+theme(legend.position="none") postscript(paste0("./analysis/vis/",sub,"3Vis_samples.eps")) grid.arrange(plot1,plot2,plot3,legend,ncol=1) dev.off() } DefaultAssay(sc10x) <- "SCT" return(sc10x) } scScore <- function(sc10x,score,geneset,cut.pt=0.9,anchor=FALSE){ #Runs custom PCA analysis for stress ID #Inputs: #sc10x = Seruat object #score = name of geneset for scoring #geneset = geneset to use for ID #cut.pt = % of cells to keep #Outputs: #results[1] = Seurat object (original + score) #results[2] = Seurat object (negatively filtered) #results[3] = Seurat object (positively filtered) #Make subdirectory if (!dir.exists(paste0("./analysis/score_id/",score))){ dir.create(paste0("./analysis/score_id/",score)) } if (!dir.exists(paste0("./analysis/vis/",score))){ dir.create(paste0("./analysis/vis/",score)) } #Score geneset sc10x <- AddModuleScore(object=sc10x,features=geneset,name=score,assay="SCT") Idents(object=sc10x) <- paste0(score,"1") #CDF cdf <- ecdf(as.numeric(levels(sc10x))) if (cut.pt == "renyi"){ thresh <- list() thresh[["all"]] <- scThresh(list(all=sc10x),feature=paste0(score,"1"),sub=score) cut.x <- thresh$all$all$threshold[thresh$all$all$method=="RenyiEntropy"] } else { cut.x <- quantile(cdf,probs=cut.pt) cut.x <- unname(cut.x) } postscript(paste0("./analysis/score_id/",score,"/CDF_",score,".eps")) plot(cdf,main=paste0("Cumulative Distribution of ",score," Score"),xlab=paste0(score," Score"),ylab="CDF") abline(v=cut.x,col="red") dev.off() #KDE postscript(paste0("./analysis/score_id/",score,"/Histo_",score,".eps")) plot(ggplot(data.frame(Score=as.numeric(levels(sc10x))),aes(x=Score))+geom_histogram(bins=100,aes(y=..density..))+geom_density()+geom_vline(xintercept=cut.x,size=1,color="red")+ggtitle(paste0(score," Score"))+cowplot::theme_cowplot()) dev.off() Idents(object=sc10x) <- "ALL" predicate <- paste0(score,"1 >= ",cut.x) Idents(object=sc10x, cells = WhichCells(object=sc10x,expression= predicate)) <- score sc10x[[score]] <- Idents(object=sc10x) Idents(sc10x) <- score sc10x.negative <- subset(x=sc10x,idents="ALL") sc10x.positive <- subset(x=sc10x,idents=score) #Generate plots postscript(paste0("./analysis/vis/",score,"/3Vis_",score,".eps")) plot1 <- DimPlot(sc10x,reduction="pca",label=TRUE,repel=TRUE)+theme(legend.position="none")+ggtitle("ALL") plot2 <- DimPlot(sc10x.negative,reduction="pca",label=TRUE,repel=TRUE)+theme(legend.position="none") plot3 <- DimPlot(sc10x.positive,reduction="pca",label=TRUE,repel=TRUE)+theme(legend.position="none") plot4 <- DimPlot(sc10x,reduction="tsne",label=TRUE,repel=TRUE)+theme(legend.position="none")+ggtitle("Negative") plot5 <- DimPlot(sc10x.negative,reduction="tsne",label=TRUE,repel=TRUE)+theme(legend.position="none") plot6 <- DimPlot(sc10x.positive,reduction="tsne",label=TRUE,repel=TRUE)+theme(legend.position="none") plot7 <- DimPlot(sc10x,reduction="umap",label=TRUE,repel=TRUE)+theme(legend.position="none")+ggtitle("Positive") plot8 <- DimPlot(sc10x.negative,reduction="umap",label=TRUE,repel=TRUE)+theme(legend.position="none") plot9 <- DimPlot(sc10x.positive,reduction="umap",label=TRUE,repel=TRUE)+theme(legend.position="none") grid.arrange(plot1,plot2,plot3,plot4,plot5,plot6,plot7,plot8,plot9,ncol=3) dev.off() #Generate violin plot of gene exvpression if (anchor!=FALSE){ postscript(paste0("./analysis/score_id/",score,"/Violin_",score,".eps")) plot <- VlnPlot(object=sc10x,features=anchor,pt.size=0.1,assay="SCT") plot(plot) dev.off() } results <- list( sc10x <- sc10x, sc10x.negative <- sc10x.negative, sc10x.positive <- sc10x.positive ) return(results) } scQuSAGE <- function(sc10x,gs,save=FALSE,type,id,ds=0,nm="pops",print="tsne"){ #Runs QuSAGE #Inputs: #sc10x = Seruat object #gs = geneset to use for correlation #save = save ID #type = type of qusage to run (id: create ID's based on run, sm: cor only using small genesets, lg: cor only with large genesets) #id = ident to use #nm = name of test #print = dimentionality reduction to use for display #Outputs: #results[1] = Seurat object #results[2] = correlation table #results[3] = correlation results if (!dir.exists(paste0("./analysis/cor/",nm))){ dir.create(paste0("./analysis/cor/",nm)) } if (!dir.exists(paste0("./analysis/cor/",nm,"/geneset"))){ dir.create(paste0("./analysis/cor/",nm,"/geneset")) } if (!dir.exists(paste0("./analysis/cor/",nm,"/cluster"))){ dir.create(paste0("./analysis/cor/",nm,"/cluster")) } if (!dir.exists(paste0("./analysis/vis/",nm))){ dir.create(paste0("./analysis/vis/",nm)) } Idents(object=sc10x) <- id number.clusters <- length(unique(levels(x=sc10x))) labels <- paste0("Cluster_",as.vector(Idents(object=sc10x))) cell.sample <- NULL for (i in unique(labels)){ cell <- WhichCells(sc10x,ident=sub("Cluster_","",i)) if (length(cell)>ds & ds!=0){ set.seed(71682) rnd <- sample(1:length(cell),ds) cell <- cell[rnd] } cell.sample <- c(cell.sample,cell) } data <- as.data.frame(as.matrix(GetAssayData(sc10x[,colnames(sc10x) %in% cell.sample]))) labels <- labels[colnames(sc10x) %in% cell.sample] groups <- sort(unique(labels)) col <- hcl(h=(seq(15,375-375/length(groups),length=length(groups))),c=100,l=65) #Make labels for QuSAGE clust <- list() clust.comp <- list() for (i in groups){ t <- labels t[t != i] <- "REST" clust[i] <- list(i=t) rm(t) clust.comp[i] <- paste0(i,"-REST") } #Run QuSAGE for (i in groups){ assign(paste0("results.",i),qusage(data,unlist(clust[i]),unlist(clust.comp[i]),gs)) } #Generate ID table results.cor <- NULL results.cor <- qsTable(get(paste0("results.",groups[1])),number=length(gs)) results.cor$Cluster <- groups[1] for (i in groups[-1]){ qs <- qsTable(get(paste0("results.",i)),number=length(gs)) qs$Cluster <- i results.cor <- rbind(results.cor,qs) } results.cor <- results.cor[,-3] rownames(results.cor) <- NULL results.clust.id <- NULL #if (max(results.cor[results.cor[,4]==groups[1] & results.cor[,3]<=0.05,][,2],na.rm=TRUE)>=0){ # results.clust.id <- results.cor[results.cor[,4]==groups[1] & results.cor[,3]<=0.05,][which.max(results.cor[results.cor[,4]==groups[1] & results.cor[,3]<=0.05,][,2]),] if (max(results.cor[results.cor[,4]==groups[1],][,2],na.rm=TRUE)>=0){ results.clust.id <- results.cor[results.cor[,4]==groups[1],][which.max(results.cor[results.cor[,4]==groups[1],][,2]),] } else { results.clust.id$pathway.name <- "Unknown" results.clust.id$log.fold.change <- 0 results.clust.id$FDR <- 0 results.clust.id$Cluster <- groups[1] results.clust.id <- as.data.frame(results.clust.id) } for (i in groups[-1]){ #if (max(results.cor[results.cor[,4]==i & results.cor[,3]<=0.05,][,2],na.rm=TRUE)>=0){ # results.clust.id <- rbind(results.clust.id,results.cor[results.cor[,4]==i & results.cor[,3]<=0.05,][which.max(results.cor[results.cor[,4]==i & results.cor[,3]<=0.05,][,2]),]) if (max(results.cor[results.cor[,4]==i,][,2],na.rm=TRUE)>=0){ results.clust.id <- rbind(results.clust.id,results.cor[results.cor[,4]==i,][which.max(results.cor[results.cor[,4]==i,][,2]),]) } else { results.clust.id <- rbind(results.clust.id,data.frame(pathway.name="Unknown",log.fold.change=0,FDR=0,Cluster=i)) }} rownames(results.clust.id) <- NULL #Determine axes for correlation plots max.x.rg <- 0 min.x.rg <- 0 max.y.rg <- 0 for (i in groups){ qs <- get(paste0("results.",i)) if (max(qs$path.mean)>max.x.rg){ max.x.rg <- max(qs$path.mean) } if (min(qs$path.mean)max.y.rg){ max.y.rg <- max(qs$path.PDF) }} if (type=="sm"){ #Plot correlation plots by geneset for (i in 1:length(gs)){ postscript(paste0("./analysis/cor/",nm,"/geneset/QuSAGE_",nm,".",names(gs)[i],".eps")) for (j in groups){ qs <- get(paste0("results.",j)) if (j==groups[1]){ plotDensityCurves(qs,path.index=i,col=col[match(j,groups)],main=names(gs)[i],xlim=c(min.x.rg-0.05,max.x.rg+0.05),ylim=c(0,50*ceiling(max.y.rg/50)),xlab="Gene Set Activation",lwd=5,cex.main=2.5,cex.axis=1.5,cex.lab=2) } else { plotDensityCurves(qs,path.index=i,add=TRUE,col=col[match(j,groups)],lwd=5) }} legend("topright",col=col,legend=groups,lty=1,lwd=5,cex=2,ncol=1,bty="n",pt.cex=2) box(lwd=5) dev.off() } #Plot correlation plots by cluster for (i in groups){ qs <- get(paste0("results.",i)) postscript(paste0("./analysis/cor/",nm,"/cluster/QuSAGE_",nm,"_",i,".eps")) for (j in 1:length(gs)){ if (j==1){ plotDensityCurves(qs,path.index=j,col=viridis(length(gs))[j],main=i,xlim=c(min.x.rg-0.05,max.x.rg+0.05),ylim=c(0,50*ceiling(max.y.rg/50)),xlab="Gene Set Activation",lwd=5,cex.main=2.5,cex.axis=1.5,cex.lab=2) } else { plotDensityCurves(qs,path.index=j,add=TRUE,col=viridis(length(gs))[j],lwd=5) }} legend("topright",col=viridis(length(gs)),legend=names(gs),lty=1,lwd=5,cex=1,ncol=2,bty="n",pt.cex=2) box(lwd=5) dev.off() }} else { for (i in groups){ qs <- get(paste0("results.",i)) postscript(paste0("./analysis/cor/",nm,"/cluster/QuSAGE_",nm,"_",i,".eps")) plotCIs(qs,path.index=1:numPathways(qs),cex.lab=1.5) dev.off() }} if (save==TRUE){ merge.cluster <- NULL for (i in groups){ #if (max(qsTable(get(paste0("results.",i)),number=length(gs))[qsTable(get(paste0("results.",i)),number=length(gs))[,4]<=0.05,][,2],na.rm=TRUE)>=0){ # sc10x<-eval(parse(text=paste0("RenameIdents(object=sc10x,'",sub("Cluster_","",i),"' = '",qsTable(get(paste0("results.",i)),number=length(gs))[qsTable(get(paste0("results.",i)),number=length(gs))[2]==max(qsTable(get(paste0("results.",i)),number=length(gs))[qsTable(get(paste0("results.",i)),number=length(gs))[,4]<=0.05,][,2],na.rm=TRUE)][1],"')"))) if (max(qsTable(get(paste0("results.",i)),number=length(gs))[,2],na.rm=TRUE)>=0){ sc10x<-eval(parse(text=paste0("RenameIdents(object=sc10x,'",sub("Cluster_","",i),"' = '",qsTable(get(paste0("results.",i)),number=length(gs))[qsTable(get(paste0("results.",i)),number=length(gs))[2]==max(qsTable(get(paste0("results.",i)),number=length(gs))[,2],na.rm=TRUE)][1],"')"))) } else { sc10x<-eval(parse(text=paste0("RenameIdents(object=sc10x,'",sub("Cluster_","",i),"' = 'Unknown')"))) }} sc10x[[nm]] <- Idents(object=sc10x) } plot1 <- DimPlot(sc10x,reduction="pca",label=TRUE,repel=TRUE)+theme(legend.position="none") plot2 <- DimPlot(sc10x,reduction="tsne",label=TRUE,repel=TRUE)+theme(legend.position="none") plot3 <- DimPlot(sc10x,reduction="umap",label=TRUE,repel=TRUE)+theme(legend.position="none") if (print=="tsne"){ postscript(paste0("./analysis/vis/",nm,"/tSNE_",id,"_",nm,".eps")) print(print2) dev.off() } else if (print=="umap"){ postscript(paste0("./analysis/vis/",nm,"/UMAP_",id,"_",nm,".eps")) print(print3) dev.off() } else if (print=="2"){ postscript(paste0("./analysis/vis/",nm,"/2Vis_",id,"_",nm,".eps")) grid.arrange(plot2,plot3,ncol=1) dev.off() } else if (print=="3"){ postscript(paste0("./analysis/vis/",nm,"/3Vis_",id,"_",nm,".eps")) grid.arrange(plot1,plot2,plot3,ncol=1) dev.off() } results <- list( sc10x=sc10x, results.cor=results.cor, results.clust.id=results.clust.id ) names(results)=c("sc10x",paste0("results.cor.",nm),paste0("results.clust.",nm,".id")) return(results) }