#sc-TissueMapper #Author: Gervaise H. Henry #Email: gervaise.henry@utsouthwestern.edu #Lab: Strand Lab, Deparment of Urology, University of Texas Southwestern Medical Center scFolders <- function(){ #Create analysis output folders if (!dir.exists("./analysis/qc/")){ dir.create("./analysis/qc/") } if (!dir.exists("./analysis/qc/")){ dir.create("./analysis/qc/") } if (!dir.exists("./analysis/qc/cutoffs/")){ dir.create("./analysis/qc/cutoffs/") } if (!dir.exists("./analysis/qc/cellcycle")){ dir.create("./analysis/qc/cellcycle") } if (!dir.exists("./analysis/vis")){ dir.create("./analysis/vis") } if (!dir.exists("./analysis/score_id")){ dir.create("./analysis/score_id") } if (!dir.exists("./analysis/cor")){ dir.create("./analysis/cor") } } scLoad <- function(p,cellranger=3,aggr=TRUE,ncell=0,nfeat=0,seurat=FALSE){ #Load and prefilter filtered_gene_bc_matrices_mex output from cellranger #Inputs: #p = project name #cellranger = cellranger version number used for count/aggr, 2, 3, or 4 #aggr = if the samples are already aggregated, TRUE if using the output of aggr, FALSE if using outputs of each count #ncell = minimum number of cells for initial feature filter #nfeat = minimum number of features for initial cell filter #seurat = if Seurat R objects per sample are already created, TRUE if using Seurat objects, FALSE if using output of count #Outputs: #sc10x = Seurat object list #sc10x.groups = group labels for each sample sc10x.groups <- read_csv(paste0("./analysis/DATA/",p,"-demultiplex.csv")) #Load filtered_gene_bc_matrices output from cellranger sc10x <- list() if (seurat==FALSE) { sc10x.data <- list() if (aggr==TRUE){ if (cellranger==2){ sc10x.data[aggr] <- Read10X(data.dir=paste0("./analysis/DATA/10x/filtered_gene_bc_matrices_mex/")) } else { sc10x.data[aggr] <- Read10X(data.dir=paste0("./analysis/DATA/10x/filtered_feature_bc_matrix/")) } sc10x[aggr] <- new("seurat",raw.data=sc10x.data[aggr]) } else { for (i in sc10x.groups$Samples){ if (cellranger==2){ sc10x.data[i] <- Read10X(data.dir=paste0("./analysis/DATA/10x/",i,"/filtered_gene_bc_matrices/")) } else { sc10x.data[i] <- Read10X(data.dir=paste0("./analysis/DATA/10x/",i,"/filtered_feature_bc_matrix/")) } sc10x[i] <- CreateSeuratObject(counts=sc10x.data[[i]],project=p,min.cells=ncell,min.features=nfeat) sc10x[[i]]$samples <- i } } } else { for (i in sc10x.groups$Samples){ sc10x[i] <- readRDS(paste0("./analysis/DATA/10x/",i,"/",i,".rds")) sc10x[[i]] <- sc10x[[i]] sc10x[[i]]$samples <- i } } if (length(colnames(sc10x.groups)[!(colnames(sc10x.groups) %in% c("Samples","ALL","Keep"))])!=0){ for (i in sc10x.groups$Samples){ Idents(sc10x[[i]],cells=1:ncol(sc10x[[i]])) <- i sc10x[[i]]$samples <- Idents(sc10x[[i]]) for (j in colnames(sc10x.groups)[!(colnames(sc10x.groups) %in% c("Samples","ALL"))]){ Idents(sc10x[[i]],cells=1:ncol(sc10x[[i]])) <- sc10x.groups[sc10x.groups$Samples==i,colnames(sc10x.groups)==j] sc10x[[i]]@meta.data <- sc10x[[i]]@meta.data[,c("nCount_RNA","nFeature_RNA","samples")] } } } # #Label sample names from aggregation_csv.csv # if (sub==FALSE){ # if (cellranger==2){ # sc10x.aggr <- read_csv("./analysis/DATA/10x/aggregation_csv.csv") # } else { # sc10x.aggr <- read_csv("./analysis/DATA/10x/aggregation.csv") # } # } else { # if (cellranger==2){ # sc10x.aggr <- read_csv(paste0("./analysis/DATA/",p,"/10x/aggregation_csv.csv")) # } else { # sc10x.aggr <- read_csv(paste0("./analysis/DATA/",p,"/10x/aggregation.csv")) # } # } # cell.codes <- as.data.frame(sc10x@raw.data@Dimnames[[2]]) # colnames(cell.codes) <- "barcodes" # rownames(cell.codes) <- cell.codes$barcodes # cell.codes$lib.codes <- as.factor(gsub(pattern=".+-",replacement="",cell.codes$barcodes)) # cell.codes$samples <- sc10x.aggr$library_id[match(cell.codes$lib.codes,as.numeric(rownames(sc10x.aggr)))] # sc10x <- CreateSeuratObject(counts=sc10x.data,project=p,assay="RNA",min.cells=mc,min.features=mg,meta.data=cell.codes["samples"]) # # #Create groups found in demultiplex.csv # for (i in 2:ncol(sc10x.demultiplex)){ # Idents(sc10x) <- "samples" # merge.cluster <- apply(sc10x.demultiplex[,i],1,as.character) # merge.cluster[merge.cluster==1] <- colnames(sc10x.demultiplex[,i]) # # Idents(sc10x) <- plyr::mapvalues(x=Idents(sc10x),from=sc10x.demultiplex$Samples,to=merge.cluster) # sc10x@meta.data[,colnames(sc10x.demultiplex[,i])] <- Idents(sc10x) # } results <- list( sc10x=sc10x, sc10x.groups=sc10x.groups ) return(results) } scSubset <- function(sc10x,i="ALL",g="ALL"){ #Subset cells based on an identity #Inputs: #sc10x = seruat object #i = identity to use #g = group to subset by #Outputs: #Seurat object Idents(sc10x) <- i sc10x.sub <- subset(x=sc10x,idents=g) return(sc10x.sub) } scQC <- function(sc10x,sp="hu",feature="nFeature_RNA"){ #QC and filter Seurat object #Inputs: #sc10x = Seruat object #sub = Subfolder to save output files #Outputs: #result[1] = filtered Seurat object #result[2] = raw cell count #result[3] = raw gene count #result[4] = filtered cell count #result[5] = filtered gene count #Calculate percent mitochondrea if (sp=="hu"){ mito.pattern <- "^MT-" ribo.pattern <- "^(RPL|RPS)" } else if (sp=="mu"){ mito.pattern <- "^mt-" ribo.pattern <- "^(Rpl|Rps)" } for (i in names(sc10x)){ sc10x.temp <- sc10x[[i]] sc10x.temp[["percent.mito"]] <- PercentageFeatureSet(object=sc10x.temp,pattern=mito.pattern) sc10x.temp[["percent.ribo"]] <- PercentageFeatureSet(object=sc10x.temp,pattern=ribo.pattern) #sc10x.temp <- subset(sc10x.temp,cell=names(which(is.na(sc10x.temp$percent.mito))),invert=TRUE) sc10x[i] <- sc10x.temp } #Calculate cutoffs thresh <- list() for (i in feature){ if (i == "nFeature_RNA"){ sc10x.temp <- list() for (j in names(sc10x)){ h <- NULL cutoff.temp <- NULL cells.remove <- NULL h <- hist(data.frame(sc10x[[j]][[i]])$nFeature_RNA,breaks=10,plot=FALSE) cutoff.temp <- mean(c(h$mids[which.max(h$counts)],h$mids[-which.max(h$counts)][which.max(h$counts[-which.max(h$counts)])])) cells.remove <- c(cells.remove,rownames(sc10x[[j]][["nFeature_RNA"]])[sc10x[[j]][[i]][,1] < cutoff.temp]) sc10x.temp[[j]] <- subset(sc10x[[j]],cells=cells.remove,invert=TRUE) } thresh[[i]] <- scThresh(sc10x.temp,feature=i,sub="higher") } if (i == "percent.mito"){ sc10x.temp <- list() for (j in names(sc10x)){ h <- NULL cutoff.temp <- NULL cells.remove <- NULL h <- hist(data.frame(sc10x[[j]][[i]])$percent.mito,breaks=100,plot=FALSE) cutoff.temp <- mean(c(h$mids[which.max(h$counts)],h$mids[-which.max(h$counts)][which.max(h$counts[-which.max(h$counts)])])) cells.remove <- c(cells.remove,rownames(sc10x[[j]][["percent.mito"]])[sc10x[[j]][[i]][,1] < cutoff.temp]) sc10x.temp[[j]] <- subset(sc10x[[j]],cells=cells.remove,invert=TRUE) } thresh[[i]] <- scThresh(sc10x.temp,feature=i,sub="higher") } if (i == "percent.ribo"){ sc10x.temp <- list() for (j in names(sc10x)){ h <- NULL cutoff.temp <- NULL cells.remove <- NULL h <- hist(data.frame(sc10x[[j]][[i]])$percent.ribo,breaks=100,plot=FALSE) cutoff.temp <- mean(c(h$mids[which.max(h$counts)],h$mids[-which.max(h$counts)][which.max(h$counts[-which.max(h$counts)])])) cells.remove <- c(cells.remove,rownames(sc10x[[j]][["percent.ribo"]])[sc10x[[j]][[i]][,1] > cutoff.temp]) sc10x.temp[[j]] <- subset(sc10x[[j]],cells=cells.remove,invert=TRUE) } thresh[[i]] <- scThresh(sc10x.temp,feature=i,sub="lower") } if (i == "nCount_RNA"){ sc10x.temp <- list() for (j in names(sc10x)){ h <- NULL cutoff.temp <- NULL cells.remove <- NULL h <- hist(data.frame(sc10x[[j]][[i]])$nCount_RNA,breaks=100,plot=FALSE) cutoff.temp <- mean(c(h$mids[which.max(h$counts)],h$mids[-which.max(h$counts)][which.max(h$counts[-which.max(h$counts)])])) cells.remove <- c(cells.remove,rownames(sc10x[[j]][["nCount_RNA"]])[sc10x[[j]][[i]][,1] > cutoff.temp]) sc10x.temp[[j]] <- subset(sc10x[[j]],cells=cells.remove,invert=TRUE) } thresh[[i]] <- scThresh(sc10x.temp,feature=i,sub="lower") } } #Plot raw stats max.ct <- 0 max.ft <- 0 max.mt <- 0 max.rb <- 0 for (i in names(sc10x)){ if (max.ct < max(sc10x[[i]][["nCount_RNA"]])){ max.ct <- max(sc10x[[i]][["nCount_RNA"]]) } if (max.ft < max(sc10x[[i]][["nFeature_RNA"]])){ max.ft <- max(sc10x[[i]][["nFeature_RNA"]]) } if (max.mt < max(sc10x[[i]][["percent.mito"]])){ max.mt <- max(sc10x[[i]][["percent.mito"]]) } if (max.rb < max(sc10x[[i]][["percent.ribo"]])){ max.rb <- max(sc10x[[i]][["percent.ribo"]]) } } max.ct <- max.ct*1.1 max.ft <- max.ft*1.1 max.mt <- max.mt*1.1 max.rb <- max.rb*1.1 cells.remove <- list() for (i in feature){ max.y <- 0 if (i == "nCount_RNA"){ max.y <- max.ct } else if (i == "nFeature_RNA"){ max.y <- max.ft } else if (i == "percent.mito"){ max.y <- max.mt } else if (i == "percent.ribo"){ max.y <- max.rb } plots.v <- list() densities.s <- list() plots.s <- list() sc10x.temp <- NULL for (j in names(sc10x)){ sc10x.temp <- sc10x[[j]] plots.v[[j]] <- VlnPlot(object=sc10x.temp,features=i,pt.size=0.1,)+scale_x_discrete(labels=j)+scale_y_continuous(limits=c(0,max.y))+theme(legend.position="none",axis.text.x=element_text(hjust=0.5,angle=0)) if (i %in% c("nFeature_RNA","percent.mito","percent.ribo","nCount_RNA")){ if (i == "nFeature_RNA"){ cutoff.h <- thresh[[i]][[j]]$threshold[thresh[[i]][[j]]$method=="RenyiEntropy"] cutoff.l <- 200 } else if (i == "percent.mito") { cutoff.h <- thresh[[i]][[j]]$threshold[thresh[[i]][[j]]$method=="Triangle"] cutoff.l <- 0 } else if (i == "percent.ribo") { cutoff.h <- max(sc10x[[j]][[i]]) cutoff.l <- thresh[[i]][[j]]$threshold[thresh[[i]][[j]]$method=="RenyiEntropy"] } else if (i == "nCount_RNA") { cutoff.h <- max(sc10x[[j]][[i]]) cutoff.l <- thresh[[i]][[j]]$threshold[thresh[[i]][[j]]$method=="RenyiEntropy"] } plots.v[[j]] <- plots.v[[j]]+geom_hline(yintercept=cutoff.l,size=0.5,color="red")+geom_hline(yintercept=cutoff.h,size=0.5,color="red") if (i != "nCount_RNA"){ densities.s[[j]] <- density(sc10x.temp$nCount_RNA,sc10x.temp[[i]][,1],n=1000) plots.s[[j]] <- ggplotGrob(ggplot(data.frame(cbind(sc10x.temp$nCount_RNA,sc10x.temp[[i]][,1])))+geom_point(aes(x=X1,y=X2,color=densities.s[[j]]),size=0.1)+scale_x_continuous(limits=c(0,max.ct))+scale_y_continuous(limits=c(0,max.y))+scale_color_viridis(option="inferno")+labs(x="nCount_RNA",y=i,color="Density")+ggtitle(j)+cowplot::theme_cowplot()+theme(plot.title=element_text(size=7.5),axis.title=element_text(size=7.5),axis.text=element_text(size=5,angle=45),legend.position="bottom",legend.title=element_text(size=5,face="bold",vjust=1),legend.text=element_text(size=5,angle=45))+guides(color=guide_colourbar(barwidth=5,barheight=0.5))+geom_hline(yintercept=cutoff.l,size=0.1,color="red")+geom_hline(yintercept=cutoff.h,size=0.1,color="red")) } else { densities.s[[j]] <- density(sc10x.temp$nFeature_RNA,sc10x.temp[[i]][,1],n=1000) plots.s[[j]] <- ggplotGrob(ggplot(data.frame(cbind(sc10x.temp$nFeature_RNA,sc10x.temp[[i]][,1])))+geom_point(aes(x=X1,y=X2,color=densities.s[[j]]),size=0.1)+scale_x_continuous(limits=c(0,max.ct))+scale_y_continuous(limits=c(0,max.y))+scale_color_viridis(option="inferno")+labs(x="nFeature_RNA",y=i,color="Density")+ggtitle(j)+cowplot::theme_cowplot()+theme(plot.title=element_text(size=7.5),axis.title=element_text(size=7.5),axis.text=element_text(size=5,angle=45),legend.position="bottom",legend.title=element_text(size=5,face="bold",vjust=1),legend.text=element_text(size=5,angle=45))+guides(color=guide_colourbar(barwidth=5,barheight=0.5))+geom_hline(yintercept=cutoff.l,size=0.1,color="red")+geom_hline(yintercept=cutoff.h,size=0.1,color="red")) } cells.remove[[j]] <- c(cells.remove[[j]],rownames(sc10x[[j]][[i]])[sc10x[[j]][[i]][,1] < cutoff.l | sc10x[[j]][[i]][,1] > cutoff.h]) } ggsave(paste0("./analysis/qc/Violin_qc.raw.",i,".",j,".eps"),plot=plots.v[[j]]) if (i %in% c("nFeature_RNA","percent.mito","percent.ribo","nCount_RNA")){ ggsave(paste0("./analysis/qc/Plot_qc.raw.",i,".",j,".eps"),plot=plots.s[[j]]) } } } #Record cell/gene counts pre and post filtering counts.cell.raw <- list() counts.gene.raw <- list() sc10x.sub <- list() counts.cell.filtered <- list() counts.gene.filtered <- list() for (i in names(sc10x)){ counts.cell.raw[i] <- ncol(GetAssayData(object=sc10x[[i]],slot="counts")) counts.gene.raw[i] <- nrow(GetAssayData(object=sc10x[[i]],slot="counts")) sc10x.sub[[i]] <- subset(sc10x[[i]],cells=setdiff(colnames(sc10x[[i]]),cells.remove[[i]])) counts.cell.filtered[i] <- ncol(GetAssayData(object=sc10x.sub[[i]],slot="counts")) counts.gene.filtered[i] <- nrow(GetAssayData(object=sc10x.sub[[i]],slot="counts")) } #Plot filtered stats max.ct <- 0 max.ft <- 0 max.mt <- 0 max.rb <- 0 for (i in names(sc10x)){ if (max.ct < max(sc10x.sub[[i]][["nCount_RNA"]])){ max.ct <- max(sc10x.sub[[i]][["nCount_RNA"]]) } if (max.ft < max(sc10x.sub[[i]][["nFeature_RNA"]])){ max.ft <- max(sc10x.sub[[i]][["nFeature_RNA"]]) } if (max.mt < max(sc10x.sub[[i]][["percent.mito"]])){ max.mt <- max(sc10x.sub[[i]][["percent.mito"]]) } if (max.rb < max(sc10x.sub[[i]][["percent.ribo"]])){ max.rb <- max(sc10x.sub[[i]][["percent.ribo"]]) } } max.ct <- max.ct*1.1 max.ft <- max.ft*1.1 max.mt <- max.mt*1.1 max.rb <- max.rb*1.1 for (i in feature){ max.y <- 0 if (i == "nCount_RNA"){ max.y <- max.ct } else if (i == "nFeature_RNA"){ max.y <- max.ft } else if (i == "percent.mito"){ max.y <- max.mt } else if (i == "percent.ribo"){ max.y <- max.rb } plots.v <- list() densities.s <- list() plots.s <- list() for (j in names(sc10x.sub)){ sc10x.temp <- sc10x.sub[[j]] plots.v[[j]] <- VlnPlot(object=sc10x.temp,features=i,pt.size=0.1,)+scale_x_discrete(labels=j)+scale_y_continuous(limits=c(0,max.y))+theme(legend.position="none",axis.text.x=element_text(hjust=0.5,angle=0)) if (i != "nCount_RNA"){ densities.s[[j]] <- density(sc10x.temp$nCount_RNA,sc10x.temp[[i]][,1],n=1000) plots.s[[j]] <- ggplotGrob(ggplot(data.frame(cbind(sc10x.temp$nCount_RNA,sc10x.temp[[i]][,1])))+geom_point(aes(x=X1,y=X2,color=densities.s[[j]]),size=0.1)+scale_x_continuous(limits=c(0,max.ct))+scale_y_continuous(limits=c(0,max.y))+scale_color_viridis(option="inferno")+labs(x="nCount_RNA",y=i,color="Density")+ggtitle(j)+cowplot::theme_cowplot()+theme(plot.title=element_text(size=7.5),axis.title=element_text(size=7.5),axis.text=element_text(size=5,angle=45),legend.position="bottom",legend.title=element_text(size=5,face="bold",vjust=1),legend.text=element_text(size=5,angle=45))+guides(color=guide_colourbar(barwidth=5,barheight=0.5))) } else { densities.s[[j]] <- density(sc10x.temp$nFeature_RNA,sc10x.temp[[i]][,1],n=1000) plots.s[[j]] <- ggplotGrob(ggplot(data.frame(cbind(sc10x.temp$nFeature_RNA,sc10x.temp[[i]][,1])))+geom_point(aes(x=X1,y=X2,color=densities.s[[j]]),size=0.1)+scale_x_continuous(limits=c(0,max.ct))+scale_y_continuous(limits=c(0,max.y))+scale_color_viridis(option="inferno")+labs(x="nFeature_RNA",y=i,color="Density")+ggtitle(j)+cowplot::theme_cowplot()+theme(plot.title=element_text(size=7.5),axis.title=element_text(size=7.5),axis.text=element_text(size=5,angle=45),legend.position="bottom",legend.title=element_text(size=5,face="bold",vjust=1),legend.text=element_text(size=5,angle=45))+guides(color=guide_colourbar(barwidth=5,barheight=0.5))) } ggsave(paste0("./analysis/qc/Violin_qc.filtered.",i,".",j,".eps"),plot=plots.v[[j]]) if (i %in% c("nFeature_RNA","percent.mito","percent.ribo","nCount_RNA")){ ggsave(paste0("./analysis/qc/Plot_qc.filtered.",i,".",j,".eps"),plot=plots.s[[j]]) } } } results <- list( sc10x=sc10x.sub, counts.cell.raw=counts.cell.raw, counts.gene.raw=counts.gene.raw, counts.cell.filtered=counts.cell.filtered, counts.gene.filtered=counts.gene.filtered ) return(results) } scThresh <- function(sc10x,feature,sub=FALSE){ #Calculate thresholds and cutoffs #Inputs: #sc10x = Seruat object #feature = feature to threshold #sub = Subfolder to save output files #Outputs: #result = Threshold data #Make folders if (sub==FALSE){ folder <- "./analysis/qc/cutoffs/" } else { folder <- paste0("./analysis/qc/cutoffs/",sub,"/") if (!dir.exists(folder)){ dir.create(folder) } } #Calculate range of histogram based threholding and manually select methods for cutoffs scale <- list() scale.scaled <- list() h <- list() thresh <-list() cutoff.l <- list() cutoff.h <- list() thresh_methods <- c("IJDefault","Huang","Huang2","Li","Mean","MinErrorI","Moments","Otsu","Percentile","RenyiEntropy","Shanbhag","Triangle")#,"Intermodes","IsoData" for (i in names(sc10x)){ scale[[i]] <- data.frame(Score=sc10x[[i]][[feature]]) colnames(scale[[i]]) <- "Score" scale[[i]] <- data.frame(Score=scale[[i]]$Score[!is.na(scale[[i]]$Score)]) scale.scaled[[i]] <- as.integer((scale[[i]]$Score-min(scale[[i]]$Score))/(max(scale[[i]]$Score)-min(scale[[i]]$Score))*360) #scale.scaled[[i]] <- as.integer(scales::rescale(scale[[i]]$Score,to=c(0,1))*360) h[[i]] <- hist(scale[[i]]$Score,breaks=100,plot=FALSE) thresh[[i]] <- purrr::map_chr(thresh_methods,~auto_thresh(scale.scaled[[i]],.)) %>% tibble(method = thresh_methods, threshold = .) thresh[[i]]$threshold <- as.numeric(thresh[[i]]$threshold) thresh[[i]]$threshold <- ((thresh[[i]]$threshold/360)*(max(scale[[i]]$Score)-min(scale[[i]]$Score)))+min(scale[[i]]$Score) #thresh[[i]] <- thresh[[i]] %>% mutate(threshold=(scales::rescale(as.numeric(threshold)/360,to=range(scale[[i]]$Score)))) postscript(paste0(folder,"Hist_qc.",i,".",feature,".eps")) plot(h[[i]],main=paste0("Histogram of ",feature," of sample ",i),xlab=feature) abline(v=thresh[[i]]$threshold) mtext(thresh[[i]]$method,side=1,line=2,at=thresh[[i]]$threshold,cex=0.5) dev.off() } return(thresh) } scCellCycle <- function(sc10x,sub=FALSE,sp="hu"){ #Runs Seurat based PCA analysis for cell cycle ID #Inputs: #sc10x = Seruat object #sub = Subfolder to save output files #Outputs: #results[1] = Seurat object #results[2] = s genes #results[3] = g2m genes #Make sub-folders if necessary if (sub==FALSE){ folder <- "./analysis/qc/cellcycle/" } else { folder <- paste0("./analysis/qc/cellcycle/",sub,"/") if (!dir.exists(folder)){ dir.create(folder) }} #score cell cycle genes.cc <- readLines(con="./genesets/regev_lab_cell_cycle_genes.txt") genes.s <- genes.cc[1:43] genes.g2m <- genes.cc[44:97] sc10x <- NormalizeData(object=sc10x,verbose=FALSE) sc10x <- ScaleData(object=sc10x,do.par=TRUE,num.cores=45,verbose=FALSE) sc10x <- CellCycleScoring(object=sc10x,s.features=genes.s,g2m.features=genes.g2m,set.ident=TRUE) #plot cell cycle specific genes if (sp=="hu"){ genes=c("PCNA","TOP2A","MCM6","MKI67") postscript(paste0(folder,"Violin_cc.Raw.eps")) plot <- VlnPlot(object=sc10x,features=genes,ncol=2,pt.size=1) plot(plot) dev.off() } # sc10x <- RunPCA(object=sc10x,features=c(genes.s,genes.g2m),npcs=2,verbose=FALSE) # postscript(paste0(folder,"PCA_cc.Raw.eps")) # plot <- DimPlot(object=sc10x,reduction="pca") # plot(plot) # dev.off() # gc() # sc10x <- ScaleData(object=sc10x,vars.to.regress=c("S.Score","G2M.Score"),do.par=TRUE,num.cores=45,verbose=TRUE) # gc() # sc10x <- RunPCA(object=sc10x,features=c(genes.s,genes.g2m),npcs=2,verbose=FALSE) # postscript(paste0(folder,"PCA_cc.Norm.eps")) # plot <- DimPlot(object=sc10x,reduction="pca") # plot(plot) # dev.off() results <- list( sc10x=sc10x, genes.s=genes.s, genes.g2m=genes.g2m ) return(results) } scPC <- function(sc10x,pc=50,hpc=0.9,file="pre.stress",print="tsne",assay="integrated"){ #Scale Seurat object & calculate # of PCs to use #Inputs: #sc10x = Seruat object #pc = number of PCs to cacluate #hpc = max variance cutoff for PCs to use" #file = file for output #Outputs: #result[1] = Seurat object #result[2] = # of PCs to use #Run PCA Idents(object=sc10x) <- "ALL" sc10x <- RunPCA(object=sc10x,npcs=pc,verbose=FALSE,assay=assay) #Calculate PCs to use pc.use <- sc10x[["pca"]]@stdev^2 pc.use <- pc.use/sum(pc.use) pc.use <- cumsum(pc.use) pc.use <- min(which(pc.use>=hpc)) postscript(paste0("./analysis/qc/Plot_PCElbow_",file,".eps")) plot <- ElbowPlot(object=sc10x,ndims=pc) plot <- plot+geom_vline(xintercept=pc.use,size=1,color="red") plot(plot) dev.off() results <- list( sc10x=sc10x, pc.use=pc.use ) return(results) } scAlign <- function(sc10x.l){ for (i in 1:length(sc10x.l)){ #sc10x.l[[i]] <- NormalizeData(sc10x.l[[i]],verbose=FALSE) gc() #sc10x.l[[i]] <- ScaleData(sc10x.l[[i]],vars.to.regress=c("nFeature_RNA","percent.mito"),verbose = FALSE) sc10x.l[[i]] <- SCTransform(sc10x.l[[i]],vars.to.regress=c("nFeature_RNA","percent.mito","Stress1"),verbose=FALSE,assay="RNA") gc() #sc10x.l[[i]] <- FindVariableFeatures(sc10x.l[[i]],selection.method="vst",nfeatures=2000,verbose=FALSE) } sc10x.features <- SelectIntegrationFeatures(object.list=sc10x.l,nfeatures=5000) sc10x.l <- PrepSCTIntegration(object.list=sc10x.l,anchor.features=sc10x.features,verbose=FALSE) sc10x.l <- lapply(sc10x.l,FUN=function(x) { RunPCA(x,features=sc10x.features,verbose=FALSE) }) sc10x.anchors <- FindIntegrationAnchors(object.list=sc10x.l,normalization.method="SCT",anchor.features=sc10x.features,verbose=FALSE,reduction="rpca",dims=1:30) sc10x <- IntegrateData(anchorset=sc10x.anchors,normalization.method="SCT",verbose=FALSE) #sc10x <- FindIntegrationAnchors(object.list=sc10x.l,dims=1:30,scale=FALSE) #sc10x <- IntegrateData(anchorset=sc10x,dims=1:30) #gc() #sc10x <- ScaleData(object=sc10x,do.par=TRUE,num.cores=45,verbose=FALSE,assay="integrated") #gc() gc() sc10x <- SCTransform(sc10x,vars.to.regress=c("nFeature_RNA","percent.mito","Stress1"),verbose=FALSE,return.only.var.genes=FALSE,assay="RNA") gc() return(sc10x) } scCluster <- function(sc10x,res=0.1,red="pca",dim,print="tsne",folder=FALSE,assay="integrated"){ #Cluster Seurat object and produce visualizations #Inputs: #sc10x = Seruat object #res = resolution to calculate clustering #red = rediction type to use for clustering #dim = number of dimentions to use for display #print = dimentionality reduction to use for display #folder = folder for output #Outputs: #result = Seurat object #Create subfolder if necessary if (folder==FALSE){ sub <- "" } else if (print != "0") { if (!dir.exists(paste0("./analysis/vis/",folder))){ dir.create(paste0("./analysis/vis/",folder)) } sub <- paste0(folder,"/") } DefaultAssay(sc10x) <- assay #Calculste Vis sc10x <- RunTSNE(sc10x,dims=1:dim,reduction="pca",assay=assay) sc10x <- RunUMAP(sc10x,dims=1:dim,reduction="pca",assay=assay) sc10x <- FindNeighbors(sc10x,reduction=red,verbose=FALSE,assay=assay) for (i in res){ sc10x <- FindClusters(sc10x,resolution=i,verbose=FALSE) plot1 <- DimPlot(sc10x,reduction="pca",label=TRUE,repel=TRUE)+theme(legend.position="none") plot2 <- DimPlot(sc10x,reduction="tsne",label=TRUE,repel=TRUE)+theme(legend.position="none") plot3 <- DimPlot(sc10x,reduction="umap",label=TRUE,repel=TRUE)+theme(legend.position="none") if (print=="tsne"){ postscript(paste0("./analysis/vis/",sub,"tSNE_",i,".eps")) print(print2) dev.off() } else if (print=="umap"){ postscript(paste0("./analysis/vis/",sub,"UMAP_",i,".eps")) print(print3) dev.off() } else if (print=="2"){ plot2 <- plot2+theme(legend.position="none") plot3 <- plot3+theme(legend.position="none") postscript(paste0("./analysis/vis/",sub,"2Vis_",i,".eps")) grid.arrange(plot2,plot3,ncol=1) dev.off() } else if (print=="3"){ plot1 <- plot1+theme(legend.position="none") plot2 <- plot2+theme(legend.position="none") plot3 <- plot3+theme(legend.position="none") postscript(paste0("./analysis/vis/",sub,"3Vis_",i,".eps")) grid.arrange(plot1,plot2,plot3,ncol=1) dev.off() }} for (i in c("samples","HTO_maxID","hash.ID")[c("samples","HTO_maxID","hash.ID") %in% colnames(sc10x@meta.data)]){ plot1 <- DimPlot(sc10x,reduction="pca",group.by=i) plot2 <- DimPlot(sc10x,reduction="tsne",group.by=i) plot3 <- DimPlot(sc10x,reduction="umap",group.by=i) legend <- cowplot::get_legend(plot1) if (print=="tsne"){ postscript(paste0("./analysis/vis/",sub,"tSNE_",i,".eps")) grid.arrange(plot2,legend,ncol=1) dev.off() } else if (print=="umap"){ postscript(paste0("./analysis/vis/",sub,"UMAP_",i,".eps")) grid.arrange(plot3,legend,ncol=1) dev.off() } else if (print=="2"){ plot2 <- plot2+theme(legend.position="none") plot3 <- plot3+theme(legend.position="none") postscript(paste0("./analysis/vis/",sub,"2Vis_",i,".eps")) grid.arrange(plot2,plot3,legend,ncol=1) dev.off() } else if (print=="3"){ plot1 <- plot1+theme(legend.position="none") plot2 <- plot2+theme(legend.position="none") plot3 <- plot3+theme(legend.position="none") postscript(paste0("./analysis/vis/",sub,"3Vis_",i,".eps")) grid.arrange(plot1,plot2,plot3,legend,ncol=1) dev.off() } } DefaultAssay(sc10x) <- "SCT" return(sc10x) } scScore <- function(sc10x.l,score,geneset,cut.pt=0.9,anchor=FALSE){ #Runs custom PCA analysis for stress ID #Inputs: #sc10x = Seruat object #score = name of geneset for scoring #geneset = geneset to use for ID #cut.pt = % of cells to keep #Outputs: #results[1] = Seurat object (original + score) #results[2] = Seurat object (negatively filtered) #results[3] = Seurat object (positively filtered) #Make subdirectory if (!dir.exists(paste0("./analysis/score_id/",score))){ dir.create(paste0("./analysis/score_id/",score)) } if (!dir.exists(paste0("./analysis/vis/",score))){ dir.create(paste0("./analysis/vis/",score)) } sc10x.l.negative <- list() sc10x.l.positive <- list() for (i in names(sc10x.l)){ sc10x <- sc10x.l[[i]] #Score geneset sc10x <- AddModuleScore(object=sc10x,features=geneset,name=score,assay="SCT") Idents(object=sc10x) <- paste0(score,"1") #CDF cdf <- ecdf(as.numeric(levels(sc10x))) if (cut.pt == "renyi"){ h <- hist(data.frame(sc10x[[paste0(score,"1")]])[,paste0(score,"1")],breaks=1000,plot=FALSE) cutoff.temp <- mean(c(h$mids[which.max(h$counts)],h$mids[-which.max(h$counts)][which.max(h$counts[-which.max(h$counts)])])) cells.remove <- rownames(sc10x[[paste0(score,"1")]])[sc10x[[paste0(score,"1")]][,1] < cutoff.temp] sc10x.temp <- subset(sc10x,cells=cells.remove,invert=TRUE) thresh <- list() thresh[["all"]] <- scThresh(list(all=sc10x.temp),feature=paste0(score,"1"),sub=paste0(score,".",i)) cut.x <- thresh$all$all$threshold[thresh$all$all$method=="RenyiEntropy"] } else if (cut.pt == "triangle"){ h <- hist(data.frame(sc10x[[paste0(score,"1")]])[,paste0(score,"1")],breaks=1000,plot=FALSE) cutoff.temp <- mean(c(h$mids[which.max(h$counts)],h$mids[-which.max(h$counts)][which.max(h$counts[-which.max(h$counts)])])) cells.remove <- rownames(sc10x[[paste0(score,"1")]])[sc10x[[paste0(score,"1")]][,1] < cutoff.temp] sc10x.temp <- subset(sc10x,cells=cells.remove,invert=TRUE) thresh <- list() thresh[["all"]] <- scThresh(list(all=sc10x.temp),feature=paste0(score,"1"),sub=paste0(score,".",i)) cut.x <- thresh$all$all$threshold[thresh$all$all$method=="Triangle"] } else if (cut.pt == "minerror"){ h <- hist(data.frame(sc10x[[paste0(score,"1")]])[,paste0(score,"1")],breaks=1000,plot=FALSE) cutoff.temp <- mean(c(h$mids[which.max(h$counts)],h$mids[-which.max(h$counts)][which.max(h$counts[-which.max(h$counts)])])) cells.remove <- rownames(sc10x[[paste0(score,"1")]])[sc10x[[paste0(score,"1")]][,1] < cutoff.temp] sc10x.temp <- subset(sc10x,cells=cells.remove,invert=TRUE) thresh <- list() thresh[["all"]] <- scThresh(list(all=sc10x.temp),feature=paste0(score,"1"),sub=paste0(score,".",i)) cut.x <- thresh$all$all$threshold[thresh$all$all$method=="MinErrorI"] } else { cut.x <- quantile(cdf,probs=cut.pt) cut.x <- unname(cut.x) } postscript(paste0("./analysis/score_id/",score,"/CDF_",i,".",score,".eps")) plot(cdf,main=paste0("Cumulative Distribution of ",score," Score"),xlab=paste0(score," Score"),ylab="CDF") abline(v=cut.x,col="red") dev.off() #KDE postscript(paste0("./analysis/score_id/",score,"/Histo_",i,".",score,".eps")) plot(ggplot(data.frame(Score=as.numeric(levels(sc10x))),aes(x=Score))+geom_histogram(bins=100,aes(y=..density..))+geom_density()+geom_vline(xintercept=cut.x,size=1,color="red")+ggtitle(paste0(score," Score"))+cowplot::theme_cowplot()) dev.off() Idents(object=sc10x) <- "ALL" predicate <- paste0(score,"1 >= ",cut.x) Idents(object=sc10x, cells = rownames(sc10x[[paste0(score,"1")]])[sc10x[[paste0(score,"1")]] >= cut.x]) <- score sc10x[[score]] <- Idents(object=sc10x) Idents(sc10x) <- score sc10x.negative <- subset(x=sc10x,idents="ALL") sc10x.positive <- subset(x=sc10x,idents=score) #Generate plots postscript(paste0("./analysis/vis/",score,"/3Vis_",i,".",score,".eps")) plot1 <- DimPlot(sc10x,reduction="pca",label=TRUE,repel=TRUE)+theme(legend.position="none")+ggtitle("ALL") plot2 <- DimPlot(sc10x.negative,reduction="pca",label=TRUE,repel=TRUE)+theme(legend.position="none") plot3 <- DimPlot(sc10x.positive,reduction="pca",label=TRUE,repel=TRUE)+theme(legend.position="none") plot4 <- DimPlot(sc10x,reduction="tsne",label=TRUE,repel=TRUE)+theme(legend.position="none")+ggtitle("Negative") plot5 <- DimPlot(sc10x.negative,reduction="tsne",label=TRUE,repel=TRUE)+theme(legend.position="none") plot6 <- DimPlot(sc10x.positive,reduction="tsne",label=TRUE,repel=TRUE)+theme(legend.position="none") plot7 <- DimPlot(sc10x,reduction="umap",label=TRUE,repel=TRUE)+theme(legend.position="none")+ggtitle("Positive") plot8 <- DimPlot(sc10x.negative,reduction="umap",label=TRUE,repel=TRUE)+theme(legend.position="none") plot9 <- DimPlot(sc10x.positive,reduction="umap",label=TRUE,repel=TRUE)+theme(legend.position="none") grid.arrange(plot1,plot2,plot3,plot4,plot5,plot6,plot7,plot8,plot9,ncol=3) dev.off() #Generate violin plot of gene exvpression if (anchor!=FALSE){ postscript(paste0("./analysis/score_id/",score,"/Violin_",i,".",score,".eps")) plot <- VlnPlot(object=sc10x,features=anchor,pt.size=0.1,assay="SCT") plot(plot) dev.off() } sc10x.l[[i]] <- sc10x sc10x.l.negative[[i]] <- sc10x.negative sc10x.l.positive[[i]] <- sc10x.positive } results <- list( sc10x <- sc10x.l, sc10x.negative <- sc10x.l.negative, sc10x.positive <- sc10x.l.positive ) return(results) } scQuSAGE <- function(sc10x,gs,save=FALSE,type,id,ds=0,nm="pops",print="tsne"){ #Runs QuSAGE #Inputs: #sc10x = Seruat object #gs = geneset to use for correlation #save = save ID #type = type of qusage to run (id: create ID's based on run, sm: cor only using small genesets, lg: cor only with large genesets) #id = ident to use #nm = name of test #print = dimentionality reduction to use for display #Outputs: #results[1] = Seurat object #results[2] = correlation table #results[3] = correlation results if (!dir.exists(paste0("./analysis/cor/",nm))){ dir.create(paste0("./analysis/cor/",nm)) } if (!dir.exists(paste0("./analysis/cor/",nm,"/geneset"))){ dir.create(paste0("./analysis/cor/",nm,"/geneset")) } if (!dir.exists(paste0("./analysis/cor/",nm,"/cluster"))){ dir.create(paste0("./analysis/cor/",nm,"/cluster")) } if (!dir.exists(paste0("./analysis/vis/",nm))){ dir.create(paste0("./analysis/vis/",nm)) } Idents(object=sc10x) <- id number.clusters <- length(unique(levels(x=sc10x))) labels <- paste0("Cluster_",as.vector(Idents(object=sc10x))) cell.sample <- NULL for (i in unique(labels)){ cell <- WhichCells(sc10x,ident=sub("Cluster_","",i)) if (length(cell)>ds & ds!=0){ set.seed(71682) rnd <- sample(1:length(cell),ds) cell <- cell[rnd] } cell.sample <- c(cell.sample,cell) } data <- as.data.frame(as.matrix(GetAssayData(sc10x[,colnames(sc10x) %in% cell.sample],assay="SCT",slot="scale.data"))) labels <- labels[colnames(sc10x) %in% cell.sample] #groups <- sort(unique(labels)) groups <- paste0("Cluster_",levels(sc10x@active.ident)) #col <- hcl(h=(seq(15,375-375/length(groups),length=length(groups))),c=100,l=65) col <- brewer.pal(n=length(groups),name="Dark2") #Make labels for QuSAGE clust <- list() clust.comp <- list() for (i in groups){ t <- labels t[t != i] <- "REST" clust[i] <- list(i=t) rm(t) clust.comp[i] <- paste0(i,"-REST") } #Run QuSAGE for (i in groups){ assign(paste0("results.",i),qusage(data,unlist(clust[i]),unlist(clust.comp[i]),gs)) } #Generate ID table results.cor <- NULL results.cor <- qsTable(get(paste0("results.",groups[1])),number=length(gs)) results.cor$Cluster <- groups[1] for (i in groups[-1]){ qs <- qsTable(get(paste0("results.",i)),number=length(gs)) qs$Cluster <- i results.cor <- rbind(results.cor,qs) } results.cor <- results.cor[,-3] rownames(results.cor) <- NULL results.clust.id <- NULL #if (max(results.cor[results.cor[,4]==groups[1] & results.cor[,3]<=0.05,][,2],na.rm=TRUE)>=0){ # results.clust.id <- results.cor[results.cor[,4]==groups[1] & results.cor[,3]<=0.05,][which.max(results.cor[results.cor[,4]==groups[1] & results.cor[,3]<=0.05,][,2]),] if (max(results.cor[results.cor[,4]==groups[1],][,2],na.rm=TRUE)>=0){ results.clust.id <- results.cor[results.cor[,4]==groups[1],][which.max(results.cor[results.cor[,4]==groups[1],][,2]),] } else { results.clust.id$pathway.name <- "Unknown" results.clust.id$log.fold.change <- 0 results.clust.id$FDR <- 0 results.clust.id$Cluster <- groups[1] results.clust.id <- as.data.frame(results.clust.id) } for (i in groups[-1]){ #if (max(results.cor[results.cor[,4]==i & results.cor[,3]<=0.05,][,2],na.rm=TRUE)>=0){ # results.clust.id <- rbind(results.clust.id,results.cor[results.cor[,4]==i & results.cor[,3]<=0.05,][which.max(results.cor[results.cor[,4]==i & results.cor[,3]<=0.05,][,2]),]) if (max(results.cor[results.cor[,4]==i,][,2],na.rm=TRUE)>=0){ results.clust.id <- rbind(results.clust.id,results.cor[results.cor[,4]==i,][which.max(results.cor[results.cor[,4]==i,][,2]),]) } else { results.clust.id <- rbind(results.clust.id,data.frame(pathway.name="Unknown",log.fold.change=0,FDR=0,Cluster=i)) }} rownames(results.clust.id) <- NULL #Determine axes for correlation plots max.x.rg <- 0 min.x.rg <- 0 max.y.rg <- 0 for (i in groups){ qs <- get(paste0("results.",i)) if (max(qs$path.mean)>max.x.rg){ max.x.rg <- max(qs$path.mean) } if (min(qs$path.mean)max.y.rg){ max.y.rg <- max(qs$path.PDF) }} if (type=="sm"){ #Plot correlation plots by geneset for (i in 1:length(gs)){ postscript(paste0("./analysis/cor/",nm,"/geneset/QuSAGE_",nm,".",names(gs)[i],".eps")) for (j in groups){ qs <- get(paste0("results.",j)) if (j==groups[1]){ plotDensityCurves(qs,path.index=i,col=col[match(j,groups)],main=names(gs)[i],xlim=c(min.x.rg-0.05,max.x.rg+0.05),ylim=c(0,50*ceiling(max.y.rg/50)),xlab="Gene Set Activation",lwd=5,cex.main=2.5,cex.axis=1.5,cex.lab=2) } else { plotDensityCurves(qs,path.index=i,add=TRUE,col=col[match(j,groups)],lwd=5) }} legend("topright",col=col,legend=groups,lty=1,lwd=5,cex=2,ncol=1,bty="n",pt.cex=2) box(lwd=5) dev.off() } #Plot correlation plots by cluster for (i in groups){ qs <- get(paste0("results.",i)) postscript(paste0("./analysis/cor/",nm,"/cluster/QuSAGE_",nm,"_",i,".eps")) for (j in 1:length(gs)){ if (j==1){ plotDensityCurves(qs,path.index=j,col=viridis(length(gs))[j],main=i,xlim=c(min.x.rg-0.05,max.x.rg+0.05),ylim=c(0,50*ceiling(max.y.rg/50)),xlab="Gene Set Activation",lwd=5,cex.main=2.5,cex.axis=1.5,cex.lab=2) } else { plotDensityCurves(qs,path.index=j,add=TRUE,col=viridis(length(gs))[j],lwd=5) }} legend("topright",col=viridis(length(gs)),legend=names(gs),lty=1,lwd=5,cex=1,ncol=2,bty="n",pt.cex=2) box(lwd=5) dev.off() }} else { for (i in groups){ qs <- get(paste0("results.",i)) postscript(paste0("./analysis/cor/",nm,"/cluster/QuSAGE_",nm,"_",i,".eps")) plotCIs(qs,path.index=1:numPathways(qs),cex.lab=1.5) dev.off() }} if (save==TRUE){ merge.cluster <- NULL for (i in groups){ #if (max(qsTable(get(paste0("results.",i)),number=length(gs))[qsTable(get(paste0("results.",i)),number=length(gs))[,4]<=0.05,][,2],na.rm=TRUE)>=0){ # sc10x<-eval(parse(text=paste0("RenameIdents(object=sc10x,'",sub("Cluster_","",i),"' = '",qsTable(get(paste0("results.",i)),number=length(gs))[qsTable(get(paste0("results.",i)),number=length(gs))[2]==max(qsTable(get(paste0("results.",i)),number=length(gs))[qsTable(get(paste0("results.",i)),number=length(gs))[,4]<=0.05,][,2],na.rm=TRUE)][1],"')"))) if (max(qsTable(get(paste0("results.",i)),number=length(gs))[,2],na.rm=TRUE)>=0){ sc10x<-eval(parse(text=paste0("RenameIdents(object=sc10x,'",sub("Cluster_","",i),"' = '",qsTable(get(paste0("results.",i)),number=length(gs))[qsTable(get(paste0("results.",i)),number=length(gs))[2]==max(qsTable(get(paste0("results.",i)),number=length(gs))[,2],na.rm=TRUE)][1],"')"))) } else { sc10x<-eval(parse(text=paste0("RenameIdents(object=sc10x,'",sub("Cluster_","",i),"' = 'Unknown')"))) }} sc10x[[nm]] <- Idents(object=sc10x) } plot1 <- DimPlot(sc10x,reduction="pca",label=TRUE,repel=TRUE)+theme(legend.position="none") plot2 <- DimPlot(sc10x,reduction="tsne",label=TRUE,repel=TRUE)+theme(legend.position="none") plot3 <- DimPlot(sc10x,reduction="umap",label=TRUE,repel=TRUE)+theme(legend.position="none") if (print=="tsne"){ postscript(paste0("./analysis/vis/",nm,"/tSNE_",id,"_",nm,".eps")) print(plot2) dev.off() } else if (print=="umap"){ postscript(paste0("./analysis/vis/",nm,"/UMAP_",id,"_",nm,".eps")) print(plot3) dev.off() } else if (print=="2"){ postscript(paste0("./analysis/vis/",nm,"/2Vis_",id,"_",nm,".eps")) grid.arrange(plot2,plot3,ncol=1) dev.off() } else if (print=="3"){ postscript(paste0("./analysis/vis/",nm,"/3Vis_",id,"_",nm,".eps")) grid.arrange(plot1,plot2,plot3,ncol=1) dev.off() } results <- list( sc10x=sc10x, results.cor=results.cor, results.clust.id=results.clust.id ) names(results)=c("sc10x",paste0("results.cor.",nm),paste0("results.clust.",nm,".id")) return(results) } scShinyOutput <- function(sc10x,anal="raw"){ write_delim(as.data.frame(colnames(sc10x)),path=paste0("./analysis/shiny/",anal,"/outs/filtered_feature_bc_matrix/barcodes.tsv.gz"),delim="\t",col_names=FALSE) features <- rownames(sc10x) features <- c(features,c("nFeature","nCount","percent.mito","percent.ribo","Stress.score")) features <- data.frame(ENSG=features,Feature=features,Label="feature") write_delim(features,path=paste0("./analysis/shiny/",anal,"/outs/filtered_feature_bc_matrix/features.tsv.gz"),delim="\t",col_names=FALSE) exp <- GetAssayData(sc10x,slot="scale.data") exp.extra <- matrix(nrow=5,ncol=ncol(sc10x)) exp.extra[1,] <- as.numeric(sc10x$nFeature_RNA) exp.extra[2,] <- as.numeric(sc10x$nCount_RNA) exp.extra[3,] <- as.numeric(sc10x$percent.mito) exp.extra[4,] <- as.numeric(sc10x$percent.ribo) exp.extra[5,] <- as.numeric(sc10x$Stress1) exp <- rbind(exp,exp.extra) Matrix::writeMM(as(exp,"dgCMatrix"),file=paste0("./analysis/shiny/",anal,"/outs/filtered_feature_bc_matrix/matrix.mtx.gz")) for (i in c("pca","tsne","umap")){ dr <- Embeddings(sc10x,i) if (i != "pca"){ colnames(dr) <- c(paste0(toupper(i),"-",1:2)) } else { dr <- dr[,1:10] colnames(dr) <- c(paste0(toupper(i),"-",1:10)) } dr <- cbind(dr,Barcode=rownames(dr)) dr <- dr[,c(3,1,2)] dr <- as.data.frame(dr,row.names=FALSE) if (i != "pca"){ write_csv(dr,paste0("./analysis/shiny/",anal,"/outs/analysis/",i,"/2_components/projection.csv"),col_names=TRUE) } else { write_csv(dr,paste0("./analysis/shiny/",anal,"/outs/analysis/",i,"/10_components/projection.csv"),col_names=TRUE) } } sc10x <- NormalizeData(sc10x,assay="RNA") clusters <- c("samples","samples_HTO",paste0("integrated_snn_res.",res),"lin","pops","leu","scDWSpr","HTO_maxID","hash.ID") clusters <- intersect(clusters,names(sc10x@meta.data)) for (i in clusters){ if (nrow(unique(sc10x[[i]]))>1){ if (!dir.exists(paste0("./analysis/shiny/",anal,"/outs/analysis/clustering/",gsub("integrated_snn_res.","res_",i)))){ dir.create(paste0("./analysis/shiny/",anal,"/outs/analysis/clustering/",gsub("integrated_snn_res.","res_",i))) } clust <- as.matrix(sc10x[[i]]) colnames(clust) <- "Cluster" clust <- cbind(clust,Barcode=rownames(clust)) clust <- clust[,c(2,1)] clust <- as.data.frame(clust,row.names=FALSE) clust[,2] <- paste0("Cluster ",clust[,2]) write_csv(clust,paste0("./analysis/shiny/",anal,"/outs/analysis/clustering/",gsub("integrated_snn_res.","res_",i),"/clusters.csv"),col_names=TRUE) if (!dir.exists(paste0("./analysis/shiny/",anal,"/outs/analysis/diffexp/",gsub("integrated_snn_res.","res_",i)))){ dir.create(paste0("./analysis/shiny/",anal,"/outs/analysis/diffexp/",gsub("integrated_snn_res.","res_",i))) } Idents(sc10x) <- i deg <- FindAllMarkers(sc10x,assay="RNA",slot="data",logfc.threshold=0,test.use="MAST",min.pct=0.25,min.diff.pct=0.25,max.cells.per.ident=500) dexp <- data.frame("Feature ID"=unique(deg$gene),"Feature Name"=unique(deg$gene)) for (cluster in unique(deg$cluster)){ dexp[,paste0("Cluster.",cluster,".Mean.Counts")] <- deg$pct.1[deg$cluster==cluster][match(dexp$Feature.ID,deg$gene[deg$cluster==cluster])] dexp[,paste0("Cluster.",cluster,".Log2.fold.change")] <- deg$avg_logFC[deg$cluster==cluster][match(dexp$Feature.ID,deg$gene[deg$cluster==cluster])] dexp[,paste0("Cluster.",cluster,".Adjusted.p.value")] <- deg$p_val_adj[deg$cluster==cluster][match(dexp$Feature.ID,deg$gene[deg$cluster==cluster])] } colnames(dexp) <- gsub("\\."," ",colnames(dexp)) write_csv(dexp,paste0("./analysis/shiny/",anal,"/outs/analysis/diffexp/",gsub("integrated_snn_res.","res_",i),"/differential_expression.csv"),col_names=TRUE) } } }