Commit b5f7d29a authored by Gervaise Henry's avatar Gervaise Henry 🤠
Browse files

Update script with seurat 3 final optimizations to work with PdPbPc_deep

parent 2c0321c5
......@@ -13,6 +13,8 @@ library(dplyr)
library(viridis)
library(gridExtra)
library(SingleR)
library(sctransform)
library(autothresholdr)
options(bitmapType="cairo")
......@@ -21,73 +23,74 @@ setwd("../")
source("./r.scripts/sc-TissueMapper_functions.R")
source("./r.scripts/sc-TissueMapper_process.R")
project.name="musAdPrF"
project.name="PdPbPc_deep"
scFolders()
sc10x <- scLoad(p=project.name,cellranger=3,ref="mm10")
sc10x <- scLoad(p=project.name,cellranger=3,ref="GRCh38")
lg=250
hg=2000
hm=0.1
results <- scQC(sc10x,lg=lg,hg=hg,hm=hm,sub=FALSE,sp="mu")
lu=1000
results <- scQC(sc10x,lu=lu,sub=FALSE,sp="hu")
sc10x <- results[[1]]
counts.cell.raw <- results[[2]]
counts.gene.raw <- results[[3]]
counts.cell.filtered <- results[[4]]
counts.gene.filtered <- results[[5]]
rm(results)
rm(lg)
rm(hg)
rm(hm)
rm(lu)
# results <- scCellCycle(sc10x,sub=FALSE,sp="mu")
# results <- scCellCycle(sc10x,sub=FALSE,sp="hu")
# sc10x <- results[[1]]
# genes.s <- results[[2]]
# genes.g2m <- results[[3]]
# rm(results)
sc10x.l <- list()
sc10x.l[["mu1"]] <- subset(sc10x,subset= samples=="musAd001_PrF")
sc10x.l[["mu2"]] <- subset(sc10x,subset= samples=="musAd002_PrF")
sc10x.l[["mu3"]] <- subset(sc10x,subset= samples=="musAd003_PrF_St")
sc10x.l[["Donor"]] <- subset(sc10x,subset= Donor=="Donor")
sc10x.l[["BPH"]] <- subset(sc10x,subset= BPH=="BPH")
sc10x.l[["Cancer"]] <- subset(sc10x,subset= Cancer=="Cancer")
sc10x <- scCCA(sc10x.l)
rm(sc10x.l)
sc10x@project.name <- project.name
# gc()
# sc10x <- ScaleData(object=sc10x,vars.to.regress=c("nFeature_RNA","percent.mito","S.Score","G2M.Score"),do.par=TRUE,num.cores=45,verbose=FALSE)
sc10x <- ScaleData(object=sc10x,vars.to.regress=c("nFeature_RNA","percent.mito"),do.par=TRUE,num.cores=45,verbose=FALSE)
# gc()
# sc10x <- FindVariableFeatures(sc10x,verbose=FALSE)
gc()
sc10x <- ScaleData(object=sc10x,do.par=TRUE,num.cores=45,verbose=FALSE)
# sc10x <- ScaleData(object=sc10x,vars.to.regress=c("nFeature_RNA","percent.mito"),do.par=TRUE,num.cores=45,verbose=FALSE)
gc()
results <- scPC(sc10x,pc=50,hpc=0.9,file="pre.stress",print="2",cca=TRUE)
results <- scPC(sc10x,pc=100,hpc=0.9,file="pre.stress",print="2")
sc10x <- results[[1]]
pc.use.prestress <- results[[2]]
rm(results)
# sc10x <- scCluster(sc10x,res=0.5,red="pca",dim=pc.use.prestress,print="2",folder="pre.stress")
sc10x <- scCluster(sc10x,res=0.5,red="pca",dim=pc.use.prestress,print="2",folder="pre.stress")
# genes.stress <- read_delim("./genesets/genes.deg.Stress.csv",",",escape_double=FALSE,trim_ws=TRUE,col_names=TRUE)
# genes.stress <- genes.stress[1]
# colnames(genes.stress) <- "scDWS.Stress"
genes.stress <- read_delim("./genesets/genes.deg.Stress.csv",",",escape_double=FALSE,trim_ws=TRUE,col_names=TRUE)
genes.stress <- genes.stress[1]
colnames(genes.stress) <- "scDWS.Stress"
# results <- scScore(sc10x,score="Stress",geneset=as.list(genes.stress),cut.pt=0.75,anchor=c("EGR1","FOS","JUN"))
# sc10x.preStress <- results[[1]]
# sc10x <- results[[2]]
# rm(results)
results <- scScore(sc10x,score="Stress",geneset=as.list(genes.stress),cut.pt=0.9,anchor=c("EGR1","FOS","JUN"))
sc10x.preStress <- results[[1]]
sc10x <- results[[2]]
rm(results)
# sc10x <- FindVariableFeatures(sc10x,verbose=FALSE)
#
# gc()
# sc10x <- ScaleData(object=sc10x,vars.to.regress=c("nFeature_RNA","percent.mito","S.Score","G2M.Score"),do.par=TRUE,num.cores=45,verbose=FALSE)
# sc10x <- ScaleData(object=sc10x,vars.to.regress=c("nFeature_RNA","percent.mito"),do.par=TRUE,num.cores=45,verbose=FALSE)
# # sc10x <- ScaleData(object=sc10x,vars.to.regress=c("nFeature_RNA","percent.mito"),do.par=TRUE,num.cores=45,verbose=FALSE)
# gc()
# results <- scPC(sc10x,pc=50,hpc=0.9,file="post.stress",print="2",cca=TRUE)
# sc10x <- results[[1]]
# pc.use.poststress <- results[[2]]
# rm(results)
results <- scPC(sc10x,pc=100,hpc=0.9,file="post.stress",print="2")
sc10x <- results[[1]]
pc.use.poststress <- results[[2]]
rm(results)
res <- c(seq(0.1,0.5,0.1),0.75,seq(1,5,1))
sc10x <- scCluster(sc10x,res=res,red="pca",dim=pc.use.prestress,print="2",folder="ALL")
sc10x <- scCluster(sc10x,res=res,red="pca",dim=pc.use.poststress,print="2",folder="ALL")
save(sc10x,file=paste0("./analysis/sc10x.raw.rda"))
save.image(file="./analysis/sc10x.raw.RData")
......@@ -23,7 +23,7 @@ scFolders <- function(){
}
scLoad <- function(p,mc=3,mg=200,sub=FALSE,cellranger=3,ref="GRCh38"){
scLoad <- function(p,mc=3,mg=0,sub=FALSE,cellranger=3,ref="GRCh38"){
#Load and prefilter filtered_gene_bc_matrices_mex output from cellranger
#Inputs:
......@@ -110,7 +110,7 @@ scSubset <- function(sc10x,i="ALL",g="ALL"){
}
scQC <- function(sc10x,lg=500,hg=2500,hm=0.1,sub=FALSE,sp="hu"){
scQC <- function(sc10x,lu=0,sub=FALSE,sp="hu"){
#QC and filter Seurat object
#Inputs:
......@@ -138,38 +138,65 @@ scQC <- function(sc10x,lg=500,hg=2500,hm=0.1,sub=FALSE,sp="hu"){
#Calculate stats
if (sp=="hu"){
mito.genes <- grep(pattern="^MT-",x=rownames(x=GetAssayData(object=sc10x)),value=TRUE)
sc10x[["percent.mito"]] <- PercentageFeatureSet(object=sc10x,pattern="^MT-")
} else {
mito.genes <- grep(pattern="^mt-",x=rownames(x=GetAssayData(object=sc10x)),value=TRUE)
sc10x[["percent.mito"]] <- PercentageFeatureSet(object=sc10x,pattern="^mt-")
}
percent.mito <- Matrix::colSums(GetAssayData(object=sc10x,slot="counts")[mito.genes,])/Matrix::colSums(GetAssayData(object=sc10x,slot="counts"))
sc10x$percent.mito <- percent.mito
#Plot raw stats
for(x in c("ALL","samples")){
Idents(object=sc10x) <- x
postscript(paste0(folder,"Violin_qc.raw.",x,".eps"))
plot1 <- VlnPlot(object=sc10x,features="nCount_RNA",pt.size=0.1)
plot2 <- VlnPlot(object=sc10x,features="nFeature_RNA",pt.size=0.1)+geom_hline(yintercept=lg,size=1,color="red")+geom_hline(yintercept=hg,size=1,color="red")
plot3 <- VlnPlot(object=sc10x,features="percent.mito",pt.size=0.1)+geom_hline(yintercept=hm,size=1,color="red")
plot2 <- VlnPlot(object=sc10x,features="nFeature_RNA",pt.size=0.1)
#+geom_hline(yintercept=lg,size=1,color="red")+geom_hline(yintercept=hg,size=1,color="red")
plot3 <- VlnPlot(object=sc10x,features="percent.mito",pt.size=0.1)
#+geom_hline(yintercept=hm,size=1,color="red")
grid.arrange(plot1,plot2,plot3,nrow=1)
dev.off()
}
threshPlot <- list()
cell.remove <- NULL
Idents(object=sc10x) <- "samples"
for (i in c("percent.mito","nFeature_RNA")){
for (j in unique(sc10x$samples)){
if (i=="nFeature_RNA"){
thresh <- scThresh(scSubset(sc10x,i="samples",g=j),feature=i,direction="b",min=lu,name=j)
} else {
thresh <- scThresh(scSubset(sc10x,i="samples",g=j),feature=i,direction="h",name=j)
}
threshPlot[[j]] <- thresh[[3]]
data <- FetchData(subset(sc10x,idents=j),vars=i)
if (i=="nFeature_RNA"){
cell.remove <- c(cell.remove,rownames(data)[data[,1] < thresh[[2]] | data[,1] > thresh[[1]]])
} else {
cell.remove <- c(cell.remove,rownames(data)[data[,1] > thresh[[1]]])
}
sc10x
}
postscript(paste0(folder,"Plot_qc.hist.",i,".eps"))
do.call("grid.arrange",c(threshPlot,ncol=1,top=i))
dev.off()
}
sc10x.sub <- subset(sc10x,cells=WhichCells(sc10x,cells=cell.remove,invert=TRUE))
postscript(paste0(folder,"Plot_qc.raw.eps"))
density1 <- density(sc10x$nCount_RNA,sc10x$nFeature_RNA,n=1000)
plot1 <- ggplot(data.frame(cbind(sc10x$nCount_RNA,sc10x$nFeature_RNA)))+geom_point(aes(x=X1,y=X2,color=density1),size=0.1)+scale_color_viridis(option="inferno")+geom_hline(yintercept=lg,size=1,color="red")+geom_hline(yintercept=hg,size=1,color="red")+labs(x="nUMI",y="nGenes",color="Count")+cowplot::theme_cowplot()
plot1 <- ggplot(data.frame(cbind(sc10x$nCount_RNA,sc10x$nFeature_RNA)))+geom_point(aes(x=X1,y=X2,color=density1),size=0.1)+scale_color_viridis(option="inferno")+labs(x="nCount",y="nFeature",color="Count")+cowplot::theme_cowplot()
#+geom_hline(yintercept=lg,size=1,color="red")+geom_hline(yintercept=hg,size=1,color="red")
density2 <- density(sc10x$nCount_RNA,sc10x$percent.mito,n=1000)
plot2 <- ggplot(data.frame(cbind(sc10x$nCount_RNA,sc10x$percent.mito)))+geom_point(aes(x=X1,y=X2,color=density2),size=0.1)+scale_color_viridis(option="inferno")+geom_hline(yintercept=hm,size=1,color="red")+labs(x="nUMI",y="Percent Mitochondrial",color="Count")+cowplot::theme_cowplot()
plot2 <- ggplot(data.frame(cbind(sc10x$nCount_RNA,sc10x$percent.mito)))+geom_point(aes(x=X1,y=X2,color=density2),size=0.1)+scale_color_viridis(option="inferno")+labs(x="nCount",y="Percent Mitochondrial",color="Count")+cowplot::theme_cowplot()
#+geom_hline(yintercept=hm,size=1,color="red")
grid.arrange(plot1,plot2,nrow=1)
dev.off()
counts.cell.raw <- ncol(GetAssayData(object=sc10x,slot="counts"))
counts.gene.raw <- nrow(GetAssayData(object=sc10x,slot="counts"))
#Filter/normalize data
sc10x.sub <- subset(x=sc10x,subset= nFeature_RNA > lg)
sc10x.sub <- subset(x=sc10x.sub,subset= nFeature_RNA < hg)
sc10x.sub <- subset(x=sc10x.sub,subset= percent.mito < hm)
sc10x.sub <- NormalizeData(object=sc10x.sub,normalization.method="LogNormalize",scale.factor=10000,verbose=FALSE)
#sc10x.sub <- subset(x = sc10x, subset = nFeature_RNA > lg & nFeature_RNA < hg & percent.mito < hm)
# sc10x.sub <- NormalizeData(object=sc10x.sub,normalization.method="LogNormalize",scale.factor=10000,verbose=FALSE)
sc10x.sub <- SCTransform(sc10x.sub,vars.to.regress=c("nFeature_RNA","percent.mito"),verbose=FALSE)
#Plot filtered stats
for(x in c("ALL","samples")){
......@@ -183,9 +210,9 @@ scQC <- function(sc10x,lg=500,hg=2500,hm=0.1,sub=FALSE,sp="hu"){
}
postscript(paste0(folder,"Plot_qc.filtered.eps"))
density1 <- density(sc10x.sub$nCount_RNA,sc10x.sub$nFeature_RNA,n=1000)
plot1 <- ggplot(data.frame(cbind(sc10x.sub$nCount_RNA,sc10x.sub$nFeature_RNA)))+geom_point(aes(x=X1,y=X2,color=density1),size=0.1)+scale_color_viridis(option="inferno")+labs(x="nUMI",y="nGenes",color="Count")+cowplot::theme_cowplot()
plot1 <- ggplot(data.frame(cbind(sc10x.sub$nCount_RNA,sc10x.sub$nFeature_RNA)))+geom_point(aes(x=X1,y=X2,color=density1),size=0.1)+scale_color_viridis(option="inferno")+labs(x="nCount",y="nFeature",color="")+cowplot::theme_cowplot()
density2 <- density(sc10x.sub$nCount_RNA,sc10x.sub$percent.mito,n=1000)
plot2 <- ggplot(data.frame(cbind(sc10x.sub$nCount_RNA,sc10x.sub$percent.mito)))+geom_point(aes(x=X1,y=X2,color=density2),size=0.1)+scale_color_viridis(option="inferno")+labs(x="nUMI",y="Percent Mitochondrial",color="Count")+cowplot::theme_cowplot()
plot2 <- ggplot(data.frame(cbind(sc10x.sub$nCount_RNA,sc10x.sub$percent.mito)))+geom_point(aes(x=X1,y=X2,color=density2),size=0.1)+scale_color_viridis(option="inferno")+labs(x="nCount",y="Percent Mitochondrial",color="Count")+cowplot::theme_cowplot()
grid.arrange(plot1,plot2,nrow=1)
dev.off()
counts.cell.filtered <- ncol(GetAssayData(object=sc10x.sub,slot="counts"))
......@@ -201,6 +228,41 @@ scQC <- function(sc10x,lg=500,hg=2500,hm=0.1,sub=FALSE,sp="hu"){
return(results)
}
scThresh <- function(sc10x,feature,direction,min=0,name){
scale <- data.frame(Score=sc10x[[feature]])
colnames(scale) <- "Score"
h <- hist(scale$Score,breaks=100)
scale.s.h <- scale
scale.s.h <- data.frame(Score=scale.s.h[scale.s.h$Score>min,])
scale.s.l <- data.frame(Score=scale[scale$Score<h$mids[which.max(h$counts)],])
scale.max.h <- max(scale.s.h$Score)
scale.max.l <- max(scale.s.l$Score)
scale.s.h$Score <- as.integer(scale(scale.s.h$Score,FALSE,max(scale.s.h$Score))*100)
scale.s.l$Score <- as.integer(scale(scale.s.l$Score,FALSE,max(scale.s.l$Score))*100)
thresh.h <- auto_thresh(scale.s.h$Score,method="Triangle")
thresh.l <- auto_thresh(scale.s.l$Score,method="Triangle")
thresh.h <- (thresh.h/100)*scale.max.h
thresh.l <- (thresh.l/100)*scale.max.l
if (min>0){
thresh.l <- min
}
threshPlot <- ggplot(scale,aes(x=Score))+geom_histogram(bins=100,aes(y=..density..))+geom_density()+ggtitle(name)
if (direction=="h"){
threshPlot <- threshPlot+geom_vline(xintercept=thresh.h,size=1,color="red")
} else if (direction=="l"){
threshPlot <- threshPlot+geom_vline(xintercept=thresh.l,size=1,color="red")
} else if (direction=="b"){
threshPlot <- threshPlot+geom_vline(xintercept=thresh.h,size=1,color="red")
threshPlot <- threshPlot+geom_vline(xintercept=thresh.l,size=1,color="red")
}
results <- list(
thresh.h=thresh.h,
thresh.l=thresh.l,
threshPlot=threshPlot
)
return(results)
}
scCellCycle <- function(sc10x,sub=FALSE,sp="hu"){
#Runs Seurat based PCA analysis for cell cycle ID
......@@ -263,7 +325,7 @@ scCellCycle <- function(sc10x,sub=FALSE,sp="hu"){
}
scPC <- function(sc10x,pc=50,hpc=0.9,file="pre.stress",print="tsne",cca=FALSE){
scPC <- function(sc10x,pc=50,hpc=0.9,file="pre.stress",print="tsne"){
#Scale Seurat object & calculate # of PCs to use
#Inputs:
......@@ -276,9 +338,6 @@ scPC <- function(sc10x,pc=50,hpc=0.9,file="pre.stress",print="tsne",cca=FALSE){
#result[1] = Seurat object
#result[2] = # of PCs to use
#Find variable genes
if (cca==FALSE){sc10x <- FindVariableFeatures(object=sc10x,verbose=FALSE)}
#Run PCA
Idents(object=sc10x) <- "ALL"
sc10x <- RunPCA(object=sc10x,npcs=pc,verbose=FALSE)
......@@ -303,10 +362,12 @@ scPC <- function(sc10x,pc=50,hpc=0.9,file="pre.stress",print="tsne",cca=FALSE){
}
scCCA <- function(sc10x.l){
for (i in 1:length(sc10x.l)){
sc10x.l[[i]] <- NormalizeData(object=sc10x.l[[i]],verbose=FALSE)
sc10x.l[[i]] <- FindVariableFeatures(object=sc10x.l[[i]],verbose=FALSE)
scCCA <- function(sc10x.l,rerun=FALSE){
if (rerun==FALSE){
for (i in 1:length(sc10x.l)){
sc10x.l[[i]] <- NormalizeData(object=sc10x.l[[i]],verbose=FALSE)
sc10x.l[[i]] <- FindVariableFeatures(object=sc10x.l[[i]],verbose=FALSE)
}
}
sc10x <- FindIntegrationAnchors(object.list=sc10x.l,dims=1:30)
......@@ -434,7 +495,7 @@ scScore <- function(sc10x,score,geneset,cut.pt=0.9,anchor=FALSE){
}
#Score geneset
sc10x <- AddModuleScore(object=sc10x,features=geneset,name=score,assay="RNA")
sc10x <- AddModuleScore(object=sc10x,features=genes.stress,name=score,assay="RNA")
Idents(object=sc10x) <- paste0(score,"1")
#CDF
......@@ -569,8 +630,10 @@ scQuSAGE <- function(sc10x,gs,save=FALSE,type,id,ds=0,nm="pops",print="tsne"){
rownames(results.cor) <- NULL
results.clust.id <- NULL
if (max(results.cor[results.cor[,4]==groups[1] & results.cor[,3]<=0.05,][,2],na.rm=TRUE)>=0){
results.clust.id <- results.cor[results.cor[,4]==groups[1] & results.cor[,3]<=0.05,][which.max(results.cor[results.cor[,4]==groups[1] & results.cor[,3]<=0.05,][,2]),]
#if (max(results.cor[results.cor[,4]==groups[1] & results.cor[,3]<=0.05,][,2],na.rm=TRUE)>=0){
# results.clust.id <- results.cor[results.cor[,4]==groups[1] & results.cor[,3]<=0.05,][which.max(results.cor[results.cor[,4]==groups[1] & results.cor[,3]<=0.05,][,2]),]
if (max(results.cor[results.cor[,4]==groups[1],][,2],na.rm=TRUE)>=0){
results.clust.id <- results.cor[results.cor[,4]==groups[1],][which.max(results.cor[results.cor[,4]==groups[1],][,2]),]
} else {
results.clust.id$pathway.name <- "Unknown"
results.clust.id$log.fold.change <- 0
......@@ -579,8 +642,10 @@ scQuSAGE <- function(sc10x,gs,save=FALSE,type,id,ds=0,nm="pops",print="tsne"){
results.clust.id <- as.data.frame(results.clust.id)
}
for (i in groups[-1]){
if (max(results.cor[results.cor[,4]==i & results.cor[,3]<=0.05,][,2],na.rm=TRUE)>=0){
results.clust.id <- rbind(results.clust.id,results.cor[results.cor[,4]==i & results.cor[,3]<=0.05,][which.max(results.cor[results.cor[,4]==i & results.cor[,3]<=0.05,][,2]),])
#if (max(results.cor[results.cor[,4]==i & results.cor[,3]<=0.05,][,2],na.rm=TRUE)>=0){
# results.clust.id <- rbind(results.clust.id,results.cor[results.cor[,4]==i & results.cor[,3]<=0.05,][which.max(results.cor[results.cor[,4]==i & results.cor[,3]<=0.05,][,2]),])
if (max(results.cor[results.cor[,4]==i,][,2],na.rm=TRUE)>=0){
results.clust.id <- rbind(results.clust.id,results.cor[results.cor[,4]==i,][which.max(results.cor[results.cor[,4]==i,][,2]),])
} else {
results.clust.id <- rbind(results.clust.id,data.frame(pathway.name="Unknown",log.fold.change=0,FDR=0,Cluster=i))
}}
......@@ -640,8 +705,10 @@ scQuSAGE <- function(sc10x,gs,save=FALSE,type,id,ds=0,nm="pops",print="tsne"){
if (save==TRUE){
merge.cluster <- NULL
for (i in groups){
if (max(qsTable(get(paste0("results.",i)),number=length(gs))[qsTable(get(paste0("results.",i)),number=length(gs))[,4]<=0.05,][,2],na.rm=TRUE)>=0){
sc10x<-eval(parse(text=paste0("RenameIdents(object=sc10x,'",sub("Cluster_","",i),"' = '",qsTable(get(paste0("results.",i)),number=length(gs))[qsTable(get(paste0("results.",i)),number=length(gs))[2]==max(qsTable(get(paste0("results.",i)),number=length(gs))[qsTable(get(paste0("results.",i)),number=length(gs))[,4]<=0.05,][,2],na.rm=TRUE)][1],"')")))
#if (max(qsTable(get(paste0("results.",i)),number=length(gs))[qsTable(get(paste0("results.",i)),number=length(gs))[,4]<=0.05,][,2],na.rm=TRUE)>=0){
# sc10x<-eval(parse(text=paste0("RenameIdents(object=sc10x,'",sub("Cluster_","",i),"' = '",qsTable(get(paste0("results.",i)),number=length(gs))[qsTable(get(paste0("results.",i)),number=length(gs))[2]==max(qsTable(get(paste0("results.",i)),number=length(gs))[qsTable(get(paste0("results.",i)),number=length(gs))[,4]<=0.05,][,2],na.rm=TRUE)][1],"')")))
if (max(qsTable(get(paste0("results.",i)),number=length(gs))[,2],na.rm=TRUE)>=0){
sc10x<-eval(parse(text=paste0("RenameIdents(object=sc10x,'",sub("Cluster_","",i),"' = '",qsTable(get(paste0("results.",i)),number=length(gs))[qsTable(get(paste0("results.",i)),number=length(gs))[2]==max(qsTable(get(paste0("results.",i)),number=length(gs))[,2],na.rm=TRUE)][1],"')")))
} else {
sc10x<-eval(parse(text=paste0("RenameIdents(object=sc10x,'",sub("Cluster_","",i),"' = 'Unknown')")))
}}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment