Commit 1e66398e authored by Gervaise Henry's avatar Gervaise Henry 🤠
Browse files

Add nCount_RNA low cutoff (also percent.ribo calc, but don't use for filter),...

Add nCount_RNA low cutoff (also percent.ribo calc, but don't use for filter), and make filtering consecutive)
parent f753547f
......@@ -20,8 +20,8 @@ options(bitmapType="cairo")
options(future.globals.maxSize= 1000000000000)
option_list=list(
make_option("--p",action="store",default="Biopsy",type='character',help="Project Name"),
make_option("--s",action="store",default="hu",type='character',help="Species")
make_option("--p",action="store",default="muBl",type='character',help="Project Name"),
make_option("--s",action="store",default="mu",type='character',help="Species")
)
opt=parse_args(OptionParser(option_list=option_list))
rm(option_list)
......@@ -41,12 +41,24 @@ sc10x <- results[[1]]
sc10x.groups <- results[[2]]
rm(results)
results <- scQC(sc10x,sp=opt$s)
results <- scQC(sc10x,sp=opt$s,feature="nFeature_RNA")
sc10x <- results[[1]]
counts.cell.raw <- results[[2]]
counts.gene.raw <- results[[3]]
counts.cell.filtered <- results[[4]]
counts.gene.filtered <- results[[5]]
counts.cell.filtered.1umi <- results[[4]]
counts.gene.filtered.1umi <- results[[5]]
rm(results)
results <- scQC(sc10x,sp=opt$s,feature="percent.mito")
sc10x <- results[[1]]
counts.cell.filtered.2mito <- results[[4]]
counts.gene.filtered.2mito <- results[[5]]
rm(results)
results <- scQC(sc10x,sp=opt$s,feature="nCount_RNA")
sc10x <- results[[1]]
counts.cell.filtered.3gene <- results[[4]]
counts.gene.filtered.3gene <- results[[5]]
rm(results)
# results <- scCellCycle(sc10x,sub=FALSE,sp="hu")
......
......@@ -166,7 +166,7 @@ scSubset <- function(sc10x,i="ALL",g="ALL"){
}
scQC <- function(sc10x,sp="hu"){
scQC <- function(sc10x,sp="hu",feature="nFeature_RNA"){
#QC and filter Seurat object
#Inputs:
......@@ -184,12 +184,15 @@ scQC <- function(sc10x,sp="hu"){
#Calculate percent mitochondrea
if (sp=="hu"){
mito.pattern <- "^MT-"
ribo.pattern <- "^(RPL|RPS)"
} else if (sp=="mu"){
mito.pattern <- "^mt-"
ribo.pattern <- "^(Rpl|Rps)"
}
for (i in names(sc10x)){
sc10x.temp <- sc10x[[i]]
sc10x.temp[["percent.mito"]] <- PercentageFeatureSet(object=sc10x.temp,pattern=mito.pattern)
sc10x.temp[["percent.ribo"]] <- PercentageFeatureSet(object=sc10x.temp,pattern=ribo.pattern)
#sc10x.temp <- subset(sc10x.temp,cell=names(which(is.na(sc10x.temp$percent.mito))),invert=TRUE)
sc10x[i] <- sc10x.temp
}
......@@ -199,17 +202,17 @@ scQC <- function(sc10x,sp="hu"){
#h <- list()
#cells.remove <- list()
#sc10x.temp <- list()
for (i in c("nFeature_RNA","percent.mito")){
for (i in feature){
if (i == "nFeature_RNA"){
h <- list()
cells.remove <- list()
sc10x.temp <- list()
for (j in names(sc10x)){
h[[i]] <- hist(data.frame(sc10x[[j]][[i]])$nFeature_RNA,breaks=10,plot=FALSE)
cutoff.temp <- mean(c(h[[i]]$mids[which.max(h[[i]]$counts)],h[[i]]$mids[-which.max(h[[i]]$counts)][which.max(h[[i]]$counts[-which.max(h[[i]]$counts)])]))
cells.remove[[j]] <- c(cells.remove[[j]],rownames(sc10x[[j]][["nFeature_RNA"]])[sc10x[[j]][[i]][,1] < cutoff.temp])
sc10x.temp[[j]] <- subset(sc10x[[j]],cells=setdiff(colnames(sc10x[[j]]),cells.remove[[j]]))
}
h <- list()
cells.remove <- list()
sc10x.temp <- list()
for (j in names(sc10x)){
h[[i]] <- hist(data.frame(sc10x[[j]][[i]])$nFeature_RNA,breaks=10,plot=FALSE)
cutoff.temp <- mean(c(h[[i]]$mids[which.max(h[[i]]$counts)],h[[i]]$mids[-which.max(h[[i]]$counts)][which.max(h[[i]]$counts[-which.max(h[[i]]$counts)])]))
cells.remove[[j]] <- c(cells.remove[[j]],rownames(sc10x[[j]][["nFeature_RNA"]])[sc10x[[j]][[i]][,1] < cutoff.temp])
sc10x.temp[[j]] <- subset(sc10x[[j]],cells=setdiff(colnames(sc10x[[j]]),cells.remove[[j]]))
}
thresh[[i]] <- scThresh(sc10x.temp,feature=i)
}
if (i == "percent.mito"){
......@@ -230,12 +233,28 @@ scQC <- function(sc10x,sp="hu"){
thresh[[i]] <- scThresh(sc10x,feature=i)
}
}
if (i == "percent.ribo"){
h <- list()
cells.remove <- list()
sc10x.temp <- list()
thresh.l <- list()
cutoff.l.ribo <- list()
for (j in names(sc10x)){
cutoff.l.ribo[[j]] <- NULL
h[[i]] <- hist(data.frame(sc10x[[j]][[i]])$percent.ribo,breaks=100,plot=FALSE)
thresh[[i]] <- scThresh(sc10x,feature=i)
}
}
if (i == "nCount_RNA"){
thresh[[i]] <- scThresh(sc10x,feature=i)
}
}
#Plot raw stats
max.ct <- 0
max.ft <- 0
max.mt <- 0
max.rb <- 0
for (i in names(sc10x)){
if (max.ct < max(sc10x[[i]][["nCount_RNA"]])){
max.ct <- max(sc10x[[i]][["nCount_RNA"]])
......@@ -246,20 +265,25 @@ scQC <- function(sc10x,sp="hu"){
if (max.mt < max(sc10x[[i]][["percent.mito"]])){
max.mt <- max(sc10x[[i]][["percent.mito"]])
}
if (max.rb < max(sc10x[[i]][["percent.ribo"]])){
max.rb <- max(sc10x[[i]][["percent.ribo"]])
}
}
max.ct <- max.ct*1.1
max.ft <- max.ft*1.1
max.mt <- max.mt*1.1
max.rb <- max.rb*1.1
cells.remove <- list()
for (i in c("nCount_RNA","nFeature_RNA","percent.mito")){
for (i in feature){
max.y <- 0
if (i == "nCount_RNA"){
cells.remove[[j]] <- NULL
max.y <- max.ct
} else if (i == "nFeature_RNA"){
max.y <- max.ft
} else if (i == "percent.mito"){
max.y <- max.mt
} else if (i == "percent.ribo"){
max.y <- max.rb
}
plots.v <- list()
densities.s <- list()
......@@ -267,24 +291,35 @@ scQC <- function(sc10x,sp="hu"){
for (j in names(sc10x)){
sc10x.temp <- sc10x[[j]]
plots.v[[j]] <- VlnPlot(object=sc10x.temp,features=i,pt.size=0.1,)+scale_x_discrete(labels=j)+scale_y_continuous(limits=c(0,max.y))+theme(legend.position="none",axis.text.x=element_text(hjust=0.5,angle=0))
if (i %in% c("nFeature_RNA","percent.mito")){
if (i %in% c("nFeature_RNA","percent.mito","percent.ribo","nCount_RNA")){
if (i == "nFeature_RNA"){
#cutoff.l <- thresh[[i]][[j]]$threshold[thresh[[i]][[j]]$method=="MinErrorI"]
cutoff.h <- thresh[[i]][[j]]$threshold[thresh[[i]][[j]]$method=="RenyiEntropy"]
cutoff.l <- 200
#cutoff.h <- thresh[[i]][[j]]$threshold[thresh[[i]][[j]]$method=="Huang2"]
} else {
} else if (i == "percent.mito") {
#cutoff.l <- cutoff.l.mito[[j]]
cutoff.h <- thresh[[i]][[j]]$threshold[thresh[[i]][[j]]$method=="Triangle"]
cutoff.l <- 0
} else if (i == "percent.ribo") {
cutoff.h <- thresh[[i]][[j]]$threshold[thresh[[i]][[j]]$method=="RenyiEntropy"]
cutoff.l <- 0
} else if (i == "nCount_RNA") {
cutoff.l <- thresh[[i]][[j]]$threshold[thresh[[i]][[j]]$method=="Triangle"]
cutoff.h <- max(sc10x[[j]][[i]])
}
plots.v[[j]] <- plots.v[[j]]+geom_hline(yintercept=cutoff.l,size=0.5,color="red")+geom_hline(yintercept=cutoff.h,size=0.5,color="red")
densities.s[[j]] <- density(sc10x.temp$nCount_RNA,sc10x.temp[[i]][,1],n=1000)
plots.s[[j]] <- ggplotGrob(ggplot(data.frame(cbind(sc10x.temp$nCount_RNA,sc10x.temp[[i]][,1])))+geom_point(aes(x=X1,y=X2,color=densities.s[[j]]),size=0.1)+scale_x_continuous(limits=c(0,max.ct))+scale_y_continuous(limits=c(0,max.y))+scale_color_viridis(option="inferno")+labs(x="nCount",y=i,color="Density")+ggtitle(j)+cowplot::theme_cowplot()+theme(plot.title=element_text(size=7.5),axis.title=element_text(size=7.5),axis.text=element_text(size=5,angle=45),legend.position="bottom",legend.title=element_text(size=5,face="bold",vjust=1),legend.text=element_text(size=5,angle=45))+guides(color=guide_colourbar(barwidth=5,barheight=0.5))+geom_hline(yintercept=cutoff.l,size=0.1,color="red")+geom_hline(yintercept=cutoff.h,size=0.1,color="red"))
if (i != "nCount_RNA"){
densities.s[[j]] <- density(sc10x.temp$nCount_RNA,sc10x.temp[[i]][,1],n=1000)
plots.s[[j]] <- ggplotGrob(ggplot(data.frame(cbind(sc10x.temp$nCount_RNA,sc10x.temp[[i]][,1])))+geom_point(aes(x=X1,y=X2,color=densities.s[[j]]),size=0.1)+scale_x_continuous(limits=c(0,max.ct))+scale_y_continuous(limits=c(0,max.y))+scale_color_viridis(option="inferno")+labs(x="nCount_RNA",y=i,color="Density")+ggtitle(j)+cowplot::theme_cowplot()+theme(plot.title=element_text(size=7.5),axis.title=element_text(size=7.5),axis.text=element_text(size=5,angle=45),legend.position="bottom",legend.title=element_text(size=5,face="bold",vjust=1),legend.text=element_text(size=5,angle=45))+guides(color=guide_colourbar(barwidth=5,barheight=0.5))+geom_hline(yintercept=cutoff.l,size=0.1,color="red")+geom_hline(yintercept=cutoff.h,size=0.1,color="red"))
} else {
densities.s[[j]] <- density(sc10x.temp$nFeature_RNA,sc10x.temp[[i]][,1],n=1000)
plots.s[[j]] <- ggplotGrob(ggplot(data.frame(cbind(sc10x.temp$nFeature_RNA,sc10x.temp[[i]][,1])))+geom_point(aes(x=X1,y=X2,color=densities.s[[j]]),size=0.1)+scale_x_continuous(limits=c(0,max.ct))+scale_y_continuous(limits=c(0,max.y))+scale_color_viridis(option="inferno")+labs(x="nFeature_RNA",y=i,color="Density")+ggtitle(j)+cowplot::theme_cowplot()+theme(plot.title=element_text(size=7.5),axis.title=element_text(size=7.5),axis.text=element_text(size=5,angle=45),legend.position="bottom",legend.title=element_text(size=5,face="bold",vjust=1),legend.text=element_text(size=5,angle=45))+guides(color=guide_colourbar(barwidth=5,barheight=0.5))+geom_hline(yintercept=cutoff.l,size=0.1,color="red")+geom_hline(yintercept=cutoff.h,size=0.1,color="red"))
}
cells.remove[[j]] <- c(cells.remove[[j]],rownames(sc10x[[j]][[i]])[sc10x[[j]][[i]][,1] < cutoff.l | sc10x[[j]][[i]][,1] > cutoff.h])
}
ggsave(paste0("./analysis/qc/Violin_qc.raw.",i,".",j,".eps"),plot=plots.v[[j]])
if (i %in% c("nFeature_RNA","percent.mito")){
if (i %in% c("nFeature_RNA","percent.mito","percent.ribo","nCount_RNA")){
ggsave(paste0("./analysis/qc/Plot_qc.raw.",i,".",j,".eps"),plot=plots.s[[j]])
}
}
......@@ -308,6 +343,7 @@ scQC <- function(sc10x,sp="hu"){
max.ct <- 0
max.ft <- 0
max.mt <- 0
max.rb <- 0
for (i in names(sc10x)){
if (max.ct < max(sc10x.sub[[i]][["nCount_RNA"]])){
max.ct <- max(sc10x.sub[[i]][["nCount_RNA"]])
......@@ -318,11 +354,15 @@ scQC <- function(sc10x,sp="hu"){
if (max.mt < max(sc10x.sub[[i]][["percent.mito"]])){
max.mt <- max(sc10x.sub[[i]][["percent.mito"]])
}
if (max.rb < max(sc10x.sub[[i]][["percent.ribo"]])){
max.rb <- max(sc10x.sub[[i]][["percent.ribo"]])
}
}
max.ct <- max.ct*1.1
max.ft <- max.ft*1.1
max.mt <- max.mt*1.1
for (i in c("nCount_RNA","nFeature_RNA","percent.mito")){
max.rb <- max.rb*1.1
for (i in feature){
max.y <- 0
if (i == "nCount_RNA"){
max.y <- max.ct
......@@ -330,6 +370,8 @@ scQC <- function(sc10x,sp="hu"){
max.y <- max.ft
} else if (i == "percent.mito"){
max.y <- max.mt
} else if (i == "percent.ribo"){
max.y <- max.rb
}
plots.v <- list()
densities.s <- list()
......@@ -337,12 +379,15 @@ scQC <- function(sc10x,sp="hu"){
for (j in names(sc10x.sub)){
sc10x.temp <- sc10x.sub[[j]]
plots.v[[j]] <- VlnPlot(object=sc10x.temp,features=i,pt.size=0.1,)+scale_x_discrete(labels=j)+scale_y_continuous(limits=c(0,max.y))+theme(legend.position="none",axis.text.x=element_text(hjust=0.5,angle=0))
if (i %in% c("nFeature_RNA","percent.mito")){
if (i != "nCount_RNA"){
densities.s[[j]] <- density(sc10x.temp$nCount_RNA,sc10x.temp[[i]][,1],n=1000)
plots.s[[j]] <- ggplotGrob(ggplot(data.frame(cbind(sc10x.temp$nCount_RNA,sc10x.temp[[i]][,1])))+geom_point(aes(x=X1,y=X2,color=densities.s[[j]]),size=0.1)+scale_x_continuous(limits=c(0,max.ct))+scale_y_continuous(limits=c(0,max.y))+scale_color_viridis(option="inferno")+labs(x="nCount",y=i,color="Density")+ggtitle(j)+cowplot::theme_cowplot()+theme(plot.title=element_text(size=7.5),axis.title=element_text(size=7.5),axis.text=element_text(size=5,angle=45),legend.position="bottom",legend.title=element_text(size=5,face="bold",vjust=1),legend.text=element_text(size=5,angle=45))+guides(color=guide_colourbar(barwidth=5,barheight=0.5)))
plots.s[[j]] <- ggplotGrob(ggplot(data.frame(cbind(sc10x.temp$nCount_RNA,sc10x.temp[[i]][,1])))+geom_point(aes(x=X1,y=X2,color=densities.s[[j]]),size=0.1)+scale_x_continuous(limits=c(0,max.ct))+scale_y_continuous(limits=c(0,max.y))+scale_color_viridis(option="inferno")+labs(x="nCount_RNA",y=i,color="Density")+ggtitle(j)+cowplot::theme_cowplot()+theme(plot.title=element_text(size=7.5),axis.title=element_text(size=7.5),axis.text=element_text(size=5,angle=45),legend.position="bottom",legend.title=element_text(size=5,face="bold",vjust=1),legend.text=element_text(size=5,angle=45))+guides(color=guide_colourbar(barwidth=5,barheight=0.5)))
} else {
densities.s[[j]] <- density(sc10x.temp$nFeature_RNA,sc10x.temp[[i]][,1],n=1000)
plots.s[[j]] <- ggplotGrob(ggplot(data.frame(cbind(sc10x.temp$nFeature_RNA,sc10x.temp[[i]][,1])))+geom_point(aes(x=X1,y=X2,color=densities.s[[j]]),size=0.1)+scale_x_continuous(limits=c(0,max.ct))+scale_y_continuous(limits=c(0,max.y))+scale_color_viridis(option="inferno")+labs(x="nFeature_RNA",y=i,color="Density")+ggtitle(j)+cowplot::theme_cowplot()+theme(plot.title=element_text(size=7.5),axis.title=element_text(size=7.5),axis.text=element_text(size=5,angle=45),legend.position="bottom",legend.title=element_text(size=5,face="bold",vjust=1),legend.text=element_text(size=5,angle=45))+guides(color=guide_colourbar(barwidth=5,barheight=0.5)))
}
ggsave(paste0("./analysis/qc/Violin_qc.filtered.",i,".",j,".eps"),plot=plots.v[[j]])
if (i %in% c("nFeature_RNA","percent.mito")){
if (i %in% c("nFeature_RNA","percent.mito","percent.ribo","nCount_RNA")){
ggsave(paste0("./analysis/qc/Plot_qc.filtered.",i,".",j,".eps"),plot=plots.s[[j]])
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment