Commit 447e05b8 authored by Gervaise H. Henry's avatar Gervaise H. Henry 🤠

Fix QC filter

parent 46b71b47
......@@ -44,13 +44,10 @@ rm(results)
sc10x <- sc10x[as.character(unlist(sc10x.groups[sc10x.groups$ALL==1,1]))]
#results <- scQC(sc10x,sp=opt$s,feature=c("nFeature_RNA","percent.mito","nCount_RNA"))
results <- scQC(sc10x,sp=opt$s,feature=c("nFeature_RNA"))
sc10x <- results[[1]]
counts.cell.raw <- results[[2]]
counts.gene.raw <- results[[3]]
#counts.cell.filtered <- results[[4]]
#counts.gene.filtered <- results[[5]]
counts.cell.filtered.1umi <- results[[4]]
counts.gene.filtered.1umi <- results[[5]]
rm(results)
......
......@@ -204,61 +204,48 @@ scQC <- function(sc10x,sp="hu",feature="nFeature_RNA"){
#Calculate cutoffs
thresh <- list()
#h <- list()
#cells.remove <- list()
#sc10x.temp <- list()
for (i in feature){
if (i == "nFeature_RNA"){
h <- list()
cells.remove <- list()
sc10x.temp <- list()
for (j in names(sc10x)){
h[[i]] <- hist(data.frame(sc10x[[j]][[i]])$nFeature_RNA,breaks=10,plot=FALSE)
cutoff.temp <- mean(c(h[[i]]$mids[which.max(h[[i]]$counts)],h[[i]]$mids[-which.max(h[[i]]$counts)][which.max(h[[i]]$counts[-which.max(h[[i]]$counts)])]))
cells.remove[[j]] <- c(cells.remove[[j]],rownames(sc10x[[j]][["nFeature_RNA"]])[sc10x[[j]][[i]][,1] < cutoff.temp])
sc10x.temp[[j]] <- subset(sc10x[[j]],cells=setdiff(colnames(sc10x[[j]]),cells.remove[[j]]))
h <- NULL
cutoff.temp <- NULL
cells.remove <- NULL
h <- hist(data.frame(sc10x[[j]][[i]])$nFeature_RNA,breaks=10,plot=FALSE)
cutoff.temp <- mean(c(h$mids[which.max(h$counts)],h$mids[-which.max(h$counts)][which.max(h$counts[-which.max(h$counts)])]))
cells.remove <- c(cells.remove,rownames(sc10x[[j]][["nFeature_RNA"]])[sc10x[[j]][[i]][,1] < cutoff.temp])
sc10x.temp[[j]] <- subset(sc10x[[j]],cells=cells.remove,invert=TRUE)
}
thresh[[i]] <- scThresh(sc10x.temp,feature=i)
thresh[[i]] <- scThresh(sc10x.temp,feature=i,sub="higher")
}
if (i == "percent.mito"){
thresh[[i]] <- scThresh(sc10x,feature=i)
h <- list()
cells.remove <- list()
sc10x.temp <- list()
thresh.l <- list()
cutoff.l.mito <- list()
for (j in names(sc10x)){
cutoff.l.mito[[j]] <- NULL
temp <- data.frame(sc10x[[j]][[i]])$percent.mito
h[[i]] <- hist(temp,breaks=100,plot=FALSE)
cutoff.temp <- mean(c(h[[i]]$mids[which.max(h[[i]]$counts)],h[[i]]$mids[-which.max(h[[i]]$counts)][which.max(h[[i]]$counts[-which.max(h[[i]]$counts)])]))
cells.remove[[j]] <- c(cells.remove[[j]],rownames(sc10x[[j]][["percent.mito"]])[sc10x[[j]][[i]][,1] > cutoff.temp])
sc10x.temp[[j]] <- subset(sc10x[[j]],cells=setdiff(colnames(sc10x[[j]]),cells.remove[[j]]))
thresh.l[[i]] <- scThresh(sc10x.temp,feature=i,sub="lower")
#cutoff.l.mito[[j]] <- thresh.l[[i]][[j]]$threshold[thresh.l[[i]][[j]]$method=="Triangle"]
cutoff.l.mito[[j]] <- thresh.l[[i]][[j]]$threshold[thresh.l[[i]][[j]]$method=="RenyiEntropy"]
h <- NULL
cutoff.temp <- NULL
cells.remove <- NULL
h <- hist(data.frame(sc10x[[j]][[i]])$percent.mito,breaks=100,plot=FALSE)
cutoff.temp <- mean(c(h$mids[which.max(h$counts)],h$mids[-which.max(h$counts)][which.max(h$counts[-which.max(h$counts)])]))
cells.remove <- c(cells.remove,rownames(sc10x[[j]][["percent.mito"]])[sc10x[[j]][[i]][,1] < cutoff.temp])
sc10x.temp[[j]] <- subset(sc10x[[j]],cells=cells.remove,invert=TRUE)
}
thresh[[i]] <- scThresh(sc10x.temp,feature=i,sub="higher")
}
if (i == "percent.ribo"){
thresh[[i]] <- scThresh(sc10x,feature=i)
thresh[[i]] <- scThresh(sc10x,feature=i,sub="all")
}
if (i == "nCount_RNA"){
thresh[[i]] <- scThresh(sc10x,feature=i)
h <- list()
cells.remove <- list()
sc10x.temp <- list()
thresh.l <- list()
cutoff.l.count <- list()
for (j in names(sc10x)){
cutoff.l.count[[j]] <- NULL
temp <- data.frame(sc10x[[j]][[i]])$nCount_RNA
h[[i]] <- hist(temp,breaks=seq(min(temp),max(temp),length.out=6),plot=FALSE)
cutoff.temp <- mean(c(h[[i]]$mids[which.max(h[[i]]$counts)],h[[i]]$mids[-which.max(h[[i]]$counts)][which.max(h[[i]]$counts[-which.max(h[[i]]$counts)])]))
cells.remove[[j]] <- c(cells.remove[[j]],rownames(sc10x[[j]][["nCount_RNA"]])[sc10x[[j]][[i]][,1] > cutoff.temp])
sc10x.temp[[j]] <- subset(sc10x[[j]],cells=setdiff(colnames(sc10x[[j]]),cells.remove[[j]]))
thresh.l[[i]] <- scThresh(sc10x.temp,feature=i,sub="lower")
cutoff.l.count[[j]] <- thresh.l[[i]][[j]]$threshold[thresh.l[[i]][[j]]$method=="MinErrorI"]
h <- NULL
cutoff.temp <- NULL
cells.remove <- NULL
h <- hist(data.frame(sc10x[[j]][[i]])$nCount_RNA,breaks=100,plot=FALSE)
cutoff.temp <- mean(c(h$mids[which.max(h$counts)],h$mids[-which.max(h$counts)][which.max(h$counts[-which.max(h$counts)])]))
cells.remove <- c(cells.remove,rownames(sc10x[[j]][["nCount_RNA"]])[sc10x[[j]][[i]][,1] < cutoff.temp])
sc10x.temp[[j]] <- subset(sc10x[[j]],cells=cells.remove,invert=TRUE)
}
thresh[[i]] <- scThresh(sc10x.temp,feature=i,sub="lower")
}
}
......@@ -300,35 +287,23 @@ scQC <- function(sc10x,sp="hu",feature="nFeature_RNA"){
plots.v <- list()
densities.s <- list()
plots.s <- list()
sc10x.temp <- NULL
for (j in names(sc10x)){
sc10x.temp <- sc10x[[j]]
plots.v[[j]] <- VlnPlot(object=sc10x.temp,features=i,pt.size=0.1,)+scale_x_discrete(labels=j)+scale_y_continuous(limits=c(0,max.y))+theme(legend.position="none",axis.text.x=element_text(hjust=0.5,angle=0))
if (i %in% c("nFeature_RNA","percent.mito","percent.ribo","nCount_RNA")){
if (i == "nFeature_RNA"){
#cutoff.l <- thresh[[i]][[j]]$threshold[thresh[[i]][[j]]$method=="MinErrorI"]
cutoff.h <- thresh[[i]][[j]]$threshold[thresh[[i]][[j]]$method=="RenyiEntropy"]
cutoff.l <- 200
#cutoff.h <- thresh[[i]][[j]]$threshold[thresh[[i]][[j]]$method=="Huang2"]
} else if (i == "percent.mito") {
h <- list()
cells.remove <- list()
sc10x.temps <- list()
thresh.h <- list()
temp <- data.frame(sc10x[[j]][[i]])$percent.mito
h[[i]] <- hist(temp,breaks=100,plot=FALSE)
cutoff.temp <- mean(c(h[[i]]$mids[which.max(h[[i]]$counts)],h[[i]]$mids[-which.max(h[[i]]$counts)][which.max(h[[i]]$counts[-which.max(h[[i]]$counts)])]))
cells.remove[[j]] <- c(cells.remove[[j]],rownames(sc10x[[j]][["percent.mito"]])[sc10x[[j]][[i]][,1] < cutoff.temp])
sc10x.temps[[j]] <- subset(sc10x[[j]],cells=setdiff(colnames(sc10x[[j]]),cells.remove[[j]]))
thresh.h[[i]] <- scThresh(sc10x.temps,feature=i,sub="higher")
cutoff.h <- thresh.h[[i]][[j]]$threshold[thresh.h[[i]][[j]]$method=="Triangle"]
cutoff.h <- thresh[[i]][[j]]$threshold[thresh[[i]][[j]]$method=="Triangle"]
cutoff.l <- 0
} else if (i == "percent.ribo") {
cutoff.h <- thresh[[i]][[j]]$threshold[thresh[[i]][[j]]$method=="RenyiEntropy"]
cutoff.l <- 0
} else if (i == "nCount_RNA") {
#cutoff.l <- thresh[[i]][[j]]$threshold[thresh[[i]][[j]]$method=="MinErrorI"]
cutoff.l <- cutoff.l.count[[j]]
cutoff.h <- max(sc10x[[j]][[i]])
cutoff.l <- thresh[[i]][[j]]$threshold[thresh[[i]][[j]]$method=="MinErrorI"]
}
plots.v[[j]] <- plots.v[[j]]+geom_hline(yintercept=cutoff.l,size=0.5,color="red")+geom_hline(yintercept=cutoff.h,size=0.5,color="red")
if (i != "nCount_RNA"){
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment