diff --git a/bash.scripts/sc_TissueMapper-DS_D17.sh b/bash.scripts/sc_TissueMapper-DS_D17.sh
new file mode 100644
index 0000000000000000000000000000000000000000..198685b1bb589757f0f23c097dea4d6f9d7f4a8b
--- /dev/null
+++ b/bash.scripts/sc_TissueMapper-DS_D17.sh
@@ -0,0 +1,13 @@
+#!/bin/bash
+#SBATCH --job-name R_FullAnalysis
+#SBATCH -p 256GB,256GBv1
+#SBATCH -N 1
+#SBATCH -t 7-0:0:0
+#SBATCH -o job_%j.out
+#SBATCH -e job_%j.out
+#SBATCH --mail-type ALL
+#SBATCH --mail-user gervaise.henry@utsouthwestern.edu
+
+module load R/3.4.1-gccmkl
+
+Rscript ../r.scripts/sc-TissueMapper_RUN.DS_D17.R
diff --git a/r.scripts/sc-TissueMapper.R b/r.scripts/sc-TissueMapper.R
index 53b6d5874a9435b026be315a74d9c02dea0805bc..196bb868929a39fa62e848ef433c1fd63dcb4cd5 100644
--- a/r.scripts/sc-TissueMapper.R
+++ b/r.scripts/sc-TissueMapper.R
@@ -103,14 +103,17 @@ scCellCycle <- function(sc10x,sub=FALSE){
   sc10x <- ScaleData(object=sc10x,display.progress=FALSE,do.par=TRUE,num.cores=45)
   sc10x <- CellCycleScoring(object=sc10x,s.genes=genes.s,g2m.genes=genes.g2m,set.ident=TRUE)
 
-  postscript(paste0(folder,"Ridge_cc.Raw.eps"))
-  plot <- RidgePlot(object=sc10x,features.plot=c("PCNA","TOP2A","MCM6","MKI67"),y.log=TRUE,nCol=2,do.return=TRUE)
-  plot(plot)
-  dev.off()
-  postscript(paste0(folder,"Violin_cc.Raw.eps"))
-  plot <- VlnPlot(object=sc10x,features.plot=c("PCNA","TOP2A","MCM6","MKI67"),nCol=2,point.size.use=1,size.title.use=20,x.lab.rot=TRUE)
-  plot(plot)
-  dev.off()
+  tryCatch({
+    postscript(paste0(folder,"Ridge_cc.Raw.eps"))
+    plot <- RidgePlot(object=sc10x,features.plot=c("PCNA","TOP2A","MCM6","MKI67"),y.log=TRUE,nCol=2,do.return=TRUE)
+    plot(plot)
+    dev.off()
+    postscript(paste0(folder,"Violin_cc.Raw.eps"))
+    plot <- VlnPlot(object=sc10x,features.plot=c("PCNA","TOP2A","MCM6","MKI67"),nCol=2,point.size.use=1,size.title.use=20,x.lab.rot=TRUE)
+    plot(plot)
+    dev.off()
+  },error=function(e){cat("ERROR : ",conditionMessage(e),"/\n")})
+
   sc10x <- RunPCA(object=sc10x,pc.genes=c(genes.s,genes.g2m),do.print=FALSE,pcs.store=2)
   postscript(paste0(folder,"PCA_cc.Raw.eps"))
   plot <- PCAPlot(object=sc10x,do.return=TRUE)
@@ -401,7 +404,7 @@ scStress <- function(sc10x,stg="go",res.use=1,pc.use=10,cut=0.95){
   dev.off()
 
   #Subsample all cells (+Stress) to better visualize their clustering
-  if (ncol(sc10x@data)<2500){
+  if (ncol(sc10x@data)>2500){
     rnd <- sample(1:ncol(sc10x@data),2500)
   } else {
     rnd <- 1:ncol(sc10x@data)
@@ -898,7 +901,7 @@ scNE <- function(sc10x,neg="EurUro",cut=0.95){
   dev.off()
 
   #Subsample all cells (+NE) to better visualize their clustering
-  if (ncol(sc10x@data)<2500){
+  if (ncol(sc10x@data)>2500){
     rnd <- sample(1:ncol(sc10x@data),2500)
   } else {
     rnd <- 1:ncol(sc10x@data)
diff --git a/r.scripts/sc-TissueMapper_RUN.DS_D17.R b/r.scripts/sc-TissueMapper_RUN.DS_D17.R
new file mode 100644
index 0000000000000000000000000000000000000000..b1766dc054b9bec797537e88a4d194604f7aa1d7
--- /dev/null
+++ b/r.scripts/sc-TissueMapper_RUN.DS_D17.R
@@ -0,0 +1,373 @@
+#Run script for DS_D17: drives the sc-TissueMapper.R functions on the 10x matrix under ./analysis/DATA/10x
+#and writes plots and saved objects under ./analysis
+gc()
+library(methods)
+library(optparse)
+library(Seurat)
+library(readr)
+library(fBasics)
+library(pastecs)
+library(qusage)
+library(RColorBrewer)
+library(monocle)
+library(dplyr)
+library(viridis)
+library(reshape2)
+library(NMI)
+
+source("../r.scripts/sc-TissueMapper.R")
+
+#Create folder structure
+setwd("../")
+if (!dir.exists("./analysis")){
+  dir.create("./analysis")
+}
+if (!dir.exists("./analysis/qc")){
+  dir.create("./analysis/qc")
+}
+if (!dir.exists("./analysis/qc/cc")){
+  dir.create("./analysis/qc/cc")
+}
+if (!dir.exists("./analysis/tSNE")){
+  dir.create("./analysis/tSNE")
+}
+if (!dir.exists("./analysis/tSNE/pre.stress")){
+  dir.create("./analysis/tSNE/pre.stress")
+}
+if (!dir.exists("./analysis/pca")){
+  dir.create("./analysis/pca")
+}
+if (!dir.exists("./analysis/pca/stress")){
+  dir.create("./analysis/pca/stress")
+}
+if (!dir.exists("./analysis/violin")){
+  dir.create("./analysis/violin")
+}
+if (!dir.exists("./analysis/violin/stress")){
+  dir.create("./analysis/violin/stress")
+}
+if (!dir.exists("./analysis/table")){
+  dir.create("./analysis/table")
+}
+if (!dir.exists("./analysis/tSNE/post.stress")){
+  dir.create("./analysis/tSNE/post.stress")
+}
+if (!dir.exists("./analysis/cor")){
+  dir.create("./analysis/cor")
+}
+if (!dir.exists("./analysis/tSNE/lin")){
+  dir.create("./analysis/tSNE/lin")
+}
+if (!dir.exists("./analysis/tSNE/epi")){
+  dir.create("./analysis/tSNE/epi")
+}
+if (!dir.exists("./analysis/tSNE/st")){
+  dir.create("./analysis/tSNE/st")
+}
+if (!dir.exists("./analysis/tSNE/merge")){
+  dir.create("./analysis/tSNE/merge")
+}
+if (!dir.exists("./analysis/pca/ne")){
+  dir.create("./analysis/pca/ne")
+}
+if (!dir.exists("./analysis/tSNE/ne")){
+  dir.create("./analysis/tSNE/ne")
+}
+if (!dir.exists("./analysis/violin/ne")){
+  dir.create("./analysis/violin/ne")
+}
+if (!dir.exists("./analysis/tSNE/FINAL")){
+  dir.create("./analysis/tSNE/FINAL")
+}
+if (!dir.exists("./analysis/deg")){
+  dir.create("./analysis/deg")
+}
+if (!dir.exists("./analysis/cca")){
+  dir.create("./analysis/cca")
+}
+if (!dir.exists("./analysis/diy")){
+  dir.create("./analysis/diy")
+}
+if (!dir.exists("./analysis/pseudotime")){
+  dir.create("./analysis/pseudotime")
+}
+
+#Retrieve command-line options
+option_list=list(
+  make_option("--p",action="store",default="DPrF",type='character',help="Project Name"),
+  make_option("--g",action="store",default="ALL",type='character',help="Group To analyze"),
+  make_option("--lg",action="store",default=0,type='integer',help="Threshold for cells with minimum genes"),
+  make_option("--hg",action="store",default=3000,type='integer',help="Threshold for cells with maximum genes"),
+  make_option("--lm",action="store",default=0,type='numeric',help="Threshold for cells with minimum %mito genes"),
+  make_option("--hm",action="store",default=0.1,type='numeric',help="Threshold for cells with maximum %mito genes"),
+  make_option("--lx",action="store",default=0.2,type='numeric',help="x low threshold for hvg selection"),
+  make_option("--hx",action="store",default=5,type='numeric',help="x high threshold for hvg selection"),
+  make_option("--ly",action="store",default=1,type='numeric',help="y low threshold for hvg selection"),
+  make_option("--cc",action="store",default=TRUE,type='logical',help="Scale cell cycle?"),
+  make_option("--cca",action="store",default=50,type='integer',help="Number of CCAs to cacluate"),
+  make_option("--acca",action="store",default=30,type='integer',help="Number of CCAs to align"),
+  make_option("--pc",action="store",default=50,type='integer',help="Number of PCs to cacluate"),
+  make_option("--res.prestress",action="store",default=1,type='numeric',help="Resolution to cluster, pre-stress"),
+  make_option("--st",action="store",default=TRUE,type='logical',help="Remove stressed cells?"),
+  make_option("--stg",action="store",default="dws",type='character',help="Geneset to use for stress ID"),
+  make_option("--cut.stress",action="store",default=0.9,type='numeric',help="Cutoff for stress score"),
+  make_option("--res.poststress",action="store",default=1,type='numeric',help="Resolution to cluster, post-stress"),
+  make_option("--cut.ne",action="store",default=0.999,type='numeric',help="Cutoff for NE score")
+)
+opt=parse_args(OptionParser(option_list=option_list))
+rm(option_list)
+if (opt$lg==0){opt$lg=-Inf}
+if (opt$lm==0){opt$lm=-Inf}
+
+#Load the 10x matrix and build the Seurat object; every cell is tagged with the single sample label "All"
+sc10x.data <- Read10X(data.dir="./analysis/DATA/10x/filtered_gene_bc_matrices/GRCh38/")
+sc10x <- new("seurat",raw.data=sc10x.data)
+cell.codes <- as.data.frame(sc10x@raw.data@Dimnames[[2]])
+colnames(cell.codes) <- "barcodes"
+rownames(cell.codes) <- cell.codes$barcodes
+cell.codes$samples <- "All"
+sc10x <- CreateSeuratObject(raw.data=sc10x.data,meta.data=cell.codes["samples"],min.cells=3,min.genes=-Inf,project="DS.D17")
+rm(cell.codes)
+rm(sc10x.data)
+
+#Optional cell-cycle scoring; the S/G2M gene lists are set to "" when skipped
+if (opt$cc==TRUE){
+  results <- scCellCycle(sc10x)
+  sc10x <- results[[1]]
+  genes.s <- results[[2]]
+  genes.g2m <- results[[3]]
+  rm(results)
+} else {
+  genes.s=""
+  genes.g2m=""
+}
+
+#QC with the gene-count and %mito thresholds from the command line
+results <- scQC(sc10x,lg=opt$lg,hg=opt$hg,lm=opt$lm,hm=opt$hm)
+sc10x <- results[[1]]
+counts.cell.raw <- results[[2]]
+counts.gene.raw <- results[[3]]
+counts.cell.filtered <- results[[4]]
+counts.gene.filtered <- results[[5]]
+rm(results)
+
+#Scale data, regressing out nUMI and percent.mito (plus the cell-cycle scores when --cc is TRUE)
+gc()
+if (opt$cc==TRUE){
+  sc10x <- ScaleData(object=sc10x,vars.to.regress=c("nUMI","percent.mito","S.Score","G2M.Score"),display.progress=FALSE,do.par=TRUE,num.cores=45)
+} else {
+  sc10x <- ScaleData(object=sc10x,vars.to.regress=c("nUMI","percent.mito"),display.progress=FALSE,do.par=TRUE,num.cores=45)
+}
+gc()
+
+#PCA and clustering before stress removal; --pc (default 50) sets the number of PCs
+results <- scPC(sc10x,lx=opt$lx,hx=opt$hx,ly=opt$ly,cc=opt$cc,pc=opt$pc,hpc=0.85,file="pre.stress",cca=FALSE)
+sc10x <- results[[1]]
+genes.hvg.prestress <- results[[2]]
+pc.use.prestress <- results[[3]]
+rm(results)
+
+sc10x <- scCluster(sc10x,pc.use=pc.use.prestress,res.use=opt$res.prestress,folder="pre.stress",red="pca")
+
+#Stress identification, then PCA and clustering again on the returned object
+#NOTE(review): everything below uses pc.use.poststress, which is only set in this branch, so --st FALSE looks unsupported
+if (opt$st==TRUE){
+  results <- scStress(sc10x,stg=opt$stg,res.use=opt$res.prestress,cut=opt$cut.stress)
+  sc10x <- results[[1]]
+  counts.cell.filtered.stress <- results[[2]]
+  sc10x.Stress <- results[[3]]
+  rm(results)
+
+  results <- scPC(sc10x,lx=opt$lx,hx=opt$hx,ly=opt$ly,cc=opt$cc,pc=opt$pc,hpc=0.85,file="post.stress",cca=FALSE)
+  sc10x <- results[[1]]
+  genes.hvg.poststress <- results[[2]]
+  pc.use.poststress <- results[[3]]
+  rm(results)
+
+  sc10x <- scCluster(sc10x,pc.use=pc.use.poststress,res.use=opt$res.poststress,folder="post.stress",red="pca")
+}
+
+#Lineage identification (Epi vs St gene sets) with scQuSAGE; result stored under the name "Lin"
+gene.set1 <- read_delim("./genesets/genes.deg.Epi.csv",",",escape_double=FALSE,trim_ws=TRUE,col_names=TRUE)
+gene.set1 <- gene.set1[1]
+gene.set1 <- as.list(gene.set1)
+names(gene.set1) <- "Epi"
+gene.set <- c(gene.set1)
+gene.set1 <- read_delim("./genesets/genes.deg.St.csv",",",escape_double=FALSE,trim_ws=TRUE,col_names=TRUE)
+gene.set1 <- gene.set1[1]
+gene.set1 <- as.list(gene.set1)
+names(gene.set1) <- "St"
+gene.set <- c(gene.set,gene.set1)
+rm(gene.set1)
+gc()
+min.all <- min(table(sc10x@meta.data[,paste0("res",opt$res.poststress)]))
+results <- scQuSAGE(sc10x,gs=gene.set,res.use=opt$res.poststress,ds=min.all,nm="Lin",folder="lin")
+sc10x <- results[[1]]
+results.cor.Lin <- results[[2]]
+results.clust.Lin.id <- results[[3]]
+rm(results)
+rm(gene.set)
+
+#Split by lineage; cells labelled "Unknown", when present, are kept with the stromal subset
+sc10x <- SetAllIdent(object=sc10x,id="Lin")
+sc10x.Epi <- scSubset(sc10x,i="Lin",g="Epi")
+if (any(levels(sc10x@ident)=="Unknown")){
+  sc10x.St <- scSubset(sc10x,i="Lin",g=c("St","Unknown"))
+} else {
+  sc10x.St <- scSubset(sc10x,i="Lin",g="St")
+}
+sc10x.Epi <- SetAllIdent(object=sc10x.Epi,id=paste0("res",opt$res.poststress))
+sc10x.Epi <- BuildClusterTree(sc10x.Epi,do.reorder=TRUE,reorder.numeric=TRUE,do.plot=FALSE)
+sc10x.Epi <- StashIdent(object=sc10x.Epi,save.name=paste0("res",opt$res.poststress))
+sc10x.St <- SetAllIdent(object=sc10x.St,id=paste0("res",opt$res.poststress))
+sc10x.St <- BuildClusterTree(sc10x.St,do.reorder=TRUE,reorder.numeric=TRUE,do.plot=FALSE)
+sc10x.St <- StashIdent(object=sc10x.St,save.name=paste0("res",opt$res.poststress))
+
+#tSNE of the epithelial subset, plotted by sample and by cluster
+sc10x.Epi <- RunTSNE(object=sc10x.Epi,reduction.use="pca",dims.use=1:pc.use.poststress,do.fast=TRUE)
+postscript(paste0("./analysis/tSNE/epi/tSNE_Sample.eps"))
+plot <- TSNEPlot(object=sc10x.Epi,group.by="samples",pt.size=2.5,do.return=TRUE,vector.friendly=FALSE)
+plot <- plot+theme(axis.text.x=element_text(size=20),axis.text.y=element_text(size=20),axis.title.x=element_text(size=20),axis.title.y=element_text(size=20),legend.text=element_text(size=20))
+plot <- plot+guides(colour=guide_legend(override.aes=list(size=10)))
+plot(plot)
+dev.off()
+postscript(paste0("./analysis/tSNE/epi/tSNE_res",opt$res.poststress,".eps"))
+plot <- TSNEPlot(object=sc10x.Epi,pt.size=5,do.label=TRUE,label.size=10,do.return=TRUE,vector.friendly=FALSE)
+plot <- plot+theme(axis.text.x=element_text(size=20),axis.text.y=element_text(size=20),axis.title.x=element_text(size=20),axis.title.y=element_text(size=20),legend.text=element_text(size=20))
+plot <- plot+guides(colour=guide_legend(override.aes=list(size=10)))
+plot(plot)
+dev.off()
+rm(plot)
+
+#tSNE of the stromal subset, plotted by sample and by cluster
+sc10x.St <- RunTSNE(object=sc10x.St,reduction.use="pca",dims.use=1:pc.use.poststress,do.fast=TRUE)
+postscript(paste0("./analysis/tSNE/st/tSNE_Sample.eps"))
+plot <- TSNEPlot(object=sc10x.St,group.by="samples",pt.size=2.5,do.return=TRUE,vector.friendly=FALSE)
+plot <- plot+theme(axis.text.x=element_text(size=20),axis.text.y=element_text(size=20),axis.title.x=element_text(size=20),axis.title.y=element_text(size=20),legend.text=element_text(size=20))
+plot <- plot+guides(colour=guide_legend(override.aes=list(size=10)))
+plot(plot)
+dev.off()
+postscript(paste0("./analysis/tSNE/st/tSNE_res",opt$res.poststress,".eps"))
+plot <- TSNEPlot(object=sc10x.St,pt.size=5,do.label=TRUE,label.size=10,do.return=TRUE,vector.friendly=FALSE)
+plot <- plot+theme(axis.text.x=element_text(size=20),axis.text.y=element_text(size=20),axis.title.x=element_text(size=20),axis.title.y=element_text(size=20),legend.text=element_text(size=20))
+plot <- plot+guides(colour=guide_legend(override.aes=list(size=10)))
+plot(plot)
+dev.off()
+rm(plot)
+
+#Epithelial population identification with scQuSAGE; result stored under the name "Epi.dws.sc"
+gene.set1 <- read_delim("./genesets/genes.deg.BE.csv",",",escape_double=FALSE,trim_ws=TRUE,col_names=TRUE)
+gene.set1 <- gene.set1[1]
+gene.set1 <- as.list(gene.set1)
+names(gene.set1) <- "BE"
+gene.set <- c(gene.set1)
+gene.set1 <- read_delim("./genesets/genes.deg.LE.csv",",",escape_double=FALSE,trim_ws=TRUE,col_names=TRUE)
+gene.set1 <- gene.set1[1]
+gene.set1 <- as.list(gene.set1)
+names(gene.set1) <- "LE"
+gene.set <- c(gene.set,gene.set1)
+gene.set1 <- read_delim("./genesets/genes.deg.OE1.csv",",",escape_double=FALSE,trim_ws=TRUE,col_names=TRUE)
+gene.set1 <- gene.set1[1]
+gene.set1 <- as.list(gene.set1)
+names(gene.set1) <- "OE_SCGB"
+gene.set <- c(gene.set,gene.set1)
+gene.set1 <- read_delim("./genesets/genes.deg.OE2.csv",",",escape_double=FALSE,trim_ws=TRUE,col_names=TRUE)
+gene.set1 <- gene.set1[1]
+gene.set1 <- as.list(gene.set1)
+names(gene.set1) <- "OE_KRT13"
+gene.set <- c(gene.set,gene.set1)
+rm(gene.set1)
+gc()
+min.epi <- min(table(sc10x.Epi@meta.data[,paste0("res",opt$res.poststress)]))
+results <- scQuSAGE(sc10x.Epi,gs=gene.set,res.use=opt$res.poststress,ds=min.epi,nm="Epi.dws.sc",folder="epi")
+sc10x.Epi <- results[[1]]
+results.cor.Epi.dws <- results[[2]]
+results.clust.Epi.dws.id <- results[[3]]
+rm(results)
+rm(gene.set)
+
+#Stromal population identification with scQuSAGE; result stored under the name "St.dws.sc"
+gene.set1 <- read_delim("./genesets/genes.deg.Endo.csv",",",escape_double=FALSE,trim_ws=TRUE,col_names=TRUE)
+gene.set1 <- gene.set1[1]
+gene.set1 <- as.list(gene.set1)
+names(gene.set1) <- "Endo"
+gene.set <- c(gene.set1)
+gene.set1 <- read_delim("./genesets/genes.deg.SM.csv",",",escape_double=FALSE,trim_ws=TRUE,col_names=TRUE)
+gene.set1 <- gene.set1[1]
+gene.set1 <- as.list(gene.set1)
+names(gene.set1) <- "SM"
+gene.set <- c(gene.set,gene.set1)
+gene.set1 <- read_delim("./genesets/genes.deg.Fib.csv",",",escape_double=FALSE,trim_ws=TRUE,col_names=TRUE)
+gene.set1 <- gene.set1[1]
+gene.set1 <- as.list(gene.set1)
+names(gene.set1) <- "Fib"
+gene.set <- c(gene.set,gene.set1)
+gene.set1 <- read_delim("./genesets/genes.deg.Leu.csv",",",escape_double=FALSE,trim_ws=TRUE,col_names=TRUE)
+gene.set1 <- gene.set1[1]
+gene.set1 <- as.list(gene.set1)
+names(gene.set1) <- "Leu"
+gene.set <- c(gene.set,gene.set1)
+rm(gene.set1)
+gc()
+min.st <- min(table(sc10x.St@meta.data[,paste0("res",opt$res.poststress)]))
+results <- scQuSAGE(sc10x.St,gs=gene.set,res.use=opt$res.poststress,ds=min.st,nm="St.dws.sc",folder="st")
+sc10x.St <- results[[1]]
+results.cor.St.go <- results[[2]]
+results.clust.St.go.id <- results[[3]]
+rm(results)
+rm(gene.set)
+
+#NE scoring on the epithelial subset (score cutoff from --cut.ne)
+sc10x.Epi.NE <- scNE(sc10x.Epi,neg="dws",cut=opt$cut.ne)
+
+#Combine the epithelial and stromal identities on the full object (presumably per barcode - see scMerge)
+sc10x <- scMerge(sc10x,sc10x.Epi,sc10x.St,i.1="Epi.dws.sc",i.2="St.dws.sc",nm="Merge_Epi.dws.sc_St.dws.sc")
+
+sc10x <- SetAllIdent(object=sc10x,id="Merge_Epi.dws.sc_St.dws.sc")
+sc10x@ident <- factor(sc10x@ident,levels=c("BE","LE","OE_SCGB","OE_KRT13","Fib","SM","Endo","Leu"))
+postscript("./analysis/tSNE/FINAL/tSNE_FINAL.eps")
+plot <- TSNEPlot(object=sc10x,pt.size=2.5,do.return=TRUE,vector.friendly=FALSE)
+plot <- plot+theme(axis.text.x=element_text(size=20),axis.text.y=element_text(size=20),axis.title.x=element_text(size=20),axis.title.y=element_text(size=20),legend.text=element_text(size=20))
+plot <- plot+guides(colour=guide_legend(override.aes=list(size=10)))
+plot(plot)
+dev.off()
+
+scTables(sc10x,i.1="samples",i.2="Merge_Epi.dws.sc_St.dws.sc")
+
+sctSNECustCol(sc10x,i="Lin",bl="Epi",rd="St",file="D17")
+sctSNECustCol(sc10x,i="Merge_Epi.dws.sc_St.dws.sc",bl=c("BE","LE","OE_SCGB","OE_KRT13"),rd=c("Fib","SM","Endo","Leu"),file="D17")
+sctSNECustCol(sc10x.Epi,i="Epi.dws.sc",bl=c("BE","LE","OE_SCGB","OE_KRT13"),rd="",file="D17")
+sctSNECustCol(sc10x.St,i="St.dws.sc",bl="",rd=c("Fib","SM","Endo","Leu"),file="D17")
+
+sctSNEbwCol(sc10x,i=paste0("res",opt$res.poststress),file="ALL",files="D17")
+sctSNEbwCol(sc10x.Epi,i=paste0("res",opt$res.poststress),file="Epi",files="D17")
+sctSNEbwCol(sc10x.St,i=paste0("res",opt$res.poststress),file="St",files="D17")
+sctSNEbwCol(sc10x,i="Merge_Epi.dws.sc_St.dws.sc",file="ALL",files="D17")
+sctSNEbwCol(sc10x.Epi,i="Epi.dws.sc",file="Epi",files="D17")
+sctSNEbwCol(sc10x.St,i="St.dws.sc",file="St",files="D17")
+
+for (g in c("Epi","St","Unknown")){
+  sctSNEHighlight(sc10x,i="Lin",g=g,file="D17")
+}
+for (g in c("BE","LE","OE_SCGB","OE_KRT13")){
+  sctSNEHighlight(sc10x,i="Merge_Epi.dws.sc_St.dws.sc",g=g,file="D17")
+  sctSNEHighlight(sc10x.Epi,i="Epi.dws.sc",g=g,file="D17")
+}
+for (g in c("Fib","SM","Endo","Leu")){
+  sctSNEHighlight(sc10x,i="Merge_Epi.dws.sc_St.dws.sc",g=g,file="D17")
+  sctSNEHighlight(sc10x.St,i="St.dws.sc",g=g,file="D17")
+}
+rm(i)
+rm(g)
+
+
+#Save and drop objects group by group; sc10x.Stress must go first because the pattern "sc10x.St" also matches it
+save(list=ls(pattern="sc10x.Stress"),file="./analysis/sc10x.Stress.Rda")
+rm(list=ls(pattern="sc10x.Stress"))
+save(list=ls(pattern="sc10x.Epi"),file="./analysis/sc10x.Epi.Rda")
+rm(list=ls(pattern="^sc10x.Epi"))
+save(list=ls(pattern="sc10x.St"),file="./analysis/sc10x.St.Rda")
+rm(list=ls(pattern="sc10x.St"))
+save(list=ls(pattern="^sc10x"),file="./analysis/sc10x.Rda")
+rm(list=ls(pattern="^sc10x"))
+save.image(file="./analysis/Data.RData")
diff --git a/r.scripts/sc-TissueMapper_RUN.DS_D17.aggr.R b/r.scripts/sc-TissueMapper_RUN.DS_D17.aggr.R
new file mode 100755
index 0000000000000000000000000000000000000000..cac02a21cb42514e98900e8a21a90ffc2f57dadb
--- /dev/null
+++ b/r.scripts/sc-TissueMapper_RUN.DS_D17.aggr.R
@@ -0,0 +1,115 @@
+#Compares the merged identities of each downsampled D17 run against the "All" run using NMI,
+#and relates NMI to the per-run value read from metrics_summary.csv (plotted as mean reads per cell)
+gc()
+library(methods)
+library(optparse)
+library(Seurat)
+library(readr)
+library(fBasics)
+library(pastecs)
+library(qusage)
+library(RColorBrewer)
+library(monocle)
+library(dplyr)
+library(viridis)
+library(reshape2)
+library(NMI)
+
+source("../r.scripts/sc-TissueMapper.R")
+
+setwd("../")
+
+#Object saved by the run script in this analysis folder; kept as the reference "All" run
+load("./analysis/sc10x.Rda")
+sc10x.All <- sc10x
+rm(sc10x)
+
+#Load every downsampled run as sc10x.<label>; the first label ("All") is already loaded above
+downsample <- c("All","350","300","250","200","150","100","075","050","037","025","012","007","005","002")
+for (i in downsample[-1]){
+  load(paste0("../../",i,"/sc-TissueMapper_Pr/analysis/sc10x.Rda"))
+  assign(paste0("sc10x.",i),sc10x)
+  rm(sc10x)
+}
+
+#Union of cell barcodes across all runs
+all.cells <- NULL
+for (i in downsample){
+  all.cells <- c(all.cells,get(paste0("sc10x.",i))@data@Dimnames[[2]])
+}
+all.cells <- unique(all.cells)
+
+#Barcodes present in every run (shared.cells) and in every run except "002" (shared.cells.no002)
+shared.cells <- all.cells
+shared.cells.no002 <- all.cells
+for (i in downsample){
+  shared.cells <- intersect(shared.cells,get(paste0("sc10x.",i))@data@Dimnames[[2]])
+  if (i != "002"){shared.cells.no002 <- intersect(shared.cells.no002,get(paste0("sc10x.",i))@data@Dimnames[[2]])}
+}
+
+#Per-run barcode/identity table, plus a copy restricted to barcodes found in the "All" run
+for (i in downsample){
+  assign(paste0("sc10x.",i),SetAllIdent(get(paste0("sc10x.",i)),id="Merge_Epi.dws.sc_St.dws.sc"))
+  assign(paste0("cluster.",i),data.frame(Barcodes=names(get(paste0("sc10x.",i))@ident),Cluster=get(paste0("sc10x.",i))@ident))
+  assign(paste0("cluster.",i,".filter"),get(paste0("cluster.",i))[get(paste0("cluster.",i))$Barcodes %in% sc10x.All@data@Dimnames[[2]],])
+}
+
+#NMI of each downsampled run against the "All" run; labels are converted to numbers via as.character so this works whether Sample is a factor or character
+nmi <- data.frame(Sample=character(),value=double())
+for (i in downsample[-1]){
+  nmi <- rbind(nmi,data.frame(Sample=i,value=NMI(cluster.All.filter,get(paste0("cluster.",i,".filter")))))
+}
+nmi$Sample <- as.numeric(as.character(nmi$Sample))
+
+#NMI vs. sample label; lines mark where the loess fit on log(Sample) is interpolated to NMI=0.9
+png(paste0("./analysis/NMI.png"),width=1000,height=500,type="cairo")
+plot.nmi <- ggplot(nmi,aes(x=Sample,y=value))+geom_point()+geom_smooth(method='loess',formula=y~log(x))+labs(x="Sample (Million Reads)",y="NMI")
+model.nmi <- loess(value~log(Sample),data=nmi)
+fit.nmi.y <- 0.9
+fit.nmi.x <- approx(x=predict(model.nmi),y=nmi$Sample,xout=fit.nmi.y)$y
+plot.nmi <- plot.nmi+geom_vline(xintercept=fit.nmi.x)+geom_hline(yintercept=fit.nmi.y)
+plot(plot.nmi)
+dev.off()
+
+#Second column of each run's metrics_summary.csv (labelled "Mean Reads Per Cell" in the plots below)
+for (i in downsample[-1]){
+  assign(paste0("rpc.",i),read_csv(paste0("../../../../count/",i,"M_D17PrTzF_Via/outs/metrics_summary.csv"))[,2])
+}
+
+rpc <- data.frame(Sample=character(),value=double())
+for (i in downsample[-1]){
+  rpc <- rbind(rpc,data.frame(Sample=i,value=get(paste0("rpc.",i))))
+}
+colnames(rpc)[2] <- "value"
+rpc$Sample <- as.numeric(as.character(rpc$Sample))
+
+png(paste0("./analysis/RPC.png"),width=1000,height=500,type="cairo")
+plot.rpc <- ggplot(rpc,aes(x=Sample,y=value))+geom_point()+geom_smooth(method='lm',formula=y~x)+labs(x="Sample (Million Reads)",y="Mean Reads Per Cell")
+model.rpc <- lm(value~Sample,data=rpc)
+fit.rpc.y <- approx(x=rpc$Sample,y=predict(model.rpc),xout=fit.nmi.x)$y
+plot.rpc <- plot.rpc+geom_vline(xintercept=fit.nmi.x)+geom_hline(yintercept=fit.rpc.y)
+plot(plot.rpc)
+dev.off()
+
+comb <- cbind(nmi,rpc[,2])
+colnames(comb) <- c("Sample","NMI","RPC")
+
+nmi.rpc <- merge(nmi,rpc,by="Sample")
+nmi.rpc <- nmi.rpc[,-1]
+colnames(nmi.rpc) <- c("NMI","RPC")
+nmi.rpc$NMI <- round(as.numeric(nmi.rpc$NMI),2)
+
+#NOTE(review): the plotted smooth uses y~log(x) but the loess model behind the NMI=0.9 crossing uses untransformed RPC - confirm which is intended
+postscript("./analysis/RPC+NMI.eps")
+plot.comb <- ggplot(nmi.rpc,aes(x=RPC,y=NMI))+geom_point(colour="blue",size=4)
+plot.comb <- plot.comb+geom_smooth(method='loess',formula=y~log(x),size=2)
+model <- loess(NMI~RPC,data=nmi.rpc)
+fit.y <- 0.9
+fit.x <- approx(y=nmi.rpc$RPC,x=predict(model),xout=fit.y)$y
+plot.comb <- plot.comb+geom_vline(xintercept=fit.x,linetype=2,size=1.5)+geom_hline(yintercept=fit.y,linetype=2,size=1.5)
+plot.comb <- plot.comb+labs(x="Mean Reads Per Cell",y="NMI")
+plot.comb <- plot.comb+scale_x_continuous(expand=c(0,0),limits=c(0,80000),breaks=c(seq(0,100000,25000),round(fit.x,0)))+scale_y_continuous(expand=c(0,0),limits=c(0,1),breaks=c(seq(0,1,0.2),fit.y))
+plot(plot.comb)
+dev.off()
+
+save.image(file="./analysis/NMI.RData")
diff --git a/r.scripts/sc-TissueMapper_RUN.Pd.R b/r.scripts/sc-TissueMapper_RUN.Pd.R
index 2b76a12cf783845896c1e4cf532c3380aea995b6..7772609db250c04cfb7c6511a9d7f3b344563a11 100644
--- a/r.scripts/sc-TissueMapper_RUN.Pd.R
+++ b/r.scripts/sc-TissueMapper_RUN.Pd.R
@@ -312,7 +312,7 @@ names(gene.set1) <- "Leu"
 gene.set <- c(gene.set,gene.set1)
 rm(gene.set1)
 gc()
-min.st <- min(table(sc10x.Epi@meta.data[,paste0("res",opt$res.poststress)]))
+min.st <- min(table(sc10x.St@meta.data[,paste0("res",opt$res.poststress)]))
 results <- scQuSAGE(sc10x.St,gs=gene.set,res.use=0.2,ds=min.st,nm="St.go",folder="st")
 sc10x.St <- results[[1]]
 results.cor.St.go <- results[[2]]