diff --git a/r.scripts/sc-TissueMapper.R b/r.scripts/sc-TissueMapper.R index ca0ec8280531dd794e3aabf69877fc2495e80670..b41269c02281921d0b9e34e25e848e67d7d2d7e6 100644 --- a/r.scripts/sc-TissueMapper.R +++ b/r.scripts/sc-TissueMapper.R @@ -75,7 +75,7 @@ scSubset <- function(sc10x,i="ALL",g="ALL"){ } -scCellCycle <- function(sc10x){ +scCellCycle <- function(sc10x,sub=FALSE){ #Runs Seurat based PCA analysis for cell cycle ID #Inputs: @@ -86,6 +86,15 @@ scCellCycle <- function(sc10x){ #results[2] = s genes #results[3] = g2m genes + #Make sub-folders if necessary + if (sub==FALSE){ + folder <- "./analysis/qc/cc/" + } else { + folder <- paste0("./analysis/qc/cc/",sub,"/") + if (!dir.exists(folder)){ + dir.create(folder) + }} + #score cell cycle and generate figures genes.cc <- readLines(con="./genesets/regev_lab_cell_cycle_genes.txt") genes.s <- genes.cc[1:43] @@ -94,16 +103,16 @@ scCellCycle <- function(sc10x){ sc10x <- ScaleData(object=sc10x,display.progress=FALSE,do.par=TRUE,num.cores=45) sc10x <- CellCycleScoring(object=sc10x,s.genes=genes.s,g2m.genes=genes.g2m,set.ident=TRUE) - postscript("./analysis/qc/cc/Ridge_cc.Raw.eps") + postscript(paste0(folder,"Ridge_cc.Raw.eps")) plot <- RidgePlot(object=sc10x,features.plot=c("PCNA","TOP2A","MCM6","MKI67"),y.log=TRUE,nCol=2,do.return=TRUE) plot(plot) dev.off() - postscript("./analysis/qc/cc/Violin_cc.Raw.eps") + postscript(paste0(folder,"Violin_cc.Raw.eps")) plot <- VlnPlot(object=sc10x,features.plot=c("PCNA","TOP2A","MCM6","MKI67"),nCol=2,point.size.use=1,size.title.use=20,x.lab.rot=TRUE) plot(plot) dev.off() sc10x <- RunPCA(object=sc10x,pc.genes=c(genes.s,genes.g2m),do.print=FALSE,pcs.store=2) - postscript("./analysis/qc/cc/PCA_cc.Raw.eps") + postscript(paste0(folder,"PCA_cc.Raw.eps")) plot <- PCAPlot(object=sc10x,do.return=TRUE) plot <- plot+theme(axis.text.x=element_text(size=20),axis.text.y=element_text(size=20),axis.title.x=element_text(size=20),axis.title.y=element_text(size=20),legend.text=element_text(size=20)) plot <- plot+guides(colour=guide_legend(override.aes=list(size=10))) @@ -113,7 +122,7 @@ scCellCycle <- function(sc10x){ sc10x <- ScaleData(object=sc10x,vars.to.regress=c("S.Score","G2M.Score"),display.progress=FALSE,do.par=TRUE,num.cores=45) gc() sc10x <- RunPCA(object=sc10x,pc.genes=c(genes.s,genes.g2m),do.print=FALSE,pcs.store=2) - postscript("./analysis/qc/cc/PCA_cc.Norm.eps") + postscript(paste0(folder,"PCA_cc.Norm.eps")) plot <- PCAPlot(object=sc10x,do.return=TRUE) plot <- plot+theme(axis.text.x=element_text(size=20),axis.text.y=element_text(size=20),axis.title.x=element_text(size=20),axis.title.y=element_text(size=20)) plot <- plot+guides(colour=guide_legend(override.aes=list(size=10))) @@ -129,7 +138,7 @@ scCellCycle <- function(sc10x){ } -scQC <- function(sc10x,lg=500,hg=2500,lm=-Inf,hm=0.1){ +scQC <- function(sc10x,lg=500,hg=2500,lm=-Inf,hm=0.1,sub=FALSE){ #QC and filter Seurat object #Inputs: @@ -146,18 +155,27 @@ scQC <- function(sc10x,lg=500,hg=2500,lm=-Inf,hm=0.1){ #result[4] = filtered cell count #result[5] = filtered gene count + #Make sub-folders if necessary + if (sub==FALSE){ + folder <- "./analysis/qc/" + } else { + folder <- paste0("./analysis/qc/",sub,"/") + if (!dir.exists(folder)){ + dir.create(folder) + }} + #Calculate %mito genes mito.genes <- grep(pattern="^MT-",x=rownames(x=sc10x@data),value=TRUE) - percent.mito <- colSums(sc10x@raw.data[mito.genes,])/colSums(sc10x@raw.data) + percent.mito <- colSums(as.matrix(sc10x@raw.data[mito.genes,]))/colSums(as.matrix(sc10x@raw.data)) sc10x <- AddMetaData(object=sc10x,metadata=percent.mito,col.name="percent.mito") #Plot raw stats sc10x <- SetAllIdent(object=sc10x,id="ALL") - postscript("./analysis/qc/Violin_qc.raw.eps") + postscript(paste0(folder,"Violin_qc.raw.eps")) plot <- VlnPlot(object=sc10x,features.plot=c("nGene","nUMI","percent.mito"),nCol=3,do.return=TRUE) plot(plot) dev.off() - postscript("./analysis/qc/Plot_qc.raw.eps") + postscript(paste0(folder,"Plot_qc.raw.eps")) par(mfrow=c(1,2)) GenePlot(object=sc10x,gene1="nUMI",gene2="percent.mito") GenePlot(object=sc10x,gene1="nUMI",gene2="nGene") @@ -170,11 +188,11 @@ scQC <- function(sc10x,lg=500,hg=2500,lm=-Inf,hm=0.1){ sc10x <- NormalizeData(object=sc10x,normalization.method="LogNormalize",scale.factor=10000) #Plot filtered stats - postscript("./analysis/qc/Violin_qc.filtered.eps") + postscript(paste0(folder,"Violin_qc.filtered.eps")) plot <- VlnPlot(object=sc10x,features.plot=c("nGene","nUMI","percent.mito"),nCol=3,do.return=TRUE) plot(plot) dev.off() - postscript("./analysis/qc/Plot_qc.filtered.eps") + postscript(paste0(folder,"Plot_qc.filtered.eps")) par(mfrow=c(1,2)) GenePlot(object=sc10x,gene1="nUMI",gene2="percent.mito") GenePlot(object=sc10x,gene1="nUMI",gene2="nGene") @@ -549,12 +567,13 @@ scQuSAGE <- function(sc10x,gs,res.use=0.1,ds=25000,nm="Lin",folder="lin"){ for (j in 1:number.clusters){ qs <- get(paste0("results.",j)) if (j==1){ - plotDensityCurves(qs,path.index=i,col=j,main=names(gs)[i],xlim=c(min.x.rg-0.05,max.x.rg+0.05),ylim=c(0,50*ceiling(max.y.rg/50)),xlab="Gene Set Activation",lwd=2,cex.main=2.5) + plotDensityCurves(qs,path.index=i,col=viridis(number.clusters)[j],main=names(gs)[i],xlim=c(min.x.rg-0.05,max.x.rg+0.05),ylim=c(0,50*ceiling(max.y.rg/50)),xlab="Gene Set Activation",lwd=5,cex.main=2.5,cex.axis=1.5,cex.lab=2) } else { - plotDensityCurves(qs,path.index=i,add=TRUE,col=j,lwd=2) + plotDensityCurves(qs,path.index=i,add=TRUE,col=viridis(number.clusters)[j],lwd=5) }} leg <- paste0("Cluster ",1:number.clusters) - legend("topright",legend=leg,lty=1,col=1:number.clusters,lwd=2,cex=2,ncol=2,bty="n",pt.cex=1) + legend("topright",legend=leg,lty=1,col=viridis(number.clusters),lwd=5,cex=2,ncol=1,bty="n",pt.cex=2) + box(lwd=5) dev.off() } @@ -564,11 +583,12 @@ scQuSAGE <- function(sc10x,gs,res.use=0.1,ds=25000,nm="Lin",folder="lin"){ postscript(paste0("./analysis/cor/QuSAGE_",nm,"_Clust",i,".eps")) for (j in 1:length(gs)){ if (j==1){ - plotDensityCurves(qs,path.index=j,col=j,main=paste0("Cluster ",i),xlim=c(min.x.rg-0.05,max.x.rg+0.05),ylim=c(0,50*ceiling(max.y.rg/50)),xlab="Gene Set Activation",lwd=2,cex.main=2.5) + plotDensityCurves(qs,path.index=j,col=j,main=paste0("Cluster ",i),xlim=c(min.x.rg-0.05,max.x.rg+0.05),ylim=c(0,50*ceiling(max.y.rg/50)),xlab="Gene Set Activation",lwd=5,cex.main=2.5,cex.axis=1.5,cex.lab=2) } else { - plotDensityCurves(qs,path.index=j,add=TRUE,col=j,lwd=2) + plotDensityCurves(qs,path.index=j,add=TRUE,col=j,lwd=5) }} - legend("topright",legend=names(gs),lty=1,col=1:length(gs),lwd=2,cex=2,ncol=2,bty="n",pt.cex=1) + legend("topright",legend=names(gs),lty=1,col=1:length(gs),lwd=5,cex=2,ncol=1,bty="n",pt.cex=2) + box(lwd=5) dev.off() } @@ -676,12 +696,13 @@ scQuSAGEsm <- function(sc10x,gs,ds=25000,nm="Lin",folder="lin"){ for (j in clusters){ qs <- get(paste0("results.",j)) if (j==clusters[1]){ - plotDensityCurves(qs,path.index=i,col=which(clusters == j),main=names(gs)[i],xlim=c(min.x.rg-0.05,max.x.rg+0.05),ylim=c(0,50*ceiling(max.y.rg/50)),xlab="Gene Set Activation",lwd=2,cex.main=2.5) + plotDensityCurves(qs,path.index=i,col=which(clusters == j),main=names(gs)[i],xlim=c(min.x.rg-0.05,max.x.rg+0.05),ylim=c(0,50*ceiling(max.y.rg/50)),xlab="Gene Set Activation",lwd=5,cex.main=2.5,cex.axis=1.5,cex.lab=2) } else { - plotDensityCurves(qs,path.index=i,add=TRUE,col=which(clusters == j),lwd=2) + plotDensityCurves(qs,path.index=i,add=TRUE,col=which(clusters == j),lwd=5) }} leg <- clusters - legend("topright",legend=leg,lty=1,col=1:length(clusters),lwd=2,cex=2,ncol=2,bty="n",pt.cex=1) + legend("topright",legend=leg,lty=1,col=1:length(clusters),lwd=5,cex=2,ncol=1,bty="n",pt.cex=2) + dev.off() } @@ -691,11 +712,12 @@ scQuSAGEsm <- function(sc10x,gs,ds=25000,nm="Lin",folder="lin"){ postscript(paste0("./analysis/cor/QuSAGE_",nm,".",folder,"_",i,".eps")) for (j in 1:length(gs)){ if (j==1){ - plotDensityCurves(qs,path.index=j,col=j,main=i,xlim=c(min.x.rg-0.05,max.x.rg+0.05),ylim=c(0,50*ceiling(max.y.rg/50)),xlab="Gene Set Activation",lwd=2,cex.main=2.5) + plotDensityCurves(qs,path.index=j,col=j,main=i,xlim=c(min.x.rg-0.05,max.x.rg+0.05),ylim=c(0,50*ceiling(max.y.rg/50)),xlab="Gene Set Activation",lwd=5,cex.main=2.5,cex.axis=1.5,cex.lab=2) } else { - plotDensityCurves(qs,path.index=j,add=TRUE,col=j,lwd=2) + plotDensityCurves(qs,path.index=j,add=TRUE,col=j,lwd=5) }} - legend("topright",legend=names(gs),lty=1,col=1:length(gs),lwd=2,cex=2,ncol=2,bty="n",pt.cex=1) + legend("topright",legend=names(gs),lty=1,col=1:length(gs),lwd=5,cex=2,ncol=1,bty="n",pt.cex=2) + dev.off() } @@ -1049,7 +1071,7 @@ scCCA <- function(sc10x.1,sc10x.2,nm.1="D17",nm.2="D27",cc=FALSE){ } -sc3CCA <- function(sc10x.1,sc10x.2,sc10x.3,nm.1="D17",nm.2="D27",nm.3="D35",cc=FALSE){ +sc3CCA <- function(sc10x.1,sc10x.2,sc10x.3,nm.1="D17",nm.2="D27",nm.3="D35",cc=FALSE,cca=25,acca=15){ gc() if (cc==TRUE){ sc10x.1 <- ScaleData(object=sc10x.1,vars.to.regress=c("nUMI","percent.mito","S.Score","G2M.Score"),display.progress=FALSE,do.par=TRUE,num.cores=45) @@ -1077,9 +1099,9 @@ sc3CCA <- function(sc10x.1,sc10x.2,sc10x.3,nm.1="D17",nm.2="D27",nm.3="D35",cc= sc10x.1 <- FindVariableGenes(sc10x.1,do.plot=FALSE) sc10x.2 <- FindVariableGenes(sc10x.2,do.plot=FALSE) sc10x.3 <- FindVariableGenes(sc10x.3,do.plot=FALSE) - genes.hvg.1 <- head(rownames(sc10x.1@hvg.info),1000) - genes.hvg.2 <- head(rownames(sc10x.2@hvg.info),1000) - genes.hvg.3 <- head(rownames(sc10x.3@hvg.info),1000) + genes.hvg.1 <- head(rownames(sc10x.1@hvg.info),2000) + genes.hvg.2 <- head(rownames(sc10x.2@hvg.info),2000) + genes.hvg.3 <- head(rownames(sc10x.3@hvg.info),2000) genes.hvg.Comb <- unique(c(genes.hvg.1,genes.hvg.2,genes.hvg.3)) genes.hvg.Comb <- intersect(genes.hvg.Comb,rownames(sc10x.1@scale.data)) genes.hvg.Comb <- intersect(genes.hvg.Comb,rownames(sc10x.2@scale.data)) @@ -1092,7 +1114,7 @@ sc3CCA <- function(sc10x.1,sc10x.2,sc10x.3,nm.1="D17",nm.2="D27",nm.3="D35",cc= sc10x.2 <- RenameCells(sc10x.2,nm.2) sc10x.3 <- RenameCells(sc10x.3,nm.3) sc10x.list <- list(sc10x.1,sc10x.2,sc10x.3) - sc10x <- RunMultiCCA(sc10x.list,genes.use=genes.hvg.Comb,num.cc=50) + sc10x <- RunMultiCCA(sc10x.list,genes.use=genes.hvg.Comb,num.cc=cca) rm(sc10x.1) rm(sc10x.2) rm(sc10x.3) @@ -1109,12 +1131,12 @@ sc3CCA <- function(sc10x.1,sc10x.2,sc10x.3,nm.1="D17",nm.2="D27",nm.3="D35",cc= plot <- VlnPlot(sc10x,features.plot="CC2",group.by="patient",size.title.use=20,point.size.use=0.05) plot(plot) dev.off() - #postscript("./analysis/cca/Bicor.eps",paper="special",width=10,height=5,horizontal=FALSE) - #MetageneBicorPlot(sc10x,dims.eval=1:50,grouping.var="patient") - #dev.off() + postscript("./analysis/cca/Bicor.eps",paper="special",width=10,height=5,horizontal=FALSE) + MetageneBicorPlot(sc10x,dims.eval=1:cca,grouping.var="patient") + dev.off() gc() - sc10x <- AlignSubspace(sc10x,reduction.type="cca",grouping.var="patient",dims.align=1:25) + sc10x <- AlignSubspace(sc10x,reduction.type="cca",grouping.var="patient",dims.align=1:acca) gc() postscript("./analysis/cca/CCA.Aligned.eps",paper="special",width=10,height=5,horizontal=FALSE) @@ -1316,3 +1338,4 @@ RenameCells <- function(object, add.cell.id = NULL, new.names = NULL, return(object) } + diff --git a/r.scripts/sc-TissueMapper_RUN.Pd.R b/r.scripts/sc-TissueMapper_RUN.Pd.R new file mode 100644 index 0000000000000000000000000000000000000000..d02030a61ba95f3c1e7c2f34162a645fa2cdfedc --- /dev/null +++ b/r.scripts/sc-TissueMapper_RUN.Pd.R @@ -0,0 +1,441 @@ +gc() +library(methods) +library(optparse) +library(Seurat) +library(readr) +library(fBasics) +library(pastecs) +library(qusage) +library(RColorBrewer) +library(monocle) +library(dplyr) +library(viridis) + +source("../r.scripts/sc-TissueMapper.R") + +#Create folder structure +setwd("../") +if (!dir.exists("./analysis")){ + dir.create("./analysis") +} +if (!dir.exists("./analysis/qc")){ + dir.create("./analysis/qc") +} +if (!dir.exists("./analysis/qc/cc")){ + dir.create("./analysis/qc/cc") +} +if (!dir.exists("./analysis/tSNE")){ + dir.create("./analysis/tSNE") +} +if (!dir.exists("./analysis/tSNE/pre.stress")){ + dir.create("./analysis/tSNE/pre.stress") +} +if (!dir.exists("./analysis/pca")){ + dir.create("./analysis/pca") +} +if (!dir.exists("./analysis/pca/stress")){ + dir.create("./analysis/pca/stress") +} +if (!dir.exists("./analysis/violin")){ + dir.create("./analysis/violin") +} +if (!dir.exists("./analysis/violin/stress")){ + dir.create("./analysis/violin/stress") +} +if (!dir.exists("./analysis/table")){ + dir.create("./analysis/table") +} +if (!dir.exists("./analysis/tSNE/post.stress")){ + dir.create("./analysis/tSNE/post.stress") +} +if (!dir.exists("./analysis/cor")){ + dir.create("./analysis/cor") +} +if (!dir.exists("./analysis/tSNE/lin")){ + dir.create("./analysis/tSNE/lin") +} +if (!dir.exists("./analysis/tSNE/epi")){ + dir.create("./analysis/tSNE/epi") +} +if (!dir.exists("./analysis/tSNE/st")){ + dir.create("./analysis/tSNE/st") +} +if (!dir.exists("./analysis/tSNE/merge")){ + dir.create("./analysis/tSNE/merge") +} +if (!dir.exists("./analysis/pca/ne")){ + dir.create("./analysis/pca/ne") +} +if (!dir.exists("./analysis/tSNE/ne")){ + dir.create("./analysis/tSNE/ne") +} +if (!dir.exists("./analysis/violin/ne")){ + dir.create("./analysis/violin/ne") +} +if (!dir.exists("./analysis/tSNE/FINAL")){ + dir.create("./analysis/tSNE/FINAL") +} +if (!dir.exists("./analysis/deg")){ + dir.create("./analysis/deg") +} +if (!dir.exists("./analysis/cca")){ + dir.create("./analysis/cca") +} +if (!dir.exists("./analysis/diy")){ + dir.create("./analysis/diy") +} +if (!dir.exists("./analysis/pseudotime")){ + dir.create("./analysis/pseudotime") +} + +#Retrieve command-line options +option_list=list( + make_option("--p",action="store",default="DPrF",type='character',help="Project Name"), + make_option("--g",action="store",default="ALL",type='character',help="Group To analyze"), + make_option("--lg",action="store",default=500,type='integer',help="Threshold for cells with minimum genes"), + make_option("--hg",action="store",default=2500,type='integer',help="Threshold for cells with maximum genes"), + make_option("--lm",action="store",default=0,type='numeric',help="Threshold for cells with minimum %mito genes"), + make_option("--hm",action="store",default=0.1,type='numeric',help="Threshold for cells with maximum %mito genes"), + make_option("--lx",action="store",default=0.0125,type='numeric',help="x low threshold for hvg selection"), + make_option("--hx",action="store",default=3.5,type='numeric',help="x high threshold for hvg selection"), + make_option("--ly",action="store",default=0.5,type='numeric',help="y low threshold for hvg selection"), + make_option("--cc",action="store",default=TRUE,type='logical',help="Scale cell cycle?"), + make_option("--cca",action="store",default=50,type='integer',help="Number of CCAs to cacluate"), + make_option("--acca",action="store",default=30,type='integer',help="Number of CCAs to align"), + make_option("--pc",action="store",default=25,type='integer',help="Number of PCs to cacluate"), + make_option("--hpc",action="store",default=0.85,type='numeric',help="Max variance cutoff for PCs to use, pre-stress"), + make_option("--res.prestress",action="store",default=1,type='numeric',help="Resolution to cluster, pre-stress"), + make_option("--st",action="store",default=TRUE,type='logical',help="Remove stressed cells?"), + make_option("--stg",action="store",default="dws",type='character',help="Geneset to use for stress ID"), + make_option("--cut.stress",action="store",default=0.9,type='numeric',help="Cutoff for stress score"), + #make_option("--hpc.poststress",action="store",default=0.85,type='numeric',help="Max variance cutoff for PCs to use, post-stress"), + make_option("--res.poststress",action="store",default=0.15,type='numeric',help="Resolution to cluster, post-stress"), + make_option("--ds",action="store",default=10000,type='integer',help="Number of cells to downsample"), + #make_option("--hpc.epi",action="store",default=0.85,type='numeric',help="Max variance cutoff for PCs to use, Epi"), + make_option("--res.epi",action="store",default=0.15,type='numeric',help="Resolution to cluster, Epi"), + #make_option("--hpc.st",action="store",default=0.85,type='numeric',help="Max variance cutoff for PCs to use, St"), + make_option("--res.st",action="store",default=0.15,type='numeric',help="Resolution to cluster, St"), + make_option("--cut.ne",action="store",default=0.999,type='numeric',help="Cutoff for NE score") +) +opt=parse_args(OptionParser(option_list=option_list)) +rm(option_list) +if (opt$lm==0){opt$lm=-Inf} + +sc10x <- scLoad("Pd") + +if (opt$cc==TRUE){ + results <- scCellCycle(sc10x) + sc10x <- results[[1]] + genes.s <- results[[2]] + genes.g2m <- results[[3]] + rm(results) +} else { + genes.s="" + genes.g2m="" +} + +results <- scQC(sc10x,lg=opt$lg,hg=opt$hg,lm=opt$lm,hm=opt$hm) +sc10x <- results[[1]] +counts.cell.raw <- results[[2]] +counts.gene.raw <- results[[3]] +counts.cell.filtered <- results[[4]] +counts.gene.filtered <- results[[5]] +rm(results) + +sc10x.D17 <- scSubset(sc10x,"D17","D17") +sc10x.D27 <- scSubset(sc10x,"D27","D27") +sc10x.D35 <- scSubset(sc10x,"D35","D35") + +results <- sc3CCA(sc10x.D17,sc10x.D27,sc10x.D35,"D17","D27","D35",cc=opt$cc,cca=opt$cca,acca=opt$acca) +sc10x <- results[[1]] +genes.hvg.cca <- results[[2]] +rm(results) + +#results <- scPC(sc10xlx=opt$lx,hx=opt$hx,ly=opt$ly,cc=opt$cc,pc=opt$pc,hpc=opt$hpc,file="pre.stress",cca=TRUE) +#sc10x <- results[[1]] +#genes.hvg.prestress <- results[[2]] +#pc.use.prestress <- results[[3]] +#rm(results) + +sc10x <- scCluster(sc10x,pc.use=opt$acca,res.use=opt$res.prestress,folder="pre.stress",red="cca.aligned") + +if (opt$st==TRUE){ + results <- scStress(sc10x,stg=opt$stg,res.use=opt$res.prestress,pc.use=pc.use.prestress,cut=opt$cut.stress) + sc10x <- results[[1]] + counts.cell.filtered.stress <- results[[2]] + sc10x.Stress <- results[[3]] + rm(results) + + #results <- scPC(sc10x,lx=opt$lx,hx=opt$hx,ly=opt$ly,cc=opt$cc,pc=opt$pc,hpc=opt$hpc,file="post.stress",cca=TRUE) + #sc10x <- results[[1]] + #genes.hvg.poststress <- results[[2]] + #pc.use.poststress <- results[[3]] + #rm(results) + + sc10x <- scCluster(sc10x,pc.use=opt$acca,res.use=opt$res.poststress,folder="post.stress",red="cca.aligned") +} + +gene.set1 <- read_delim("./genesets/DEG_FMSt_5FC.txt","\t",escape_double=FALSE,trim_ws=TRUE,col_names=FALSE) +gene.set1 <- as.list(gene.set1) +names(gene.set1) <- "St" +gene.set <- c(gene.set1) +gene.set1 <- read_delim("./genesets/DEG_Epi_5FC.txt","\t",escape_double=FALSE,trim_ws=TRUE,col_names=FALSE) +gene.set1 <- as.list(gene.set1) +names(gene.set1) <- "Epi" +gene.set <- c(gene.set,gene.set1) +rm(gene.set1) +gc() +results <- scQuSAGE(sc10x,gs=gene.set,res.use=opt$res.poststress,ds=opt$ds,nm="Lin",folder="lin") +sc10x <- results[[1]] +results.cor.Lin <- results[[2]] +results.clust.Lin.id <- results[[3]] +rm(results) +rm(gene.set) + +sc10x.Epi <- scSubset(sc10x,i="Lin",g="Epi") +if (any(levels(sc10x@ident)=="Unknown")){ + sc10x.St <- scSubset(sc10x,i="Lin",g=c("St","Unknown")) +} else { + sc10x.St <- scSubset(sc10x,i="Lin",g="St") +} + +sc10x.Epi <- scCluster(sc10x.Epi,pc.use=opt$acca,res.use=opt$res.epi,folder="epi",red="cca.aligned") + +gene.set1 <- read_delim("./genesets/DEG_BE_5FC.txt","\t",escape_double=FALSE,trim_ws=TRUE,col_names=FALSE) +gene.set1 <- as.list(gene.set1) +names(gene.set1) <- "BE" +gene.set <- c(gene.set1) +gene.set1 <- read_delim("./genesets/DEG_LE_5FC.txt","\t",escape_double=FALSE,trim_ws=TRUE,col_names=FALSE) +gene.set1 <- as.list(gene.set1) +names(gene.set1) <- "LE" +gene.set <- c(gene.set,gene.set1) +gene.set1 <- read_delim("./genesets/DEG_OE_5FC.txt","\t",escape_double=FALSE,trim_ws=TRUE,col_names=FALSE) +gene.set1 <- as.list(gene.set1) +names(gene.set1) <- "OE" +gene.set <- c(gene.set,gene.set1) +rm(gene.set1) +gc() +results <- scQuSAGE(sc10x.Epi,gs=gene.set,res.use=0.15,ds=opt$ds,nm="Epi.dws",folder="epi") +sc10x.Epi <- results[[1]] +results.cor.Epi.dws <- results[[2]] +results.clust.Epi.dws.id <- results[[3]] +rm(results) +rm(gene.set) + +sc10x.Epi <- SetAllIdent(object=sc10x.Epi,id=paste0("res",opt$res.epi)) +OE <- levels(factor(sc10x.Epi@ident[sc10x.Epi@meta.data$Epi.dws=="OE"])) +OE.cells <- NULL +for (i in 1:length(OE)){ + OE.cells[i] <- list(names(sc10x.Epi@ident[sc10x.Epi@ident==OE[i]])) +} +sc10x.Epi <- SetAllIdent(object=sc10x.Epi,id="Epi.dws") +for (i in 1:length(OE)){ + sc10x.Epi <- SetIdent(object=sc10x.Epi,cells.use=unlist(OE.cells[i]),ident.use=paste0("OE",i)) +} +sc10x.Epi <- StashIdent(object=sc10x.Epi,save.name="Epi.dws.sub") +postscript("./analysis/tSNE/epi/tSNE_Epi.dws.sub.eps") +plot <- TSNEPlot(object=sc10x.Epi,pt.size=2.5,do.return=TRUE,vector.friendly=FALSE) +plot <- plot+theme(axis.text.x=element_text(size=20),axis.text.y=element_text(size=20),axis.title.x=element_text(size=20),axis.title.y=element_text(size=20),legend.text=element_text(size=20)) +plot <- plot+guides(colour=guide_legend(override.aes=list(size=10))) +plot(plot) +dev.off() +rm(plot) +rm(OE) +rm(OE.cells) + +gene.set1 <- read_csv("./genesets/Basal cells-signature-genes.csv") +gene.set1 <- gene.set1[,2] +gene.set1 <- as.list(gene.set1) +names(gene.set1) <- "BC" +gene.set <- c(gene.set1) +gene.set1 <- read_csv("./genesets/Normal AT2 cells-signature-genes.csv") +gene.set1 <- gene.set1[,2] +gene.set1 <- as.list(gene.set1) +names(gene.set1) <- "AT2" +gene.set <- c(gene.set,gene.set1) +gene.set1 <- read_csv("./genesets/Club_Goblet cells-signature-genes.csv") +gene.set1 <- gene.set1[,2] +gene.set1 <- as.list(gene.set1) +names(gene.set1) <- "CGC" +gene.set<- c(gene.set,gene.set1) +rm(gene.set1) +gc() +results.cor.Epi.lgea <- scQuSAGEsm(sc10x.Epi,gs=gene.set,ds=opt$ds,nm="Epi.dws.sc",folder="lgea") +rm(gene.set) + +sc10x.St <- scCluster(sc10x.St,pc.use=opt$acca,res.use=opt$res.st,folder="st",red="cca.aligned") + +gene.set1 <- read_delim("./genesets/DEG_C5.BP.M11704.txt","\t",escape_double=FALSE,trim_ws=TRUE,col_names=TRUE) +gene.set1 <- as.list(gene.set1[-1,]) +names(gene.set1) <- "Endo" +gene.set <- c(gene.set1) +gene.set1 <- read_delim("./genesets/DEG_C5.BP.M10794.txt","\t",escape_double=FALSE,trim_ws=TRUE,col_names=TRUE) +gene.set1 <- as.list(gene.set1[-1,]) +names(gene.set1) <- "SM" +gene.set <- c(gene.set,gene.set1) +gene.set1 <- read_delim("./genesets/DEG_C5.BP.M13024.txt","\t",escape_double=FALSE,trim_ws=TRUE,col_names=TRUE) +gene.set1 <- as.list(gene.set1[-1,]) +names(gene.set1) <- "Fib" +gene.set <- c(gene.set,gene.set1) +gene.set1 <- read_delim("./genesets/DEG_C5.BP.M10124.txt","\t",escape_double=FALSE,trim_ws=TRUE,col_names=TRUE) +gene.set1 <- as.list(gene.set1[-1,]) +names(gene.set1) <- "Leu" +gene.set <- c(gene.set,gene.set1) +rm(gene.set1) +gc() +results <- scQuSAGE(sc10x.St,gs=gene.set,res.use=0.15,ds=opt$ds,nm="St.go",folder="st") +sc10x.St <- results[[1]] +results.cor.St.go <- results[[2]] +results.clust.St.go.id <- results[[3]] +rm(results) +rm(gene.set) + +sc10x.Epi.NE <- scNE(sc10x.Epi,neg="EurUro",cut=opt$cut.ne) + +sc10x.Epi <- scMergeSubClust(sc10x.Epi,i="Epi.dws.sub",g=c("BE","LE"),nm="Merge") + +sc10x.St <- scMergeSubClust(sc10x.St,i="St.go",g=c("Endo","SM","Fib","Leu"),nm="Merge") + +sc10x <- scMerge(sc10x,sc10x.Epi,sc10x.St,i.1="Merge",i.2="Merge",nm="Merge_Epi.dws_St.go") + +sc10x <- scMerge(sc10x,sc10x,sc10x.Epi.NE,i.1="Merge_Epi.dws_St.go",i.2="NE",nm="Merge_Epi.dws_St.go_NE") + +gene.set.c2.all <- read.gmt("./genesets/c2.all.v6.1.symbols.gmt") +results.cor.c2.ALL <- scQuSAGElg(sc10x,gs=gene.set.c2.all,ds=opt$ds,nm="Merge_Epi.dws_St.go",folder="c2.all") +rm(gene.set.c2.all) + +sc10x <- SetAllIdent(object=sc10x,id="Merge_Epi.dws_St.go") +sc10x@ident <- factor(sc10x@ident,levels=c("BE","LE","OE1","OE2","Fib","SM","Endo","Leu")) +postscript("./analysis/tSNE/FINAL/tSNE_FINAL.eps") +plot <- TSNEPlot(object=sc10x,pt.size=2.5,do.return=TRUE,vector.friendly=FALSE) +plot <- plot+theme(axis.text.x=element_text(size=20),axis.text.y=element_text(size=20),axis.title.x=element_text(size=20),axis.title.y=element_text(size=20),legend.text=element_text(size=20)) +plot <- plot+guides(colour=guide_legend(override.aes=list(size=10))) +plot(plot) +dev.off() + +scTables(sc10x,i.1="samples",i.2="Merge_Epi.dws_St.go") +scTables(sc10x,i.1="samples",i.2="Merge_Epi.dws_St.go_NE") +scTables(sc10x,i.1="Merge_Epi.dws_St.go_NE",i.2="Merge_Epi.dws_St.go") + +genes.deg.Stress <- scDEG(sc10x.Stress,i="Stress",g.1="Stress",g.2="ALL",pct=0.5,t=5) + +genes.deg.Epi <- scDEG(sc10x,i="Lin",g.1="Epi",g.2="St",t=2) +genes.deg.St <- scDEG(sc10x,i="Lin",g.1="St",g.2="Epi",t=2) + +genes.deg.BE <- scDEG(sc10x.Epi.NE,i="Epi.dws.sub",g.1="BE",g.2=c("LE","OE1","OE2"),pct=0.25,t=2) +genes.deg.LE <- scDEG(sc10x.Epi.NE,i="Epi.dws.sub",g.1="LE",g.2=c("BE","LE","OE1"),pct=0.25,t=2) +genes.deg.OE1 <- scDEG(sc10x.Epi.NE,i="Epi.dws.sub",g.1="OE1",g.2=c("BE","LE","OE2"),pct=0.25,t=2) +genes.deg.OE2 <- scDEG(sc10x.Epi.NE,i="Epi.dws.sub",g.1="OE2",g.2=c("BE","LE","OE1"),pct=0.25,t=2) + +genes.deg.NE <- scDEG(sc10x.Epi.NE,i="NE",g.1="NE",g.2="ALL",pct=0.01,t=1) + +genes.deg.Fib <- scDEG(sc10x.St,i="Merge_Epi.dws_St.go_NE",g.1="Fib",g.2=c("SM","Endo","Leu"),pct=0.25,t=2) +genes.deg.SM <- scDEG(sc10x.St,i="Merge_Epi.dws_St.go_NE",g.1="SM",g.2=c("Fib","Endo","Leu"),pct=0.25,t=2) +genes.deg.Endo <- scDEG(sc10x.St,i="Merge_Epi.dws_St.go_NE",g.1="Endo",g.2=c("Fib","SM","Leu"),pct=0.25,t=2) +genes.deg.Leu <- scDEG(sc10x.St,i="St.go",g.1="Leu",g.2=c("Fib","SM","Endo"),pct=0.25,t=2) + +genes.deg.BE.unique <- setdiff(rownames(genes.deg.BE),Reduce(union,list(rownames(genes.deg.LE),rownames(genes.deg.OE1),rownames(genes.deg.OE2),rownames(genes.deg.NE),rownames(genes.deg.Fib),rownames(genes.deg.SM),rownames(genes.deg.Endo),rownames(genes.deg.Leu)))) +genes.deg.LE.unique <- setdiff(rownames(genes.deg.LE),Reduce(union,list(rownames(genes.deg.BE),rownames(genes.deg.OE1),rownames(genes.deg.OE2),rownames(genes.deg.NE),rownames(genes.deg.Fib),rownames(genes.deg.SM),rownames(genes.deg.Endo),rownames(genes.deg.Leu)))) +genes.deg.OE1.unique <- setdiff(rownames(genes.deg.OE1),Reduce(union,list(rownames(genes.deg.BE),rownames(genes.deg.LE),rownames(genes.deg.OE2),rownames(genes.deg.NE),rownames(genes.deg.Fib),rownames(genes.deg.SM),rownames(genes.deg.Endo),rownames(genes.deg.Leu)))) +genes.deg.OE2.unique <- setdiff(rownames(genes.deg.OE2),Reduce(union,list(rownames(genes.deg.BE),rownames(genes.deg.LE),rownames(genes.deg.OE1),rownames(genes.deg.NE),rownames(genes.deg.Fib),rownames(genes.deg.SM),rownames(genes.deg.Endo),rownames(genes.deg.Leu)))) +genes.deg.NE.unique <- setdiff(rownames(genes.deg.NE),Reduce(union,list(rownames(genes.deg.BE),rownames(genes.deg.LE),rownames(genes.deg.OE1),rownames(genes.deg.OE2),rownames(genes.deg.Fib),rownames(genes.deg.SM),rownames(genes.deg.Endo),rownames(genes.deg.Leu)))) +genes.deg.Fib.unique <- setdiff(rownames(genes.deg.Fib),Reduce(union,list(rownames(genes.deg.BE),rownames(genes.deg.LE),rownames(genes.deg.OE1),rownames(genes.deg.OE2),rownames(genes.deg.NE),rownames(genes.deg.SM),rownames(genes.deg.Endo),rownames(genes.deg.Leu)))) +genes.deg.SM.unique <- setdiff(rownames(genes.deg.SM),Reduce(union,list(rownames(genes.deg.BE),rownames(genes.deg.LE),rownames(genes.deg.OE1),rownames(genes.deg.OE2),rownames(genes.deg.NE),rownames(genes.deg.Fib),rownames(genes.deg.Endo),rownames(genes.deg.Leu)))) +genes.deg.Endo.unique <- setdiff(rownames(genes.deg.Endo),Reduce(union,list(rownames(genes.deg.BE),rownames(genes.deg.LE),rownames(genes.deg.OE1),rownames(genes.deg.OE2),rownames(genes.deg.NE),rownames(genes.deg.Fib),rownames(genes.deg.SM),rownames(genes.deg.Leu)))) +genes.deg.Leu.unique <- setdiff(rownames(genes.deg.Leu),Reduce(union,list(rownames(genes.deg.BE),rownames(genes.deg.LE),rownames(genes.deg.OE1),rownames(genes.deg.OE2),rownames(genes.deg.NE),rownames(genes.deg.Fib),rownames(genes.deg.SM),rownames(genes.deg.Endo)))) + +genes.deg.5 <- c(genes.deg.BE.unique[1:5],genes.deg.LE.unique[1:5],genes.deg.OE1.unique[1:5],genes.deg.OE2.unique[1:5],genes.deg.NE.unique[1:5],genes.deg.Fib.unique[1:5],genes.deg.SM.unique[1:5],genes.deg.Endo.unique[1:5],genes.deg.Leu.unique[1:5]) +genes.deg.5 <- rev(genes.deg.5) +genes.deg.10 <- c(genes.deg.BE.unique[1:10],genes.deg.LE.unique[1:10],genes.deg.OE1.unique[1:10],genes.deg.OE2.unique[1:10],genes.deg.NE.unique[1:10],genes.deg.Fib.unique[1:10],genes.deg.SM.unique[1:10],genes.deg.Endo.unique[1:10],genes.deg.Leu.unique[1:10]) +genes.deg.10 <- rev(genes.deg.10) + +sc10x <- SetAllIdent(object=sc10x,id="Merge_Epi.dws_St.go_NE") +sc10x <- SetAllIdent(object=sc10x,id="Merge_Epi.dws_St.go_NE") +sc10x@ident <- factor(sc10x@ident,levels=c("BE","LE","OE1","OE2","NE","Fib","SM","Endo","Leu")) +postscript("./analysis/deg/Dot.DEG.eps",paper="special",width=10,height=5,horizontal=FALSE) +DotPlot(sc10x,genes.deg.5,x.lab.rot=TRUE,plot.legend=TRUE,dot.scale=4) +dev.off() +postscript("./analysis/deg/Heatmap.DEG.eps",paper="special",width=10,height=5,horizontal=FALSE) +plot(DoHeatmap(sc10x,genes.use=genes.deg.10,slim.col.label=TRUE,group.label.rot=TRUE,group.spacing=0.25,cex.row=2.5)) +dev.off() + +for (i in ls(pattern="^genes.deg*unique")){ + postscript(paste0("./analysis/deg/Violin.",i,".eps"),paper="special",width=10,height=5,horizontal=FALSE) + plot(VlnPlot(sc10x,features.plot=get(i)[1:10],nCol=5,point.size.use=0.1,size.title.use=15,x.lab.rot=TRUE)) + dev.off() + postscript(paste0("./analysis/deg/Ridge.",i,".eps"),paper="special",width=10,height=5,horizontal=FALSE) + plot(RidgePlot(sc10x,features.plot=get(i)[1:10],nCol=5,size.x.use=10,size.title.use=15)) + dev.off() + postscript(paste0("./analysis/deg/Heatmap.",i,".eps"),paper="special",width=10,height=5,horizontal=FALSE) + plot(DoHeatmap(sc10x,genes.use=get(i),slim.col.label=TRUE,group.label.rot=TRUE,group.spacing=0.25,cex.row=2.5)) + dev.off() +} + +sc10x.Stress <- SetAllIdent(object=sc10x.Stress,id="Stress") +postscript("./analysis/deg/Violin.Stress.eps",paper="special",width=10,height=5,horizontal=FALSE) +plot(VlnPlot(sc10x.Stress,features.plot=rownames(genes.deg.Stress)[1:10],nCol=5,point.size.use=0.1,size.title.use=15,x.lab.rot=TRUE)) +dev.off() +postscript("./analysis/deg/Ridge.Stress.eps",paper="special",width=10,height=5,horizontal=FALSE) +plot(RidgePlot(sc10x.Stress,features.plot=rownames(genes.deg.Stress)[1:10],nCol=5,size.x.use=10,size.title.use=15)) +dev.off() +postscript("./analysis/deg/Heatmap.Stress.eps",paper="special",width=10,height=5,horizontal=FALSE) +plot(DoHeatmap(sc10x.Stress,genes.use=rownames(genes.deg.Stress),slim.col.label=TRUE,group.label.rot=TRUE,group.spacing=0.25,cex.row=2.5)) +dev.off() + +sc10x.Epi.NE <- SetAllIdent(object=sc10x.Epi.NE,id="NE") +postscript("./analysis/deg/Violin.NE.eps",paper="special",width=10,height=5,horizontal=FALSE) +plot(VlnPlot(sc10x.Epi.NE,features.plot=rownames(genes.deg.NE)[1:10],nCol=5,point.size.use=0.1,size.title.use=15,x.lab.rot=TRUE)) +dev.off() +postscript("./analysis/deg/Ridge.NE.eps",paper="special",width=10,height=5,horizontal=FALSE) +plot(RidgePlot(sc10x.Epi.NE,features.plot=rownames(genes.deg.NE)[1:10],nCol=5,size.x.use=10,size.title.use=15)) +dev.off() + +for (i in ls(pattern="^genes.deg")){ + write.table(get(i),file=paste0("./analysis/deg/",i,".csv"),row.names=TRUE,col.names=NA,append=FALSE,sep=",") +} + +sctSNECustCol(sc10x,i="Lin",bl="Epi",rd="St",file="D") +sctSNECustCol(sc10x,i="Merge_Epi.dws_St.go",bl=c("BE","LE","OE1","OE2"),rd=c("Fib","SM","Endo","Leu"),file="D") +sctSNECustCol(sc10x.Epi,i="Epi.dws.sub",bl=c("BE","LE","OE1","OE2"),rd="",file="D") +sctSNECustCol(sc10x.St,i="St.go",bl="",rd=c("Fib","SM","Endo","Leu"),file="D") + +sctSNEbwCol(sc10x,i="res0.2",file="ALL",files="D") +sctSNEbwCol(sc10x.Epi,i="res0.2",file="Epi",files="D") +sctSNEbwCol(sc10x.St,i="res0.2",file="St",files="D") +sctSNEbwCol(sc10x,i="Merge_Epi.dws_St.go",file="ALL",files="D") +sctSNEbwCol(sc10x.Epi,i="Epi.dws.sub",file="Epi",files="D") +sctSNEbwCol(sc10x.St,i="St.go",file="St",files="D") + +for (g in c("Epi","St")){ + sctSNEHighlight(sc10x,i="Lin",g=g,file="D") +} +for (g in c("BE","LE","OE1","OE2")){ + sctSNEHighlight(sc10x,i="Merge_Epi.dws_St.go",g=g,file="D") + sctSNEHighlight(sc10x.Epi,i="Epi.dws.sub",g=g,file="D") +} +sctSNEHighlight(sc10x.Epi.NE,i="NE",g="NE",file="D") +for (g in c("Fib","SM","Endo","Leu")){ + sctSNEHighlight(sc10x,i="Merge_Epi.dws_St.go",g=g,file="D") + sctSNEHighlight(sc10x.St,i="St.go",g=g,file="D") +} +for (g in c("D17","D27","D35")){ + sctSNEHighlight(sc10x,i="patient",g=g,file="D") +} +rm(g) + +scCustHeatmap(sc10x.Epi,i="Epi.dws.sub",gs=c(genes.deg.BE.unique,genes.deg.LE.unique,genes.deg.OE1.unique,genes.deg.OE2.unique),g=c("BE","LE","OE1","OE2")) +scCustHeatmap(sc10x.St,i="St.go",gs=c(genes.deg.Fib.unique,genes.deg.SM.unique,genes.deg.Endo.unique,genes.deg.OE2.unique,genes.deg.Leu.unique),g=c("Fib","SM","Endo","Leu")) + +scPseudotime(sc10x.Epi,i="Epi.dws.sub",ds=10000) + +save(list=ls(pattern="sc10x.Stress"),file="./analysis/sc10x.Stress.Rda") +rm(list=ls(pattern="sc10x.Stress")) +save(list=ls(pattern="^genes.deg"),file="./analysis/DEG.Rda") +rm(list=ls(pattern="^genes.deg")) +save(list=ls(pattern="sc10x.Epi"),file="./analysis/sc10x.Epi.Rda") +rm(list=ls(pattern="^sc10x.Epi")) +save(list=ls(pattern="sc10x.St"),file="./analysis/sc10x.St.Rda") +rm(list=ls(pattern="sc10x.St")) +save(list=ls(pattern="^sc10x"),file="./analysis/sc10x.Rda") +rm(list=ls(pattern="^sc10x")) +save.image(file="./analysis/Data.RData")