Skip to content
Snippets Groups Projects
Commit 2096510b authored by Gervaise Henry's avatar Gervaise Henry :cowboy:
Browse files

Finalize code for lineage subclustering and QuSAGE

parent 1838a861
No related merge requests found
......@@ -20,24 +20,22 @@ if (opt$st==TRUE){
} else {
sub.folder <- ""
sub.file <- ""
}
}
#load data
setwd("../")
load(paste0("./analysis/sc10x.",Project.Name,".Cumulative.RData"))
load(paste0("./analysis/sc10x.",Project.Name,".",opt$g,".cluster",sub.file,".IDepi+st+ne+LGEAepi+LGEAst.Rda"))
sc10x.Group <- get(paste0("sc10x.",opt$g,".cluster",sub.file,".IDepi+st+ne+LGEAepi+LGEAst"))
rm(list=paste0("sc10x.",opt$g,".cluster",sub.file,".IDepi+st+ne+LGEAepi+LGEAst"))
#subset Epi + St pops
sc10x.Group <- SetAllIdent(object=sc10x.Group,id="Lineage")
sc10x.Group.Epi <- SubsetData(object=sc10x.Group,ident.use="Epi")
sc10x.Group.St <- SubsetData(object=sc10x.Group,ident.use="St")
sc10x.Group.All <- sc10x.Group
load(paste0("./analysis/sc10x.",Project.Name,".",opt$g,".cluster",sub.file,".IDlineage.Rda"))
sc10x.Group <- get(paste0("sc10x.",opt$g,".cluster",sub.file,".IDlineage"))
rm(list=paste0("sc10x.",opt$g,".cluster",sub.file,".IDlineage"))
if (!dir.exists(paste0("./anaysis/",opt$g))){
#create folder structure
if (!dir.exists(paste0("./analysis/",opt$g))){
dir.create(paste0("./analysis/",opt$g))
}
if (!dir.exists(paste0("./analysis/",opt$g,"/qc"))){
dir.create(paste0("./analysis/",opt$g,"/qc"))
}
if (!dir.exists(paste0("./analysis/",opt$g,"/global"))){
dir.create(paste0("./analysis/",opt$g,"/global"))
}
......@@ -45,13 +43,28 @@ if (!dir.exists(paste0("./analysis/",opt$g,"/global",sub.folder))){
dir.create(paste0("./analysis/",opt$g,"/global",sub.folder))
}
#subset Epi + St + UK pops
sc10x.Group <- SetAllIdent(object=sc10x.Group,id="Lineage")
sc10x.Group.Epi <- SubsetData(object=sc10x.Group,ident.use="Epi")
sc10x.Group.St <- SubsetData(object=sc10x.Group,ident.use="St")
tryCatch({
sc10x.Group.UK <- SubsetData(object=sc10x.Group,ident.use="?")
},error={UK <- FALSE})
rm(sc10x.Group)
#re-detect variable genes in Epi + St subsets
sc10x.Group.Epi <- FindVariableGenes(object=sc10x.Group.Epi,mean.function=ExpMean,dispersion.function=LogVMR,x.low.cutoff=0.0125,x.high.cutoff=3,y.cutoff=0.5,do.plot=FALSE)
sc10x.Group.St <- FindVariableGenes(object=sc10x.Group.St,mean.function=ExpMean,dispersion.function=LogVMR,x.low.cutoff=0.0125,x.high.cutoff=3,y.cutoff=0.5,do.plot=FALSE)
if (UK==TRUE){
sc10x.Group.UK <- FindVariableGenes(object=sc10x.Group.UK,mean.function=ExpMean,dispersion.function=LogVMR,x.low.cutoff=0.0125,x.high.cutoff=3,y.cutoff=0.5,do.plot=FALSE)
}
#re-PCA on new variable genes
sc10x.Group.Epi <- RunPCA(object=sc10x.Group.Epi,pc.genes=sc10x.Group.Epi@var.genes,do.print=FALSE,pcs.compute=50)
sc10x.Group.St <- RunPCA(object=sc10x.Group.St,pc.genes=sc10x.Group.St@var.genes,do.print=FALSE,pcs.compute=50)
if (UK==TRUE){
sc10x.Group.UK <- RunPCA(object=sc10x.Group.UK,pc.genes=sc10x.Group.St@var.genes,do.print=FALSE,pcs.compute=50)
}
#generate QC figures
postscript(paste0("./analysis/",opt$g,"/qc/Plot_PCElbow.Epi.eps"))
......@@ -65,6 +78,13 @@ plot <- PCElbowPlot(object=sc10x.Group.St,num.pc=50)
plot <- plot+theme(axis.text.x=element_text(size=20),axis.text.y=element_text(size=20),axis.title.x=element_text(size=20),axis.title.y=element_text(size=20))
plot(plot)
dev.off()
if (UK==TRUE){
postscript(paste0("./analysis/",opt$g,"/qc/Plot_PCElbow.UK.eps"))
plot <- PCElbowPlot(object=sc10x.Group.UK,num.pc=50)
plot <- plot+theme(axis.text.x=element_text(size=20),axis.text.y=element_text(size=20),axis.title.x=element_text(size=20),axis.title.y=element_text(size=20))
plot(plot)
dev.off()
}
#re-generate tSNE dimensions and generage sample figures
sc10x.Group.Epi <- RunTSNE(object=sc10x.Group.Epi,dims.use=1:PC.Max,do.fast=TRUE)
......@@ -83,12 +103,29 @@ plot <- plot+guides(colour=guide_legend(override.aes=list(size=10)))
plot(plot)
dev.off()
if (UK==TRUE){
sc10x.Group.UK <- RunTSNE(object=sc10x.Group.UK,dims.use=1:PC.Max,do.fast=TRUE)
postscript(paste0("./analysis/",opt$g,"/global/NoStress/tSNE_Sample.UK.eps"))
plot <- TSNEPlot(object=sc10x.Group.UK,group.by="samples",pt.size=2.5,do.return=TRUE)
plot <- plot+theme(axis.text.x=element_text(size=20),axis.text.y=element_text(size=20),axis.title.x=element_text(size=20),axis.title.y=element_text(size=20),legend.text=element_text(size=20))
plot <- plot+guides(colour=guide_legend(override.aes=list(size=10)))
plot(plot)
dev.off()
}
re-cluster at different resolutions and generate figures
for (j in (c("Epi","St"))){
if (UK==TRUE){
pops <- c("Epi","St","UK"){
} else {
pops <- c("Epi","St"){
}
for (j in pops){
if (j=="Epi"){
sc10x.Group <- sc10x.Group.Epi
} else {
} else if (j=="St") {
sc10x.Group <- sc10x.Group.St
} else if (j=="UK") {
sc10x.Group <- sc10x.Group.UK
}
for (i in c(5,2,1,0.5,0.2,0.1)){
gc()
......@@ -104,11 +141,33 @@ for (j in (c("Epi","St"))){
}
if (j=="Epi"){
sc10x.Group.Epi <- sc10x.Group
} else {
} else if (j=="St"){
sc10x.Group.St <- sc10x.Group
} else {
sc10x.Group.UK <- sc10x.Group
}
}
rm(pops)
rm(sc10x.Group)
rm(plot)
rm(i)
rm(j)
#make generic variables of run specific variables
rm(UK)
rm(PC.Max)
opt.subclust <- opt
rm(opt)
#save subcluster .Rda files
save(sc10x.Group.Epi,file=paste0("./analysis/sc10x.",Project.Name,".",opt.lgeaepipop$g,".EpiSubcluster",sub.file,".IDepi+st+ne+LGEAepi+LGEAst.Rda"))
save(sc10x.Group.St,file=paste0("./analysis/sc10x.",Project.Name,".",opt.lgeaepipop$g,".StSubcluster",sub.file,".IDepi+st+ne+LGEAepi+LGEAst.Rda"))
\ No newline at end of file
save(sc10x.Group.Epi,file=paste0("./analysis/sc10x.",Project.Name,".",opt.subclust$g,".EpiSubcluster",sub.file,".Rda"))
save(sc10x.Group.St,file=paste0("./analysis/sc10x.",Project.Name,".",opt.subclust$g,".StSubcluster",sub.file,".Rda"))
if (UK==TRUE){
save(sc10x.Group.UK,file=paste0("./analysis/sc10x.",Project.Name,".",opt.subclust$g,".UKSubcluster",sub.file,".Rda"))
}
rm(sc10x.Group.Epi)
rm(sc10x.Group.St)
rm(sc10x.Group.UK)
rm(sub.folder)
rm(sub.file)
save.image(file=paste0("./analysis/sc10x.",Project.Name,".Cumulative.RData"))
gc()
.libPaths("/home2/ghenry/R/x86_64-pc-linux-gnu-library/3.3")
library(optparse)
library(Seurat)
library(readr)
library(readxl)
library(qusage)
#retrive command line options
option_list=list(
make_option("--p",action="store",default="Pr",type='character',help="Project Name"),
make_option("--g",action="store",default="ALL",type='character',help="Group To Analyze"),
make_option("--r",action="store",default=0.1,type='numeric',help="Resolution"),
make_option("--st",action="store",default=TRUE,type='logical',help="Analyze Data With Stress Pops Removed?"),
make_option("--s",action="store",default=5000,type='integer',help="Number Of Cells To Downsample To"),
make_option("--dws",action="store",default=TRUE,type='logical',help="Correlate to DWS Epi?"),
make_option("--lgea",action="store",default=TRUE,type='logical',help="Correlate to LGEA Epi?"),
make_option("--c2",action="store",default=FALSE,type='logical',help="Correlate to c2?"),
make_option("--c5",action="store",default=FALSE,type='logical',help="Correlate to c5?")
)
opt=parse_args(OptionParser(option_list=option_list))
rm(option_list)
Project.Name <- opt$p
if (opt$st==TRUE){
sub.folder <- "/NoStress"
sub.file <- ".NOStress"
} else {
sub.folder <- ""
sub.file <- ""
}
#load data
setwd("../")
load(paste0("./analysis/sc10x.",Project.Name,".Cumulative.RData"))
load(paste0("./analysis/sc10x.",Project.Name,".",opt$g,".cluster",sub.file,".IDlineage.Rda"))
sc10x.Group <- get(paste0("sc10x.",opt$g,".cluster",sub.file,".IDlineage"))
rm(list=paste0("sc10x.",opt$g,".cluster",sub.file,".IDlineage"))
load(paste0("./analysis/sc10x.",Project.Name,".",opt$g,".EpiSubcluster",sub.file,".Rda"))
#create folder structure
if (!dir.exists(paste0("./analysis/",opt$g))){
dir.create(paste0("./analysis/",opt$g))
}
if (!dir.exists(paste0("./analysis/",opt$g,"/global"))){
dir.create(paste0("./analysis/",opt$g,"/global"))
}
if (!dir.exists(paste0("./analysis/",opt$g,"/global",sub.folder))){
dir.create(paste0("./analysis/",opt$g,"/global",sub.folder))
}
if (!dir.exists(paste0("./analysis/",opt$g,"/global",sub.folder,"/correlation"))){
dir.create(paste0("./analysis/",opt$g,"/global",sub.folder,"/correlation"))
}
#load resolution
sc10x.Group <- SetAllIdent(object=sc10x.Group,id="Lineage")
sc10x.Group.Epi <- SetAllIdent(object=sc10x.Group.Epi,id=paste0("res",opt$r))
#downsample if number given
if (!is.na(opt$s)){
rnd <- sample(1:ncol(sc10x.Group.Epi@data),opt$s)
} else {
rnd <- 1:ncol(sc10x.Group.Epi@data)
}
eset <- as.data.frame(as.matrix(sc10x.Group.Epi@data))
eset <- eset[,rnd]
labels <- paste0("Cluster_",as.vector(factor(sc10x.Group.Epi@ident)))
labels <- labels[rnd]
rm(rnd)
#make QuSAGE labels
Number.Clusters <- length(unique(sc10x.Group.Epi@ident))
C <- list()
cC <- list()
for (i in 1:Number.Clusters){
t <- labels
t[!(t %in% paste0("Cluster_",i))] <- "REST"
C[i] <- list(i=t)
rm(t)
cC[i] <- paste0("Cluster_",i,"-REST")
}
rm(labels)
#load QuSAGE genesets
sets <- NULL
if (opt$dws==TRUE){
sets=c(sets,"dws")
}
if (opt$lgea==TRUE){
sets=c(sets,"lgea")
}
gene.set2 <- read_delim("./genesets/DEG_BE_2FC.txt","\t",escape_double=FALSE,trim_ws=TRUE,col_names=FALSE)
gene.set2 <- as.list(gene.set2)
names(gene.set2) <- "BE"
gene.set <- c(gene.set2)
gene.set2 <- read_delim("./genesets/DEG_LE_2FC.txt","\t",escape_double=FALSE,trim_ws=TRUE,col_names=FALSE)
gene.set2 <- as.list(gene.set2)
names(gene.set2) <- "LE"
gene.set <- c(gene.set,gene.set2)
gene.set2 <- read_delim("./genesets/DEG_OE_2FC.txt","\t",escape_double=FALSE,trim_ws=TRUE,col_names=FALSE)
gene.set2 <- as.list(gene.set2)
names(gene.set2) <- "OE"
gene.set.dws <- c(gene.set,gene.set2)
rm(gene.set)
rm(gene.set2)
gene.set2 <- read_csv("./genesets/Basal cells-signature-genes.csv")
gene.set2 <- gene.set2[,2]
gene.set2 <- as.list(gene.set2)
names(gene.set2) <- "BC"
gene.set <- c(gene.set2)
gene.set2 <- read_csv("./genesets/Normal AT2 cells-signature-genes.csv")
gene.set2 <- gene.set2[,2]
gene.set2 <- as.list(gene.set2)
names(gene.set2) <- "AT2"
gene.set <- c(gene.set,gene.set2)
gene.set2 <- read_csv("./genesets/Club_Goblet cells-signature-genes.csv")
gene.set2 <- gene.set2[,2]
gene.set2 <- as.list(gene.set2)
names(gene.set2) <- "CGC"
gene.set.lgea <- c(gene.set,gene.set2)
rm(gene.set)
rm(gene.set2)
gene.set.c2 <- read.gmt("./genesets/c2.all.v6.1.symbols.gmt")
gene.set.c5 <- read.gmt("./genesets/c5.all.v6.1.symbols.gmt")
#run QuSAGE
for (i in 1:Number.Clusters){
if (opt$dws==TRUE){
assign(paste0("qs.results.epi.dws.",i),qusage(eset,unlist(C[i]),unlist(cC[i]),gene.set.dws))
}
if (opt$lgea==TRUE){
assign(paste0("qs.results.epi.lgea.",i),qusage(eset,unlist(C[i]),unlist(cC[i]),gene.set.lgea))
}
if (opt$c2==TRUE){
assign(paste0("qs.results.epi.c2.",i),qusage(eset,unlist(C[i]),unlist(cC[i]),gene.set.c2))
}
if (opt$c5==TRUE){
assign(paste0("qs.results.epi.c5.",i),qusage(eset,unlist(C[i]),unlist(cC[i]),gene.set.c5))
}
}
rm(eset)
rm(C)
rm(cC)
#generate correlation figures
if (opt$dws==TRUE){
max.x.rg.dws <- 0
min.x.rg.dws <- 0
max.y.rg.dws <- 0
for (i in 1:Number.Clusters){
qs <- get(paste0("qs.results.epi.dws.",i))
if (max(qs$path.mean)>max.x.rg.dws){
max.x.rg.dws <- max(qs$path.mean)
if (min(qs$path.mean)<min.x.rg.dws){
min.x.rg.dws <- min(qs$path.mean)
}}
if (max(qs$path.PDF)>max.y.rg.dws){
max.y.rg.dws <- max(qs$path.PDF)
}}}
if (opt$lgea==TRUE){
max.x.rg.lgea <- 0
min.x.rg.lgea <- 0
max.y.rg.lgea <- 0
for (i in 1:Number.Clusters){
qs <- get(paste0("qs.results.epi.lgea.",i))
if (max(qs$path.mean)>max.x.rg.lgea){
max.x.rg.lgea <- max(qs$path.mean)
if (min(qs$path.mean)<min.x.rg.lgea){
min.x.rg.lgea <- min(qs$path.mean)
}}
if (max(qs$path.PDF)>max.y.rg.lgea){
max.y.rg.lgea <- max(qs$path.PDF)
}}}
for (i in 1:Number.Clusters){
if (opt$dws==TRUE){
qs <- get(paste0("qs.results.epi.dws.",i))
postscript(paste0("./analysis/",opt$g,"/global",sub.folder,"/correlation/QuSAGE_Epi_",i,".DWS.eps"))
plotDensityCurves(qs,path.index=1:3,col=1:numPathways(qs),main=paste0("Cluster ",i),xlim=c(min.x.rg.dws-0.05,max.x.rg.dws+0.05),ylim=c(0,50*ceiling(max.y.rg.dws/50)),xlab="Gene Set Activation")
legend("topright",legend=c("BE","LE","OE"),lty=1,col=1:numPathways(qs),lwd=1,cex=1,ncol=1)
dev.off()
}
if (opt$lgea==TRUE){
qs <- get(paste0("qs.results.epi.lgea.",i))
postscript(paste0("./analysis/",opt$g,"/global",sub.folder,"/correlation/QuSAGE_Epi_",i,".LGEA.eps"))
plotDensityCurves(qs,path.index=1:3,col=1:numPathways(qs),main=paste0("Cluster ",i),xlim=c(min.x.rg.lgea-0.05,max.x.rg.lgea+0.05),ylim=c(0,50*ceiling(max.y.rg.lgea/50)),xlab="Gene Set Activation")
legend("topright",legend=c("BC","AT2","CGC"),lty=1,col=1:numPathways(qs),lwd=1,cex=1,ncol=1)
dev.off()
}
if (opt$c2==TRUE){
qs <- get(paste0("qs.results.epi.c2.",i))
postscript(paste0("./analysis/",opt$g,"/global",sub.folder,"/correlation/QuSAGE_Epi_",i,".CI.c2.eps"))
plotCIs(qs,path.index=1:numPathways(qs))
dev.off()
tab <- qsTable(qs,number=numPathways(qs),sort.by="logFC")
tab <- tab[tab$FDR<0.05,]
tab <- tab[order(-tab[,2]),]
write.table(tab,file=paste0("./analysis/",opt$g,"/global",sub.folder,"/correlation/QuSAGE_Epi_",i,".Table.c2.csv"),row.names=TRUE,col.names=NA,append=FALSE,sep=",")
}
if (opt$c5==TRUE){
qs <- get(paste0("qs.results.epi.c5.",i))
postscript(paste0("./analysis/",opt$g,"/global",sub.folder,"/correlation/QuSAGE_Epi_",i,".CI.c5.eps"))
plotCIs(qs,path.index=1:numPathways(qs))
dev.off()
tab <- qsTable(qs,number=numPathways(qs),sort.by="logFC")
tab <- tab[tab$FDR<0.05,]
tab <- tab[order(-tab[,2]),]
write.table(tab,file=paste0("./analysis/",opt$g,"/global",sub.folder,"/correlation/QuSAGE_Epi_",i,".Table.c5.csv"),row.names=TRUE,col.names=NA,append=FALSE,sep=",")
}}
rm(max.x.rg.dws)
rm(min.x.rg.dws)
rm(max.y.rg.dws)
rm(max.x.rg.lgea)
rm(min.x.rg.lgea)
rm(max.y.rg.lgea)
#generate ID tables
rm(qs.Cor.table)
for (j in sets){
for (i in 1:Number.Clusters){
qs <- qsTable(get(paste0("qs.results.epi.",j,".",i)),number=length(get(paste0("gene.set.",j))))
qs <- qs[(qs$FDR < 0.05) & (qs$log.fold.change > 0),1:2]
if (nrow(qs)==0){
qs <- NULL
qs$pathway.name <- "?"
qs$log.fold.change <- 0
qs <- as.data.frame(qs)
}
qs$Cluster <- i
if (i==1){
qs.Cor.table <- qs
} else {
qs.Cor.table <- rbind(qs.Cor.table,qs)
}}
rownames(qs.Cor.table) <- NULL
rm(qs)
write.table(qs.Cor.table,file=paste0("./analysis/",opt$g,"/global",sub.folder,"/correlation/QuSAGE_Epi.Table.",j,".csv"),row.names=TRUE,col.names=NA,append=FALSE,sep=",")
assign(paste0("qs.Cor.table.Epi.",j),qs.Cor.table)
}
rm(qs.Cor.table)
rm(list=ls(pattern="^gene.set"))
#save EpiPop labels
for (j in sets){
sc10x.Group <- SetAllIdent(object=sc10x.Group,id="Lineage")
sc10x.Group.Epi <- SetAllIdent(object=sc10x.Group.Epi,id=paste0("res",opt$r))
for (i in 1:Number.Clusters){
qs <- qsTable(get(paste0("qs.results.epi.",j,".",i)))
assign(paste0("clust.",i),names(sc10x.Group.Epi@ident[sc10x.Group.Epi@ident==i]))
sc10x.Group <- SetIdent(object=sc10x.Group,cells.use=get(paste0("clust.",i)),ident.use=as.character(factor(qs[which.max(qs$log.fold.change),1])))
sc10x.Group.Epi <- SetIdent(object=sc10x.Group.Epi,cells.use=get(paste0("clust.",i)),ident.use=as.character(factor(qs[which.max(qs$log.fold.change),1])))
}
sc10x.Group <- StashIdent(object=sc10x.Group,save.name=paste0("Epi.",j))
sc10x.Group.Epi <- StashIdent(object=sc10x.Group.Epi,save.name=paste0("Epi.",j))
}
rm(qs)
rm(list=ls(pattern="^clust."))
#generate tSNEs
for (i in sets){
sc10x.Group <- SetAllIdent(object=sc10x.Group,id=paste0("Epi.",i))
sc10x.Group.Epi <- SetAllIdent(object=sc10x.Group.Epi,id=paste0("Epi.",i))
postscript(paste0("./analysis/",opt$g,"/global",sub.folder,"/tSNE_Epi.",i,".eps"))
plot <- TSNEPlot(object=sc10x.Group,pt.size=5,do.label=TRUE,label.size=10,do.return=TRUE)
plot <- plot+theme(axis.text.x=element_text(size=20),axis.text.y=element_text(size=20),axis.title.x=element_text(size=20),axis.title.y=element_text(size=20),legend.text=element_text(size=20))
plot <- plot+guides(colour=guide_legend(override.aes=list(size=10)))
plot(plot)
dev.off()
postscript(paste0("./analysis/",opt$g,"/global",sub.folder,"/tSNE_EpiSubPop.",i,".eps"))
plot <- TSNEPlot(object=sc10x.Group.Epi,pt.size=5,do.label=TRUE,label.size=10,do.return=TRUE)
plot <- plot+theme(axis.text.x=element_text(size=20),axis.text.y=element_text(size=20),axis.title.x=element_text(size=20),axis.title.y=element_text(size=20),legend.text=element_text(size=20))
plot <- plot+guides(colour=guide_legend(override.aes=list(size=10)))
plot(plot)
dev.off()
}
rm(plot)
#make generic variables of run specific variables
for (i in sets){
Group.List <- c(Group.List,paste0("Epi.",i))
}
rm(i)
rm(j)
rm(sets)
rm(Number.Clusters)
opt.episubclust <- opt
rm(opt)
#save QuSAGE.EpiPop.Rda,IDepi.Rda file,Cumulative.RData files
save(list=ls(pattern="^qs."),file=paste0("./analysis/sc10x.",Project.Name,".",opt.episubclust$g,sub.file,".QuSAGE.Epi.Rda"))
rm(list=ls(pattern="^qs."))
save(sc10x.Group,file=paste0("./analysis/sc10x.",Project.Name,".",opt.episubclust$g,".cluster",sub.file,".IDepi.Rda"))
rm(sc10x.Group)
save(sc10x.Group.Epi,file=paste0("./analysis/sc10x.",Project.Name,".",opt.episubclust$g,".EpiSubcluster",sub.file,".IDepi.Rda"))
rm(sc10x.Group.Epi)
rm(sub.folder)
rm(sub.file)
save.image(file=paste0("./analysis/sc10x.",Project.Name,".Cumulative.RData"))
gc()
.libPaths("/home2/ghenry/R/x86_64-pc-linux-gnu-library/3.3")
library(optparse)
library(Seurat)
library(readr)
library(readxl)
library(qusage)
#retrive command line options
option_list=list(
make_option("--p",action="store",default="Pr",type='character',help="Project Name"),
make_option("--g",action="store",default="ALL",type='character',help="Group To Analyze"),
make_option("--r",action="store",default=0.1,type='numeric',help="Resolution"),
make_option("--st",action="store",default=TRUE,type='logical',help="Analyze Data With Stress Pops Removed?"),
make_option("--s",action="store",default=5000,type='integer',help="Number Of Cells To Downsample To"),
make_option("--lgea",action="store",default=TRUE,type='logical',help="Correlate to LGEA Epi?"),
make_option("--c2",action="store",default=FALSE,type='logical',help="Correlate to c2?"),
make_option("--c5",action="store",default=FALSE,type='logical',help="Correlate to c5?")
)
opt=parse_args(OptionParser(option_list=option_list))
rm(option_list)
Project.Name <- opt$p
if (opt$st==TRUE){
sub.folder <- "/NoStress"
sub.file <- ".NOStress"
} else {
sub.folder <- ""
sub.file <- ""
}
#load data
setwd("../")
load(paste0("./analysis/sc10x.",Project.Name,".Cumulative.RData"))
load(paste0("./analysis/sc10x.",Project.Name,".",opt$g,".cluster",sub.file,".IDlineage.Rda"))
sc10x.Group <- get(paste0("sc10x.",opt$g,".cluster",sub.file,".IDlineage"))
rm(list=paste0("sc10x.",opt$g,".cluster",sub.file,".IDlineage"))
load(paste0("./analysis/sc10x.",Project.Name,".",opt$g,".StSubcluster",sub.file,".Rda"))
#create folder structure
if (!dir.exists(paste0("./analysis/",opt$g))){
dir.create(paste0("./analysis/",opt$g))
}
if (!dir.exists(paste0("./analysis/",opt$g,"/global"))){
dir.create(paste0("./analysis/",opt$g,"/global"))
}
if (!dir.exists(paste0("./analysis/",opt$g,"/global",sub.folder))){
dir.create(paste0("./analysis/",opt$g,"/global",sub.folder))
}
if (!dir.exists(paste0("./analysis/",opt$g,"/global",sub.folder,"/correlation"))){
dir.create(paste0("./analysis/",opt$g,"/global",sub.folder,"/correlation"))
}
#load resolution
sc10x.Group <- SetAllIdent(object=sc10x.Group,id="Lineage")
sc10x.Group.St <- SetAllIdent(object=sc10x.Group.St,id=paste0("res",opt$r))
#downsample if number given
if (!is.na(opt$s)){
rnd <- sample(1:ncol(sc10x.Group.St@data),opt$s)
} else {
rnd <- 1:ncol(sc10x.Group.St@data)
}
eset <- as.data.frame(as.matrix(sc10x.Group.St@data))
eset <- eset[,rnd]
labels <- paste0("Cluster_",as.vector(factor(sc10x.Group.St@ident)))
labels <- labels[rnd]
rm(rnd)
#make QuSAGE labels
Number.Clusters <- length(unique(sc10x.Group.St@ident))
C <- list()
cC <- list()
for (i in 1:Number.Clusters){
t <- labels
t[!(t %in% paste0("Cluster_",i))] <- "REST"
C[i] <- list(i=t)
rm(t)
cC[i] <- paste0("Cluster_",i,"-REST")
}
rm(labels)
#load QuSAGE genesets
sets <- NULL
if (opt$lgea==TRUE){
sets=c(sets,"lgea")
}
gene.set3 <- read_excel("./genesets/journal.pcbi.1004575.s026.XLSX",skip=2)
gene.set3 <- gene.set3[,c(3,9)]
gene.set2 <- gene.set3[gene.set3$`CELL TYPE`=="C1 : Proliferative Fibroblast",]
gene.set2 <- as.list(gene.set2[!(is.na(gene.set2$`hu Gene Symbol`)),1])
names(gene.set2) <- "ProlFib"
gene.set <- c(gene.set2)
gene.set2 <- gene.set3[gene.set3$`CELL TYPE`=="C2: Myofibroblast / Smooth Muscle-like",]
gene.set2 <- as.list(gene.set2[!(is.na(gene.set2$`hu Gene Symbol`)),1])
names(gene.set2) <- "MyoFibSM"
gene.set <- c(gene.set,gene.set2)
gene.set2 <- gene.set3[gene.set3$`CELL TYPE`=="C3: Pericyte",]
gene.set2 <- as.list(gene.set2[!(is.na(gene.set2$`hu Gene Symbol`)),1])
names(gene.set2) <- "Peri"
gene.set <- c(gene.set,gene.set2)
gene.set2 <- gene.set3[gene.set3$`CELL TYPE`=="C5: Matrix Fibroblast",]
gene.set2 <- as.list(gene.set2[!(is.na(gene.set2$`hu Gene Symbol`)),1])
names(gene.set2) <- "MatFib"
gene.set <- c(gene.set,gene.set2)
gene.set2 <- gene.set3[gene.set3$`CELL TYPE`=="C7: Endothelial Cells",]
gene.set2 <- as.list(gene.set2[!(is.na(gene.set2$`hu Gene Symbol`)),1])
names(gene.set2) <- "EndoC"
gene.set <- c(gene.set,gene.set2)
gene.set2 <- gene.set3[gene.set3$`CELL TYPE`=="C8: Myeloid / Immune Cells",]
gene.set2 <- as.list(gene.set2[!(is.na(gene.set2$`hu Gene Symbol`)),1])
names(gene.set2) <- "MylImm"
gene.set.lgea <- c(gene.set,gene.set2)
rm(gene.set)
rm(gene.set2)
rm(gene.set3)
gene.set.c2 <- read.gmt("./genesets/c2.all.v6.1.symbols.gmt")
gene.set.c5 <- read.gmt("./genesets/c5.all.v6.1.symbols.gmt")
#run QuSAGE
for (i in 1:Number.Clusters){
if (opt$lgea==TRUE){
assign(paste0("qs.results.st.lgea.",i),qusage(eset,unlist(C[i]),unlist(cC[i]),gene.set.lgea))
}
if (opt$c2==TRUE){
assign(paste0("qs.results.st.c2.",i),qusage(eset,unlist(C[i]),unlist(cC[i]),gene.set.c2))
}
if (opt$c5==TRUE){
assign(paste0("qs.results.st.c5.",i),qusage(eset,unlist(C[i]),unlist(cC[i]),gene.set.c5))
}
}
rm(eset)
rm(C)
rm(cC)
#generate correlation figures
if (opt$lgea==TRUE){
max.x.rg.lgea <- 0
min.x.rg.lgea <- 0
max.y.rg.lgea <- 0
for (i in 1:Number.Clusters){
qs <- get(paste0("qs.results.st.lgea.",i))
if (max(qs$path.mean)>max.x.rg.lgea){
max.x.rg.lgea <- max(qs$path.mean)
if (min(qs$path.mean)<min.x.rg.lgea){
min.x.rg.lgea <- min(qs$path.mean)
}}
if (max(qs$path.PDF)>max.y.rg.lgea){
max.y.rg.lgea <- max(qs$path.PDF)
}}}
for (i in 1:Number.Clusters){
if (opt$lgea==TRUE){
qs <- get(paste0("qs.results.st.lgea.",i))
postscript(paste0("./analysis/",opt$g,"/global",sub.folder,"/correlation/QuSAGE_St_",i,".LGEA.eps"))
plotDensityCurves(qs,path.index=1:6,col=1:numPathways(qs),main=paste0("Cluster ",i),xlim=c(min.x.rg.lgea-0.05,max.x.rg.lgea+0.05),ylim=c(0,50*ceiling(max.y.rg.lgea/50)),xlab="Gene Set Activation")
legend("topright",legend=c("ProlFib","MyoFibSM","Peri","MatFib","EndoC","MylImm"),lty=1,col=1:numPathways(qs),lwd=1,cex=1,ncol=2)
dev.off()
}
if (opt$c2==TRUE){
qs <- get(paste0("qs.results.St.c2.",i))
postscript(paste0("./analysis/",opt$g,"/global",sub.folder,"/correlation/QuSAGE_St_",i,".CI.c2.eps"))
plotCIs(qs,path.index=1:numPathways(qs))
dev.off()
tab <- qsTable(qs,number=numPathways(qs),sort.by="logFC")
tab <- tab[tab$FDR<0.05,]
tab <- tab[order(-tab[,2]),]
write.table(tab,file=paste0("./analysis/",opt$g,"/global",sub.folder,"/correlation/QuSAGE_St_",i,".Table.c2.csv"),row.names=TRUE,col.names=NA,append=FALSE,sep=",")
}
if (opt$c5==TRUE){
qs <- get(paste0("qs.results.st.c5.",i))
postscript(paste0("./analysis/",opt$g,"/global",sub.folder,"/correlation/QuSAGE_St_",i,".CI.c5.eps"))
plotCIs(qs,path.index=1:numPathways(qs))
dev.off()
tab <- qsTable(qs,number=numPathways(qs),sort.by="logFC")
tab <- tab[tab$FDR<0.05,]
tab <- tab[order(-tab[,2]),]
write.table(tab,file=paste0("./analysis/",opt$g,"/global",sub.folder,"/correlation/QuSAGE_St_",i,".Table.c5.csv"),row.names=TRUE,col.names=NA,append=FALSE,sep=",")
}}
rm(max.x.rg.lgea)
rm(min.x.rg.lgea)
rm(max.y.rg.lgea)
#generate ID tables
rm(qs.Cor.table)
for (j in sets){
for (i in 1:Number.Clusters){
qs <- qsTable(get(paste0("qs.results.st.",j,".",i)),number=length(get(paste0("gene.set.",j))))
qs <- qs[(qs$FDR < 0.05) & (qs$log.fold.change > 0),1:2]
if (nrow(qs)==0){
qs <- NULL
qs$pathway.name <- "?"
qs$log.fold.change <- 0
qs <- as.data.frame(qs)
}
qs$Cluster <- i
if (i==1){
qs.Cor.table <- qs
} else {
qs.Cor.table <- rbind(qs.Cor.table,qs)
}}
rownames(qs.Cor.table) <- NULL
rm(qs)
write.table(qs.Cor.table,file=paste0("./analysis/",opt$g,"/global",sub.folder,"/correlation/QuSAGE_St.Table.",j,".csv"),row.names=TRUE,col.names=NA,append=FALSE,sep=",")
assign(paste0("qs.Cor.table.St.",j),qs.Cor.table)
}
rm(qs.Cor.table)
rm(list=ls(pattern="^gene.set"))
#save StPop labels
for (j in sets){
sc10x.Group <- SetAllIdent(object=sc10x.Group,id="Lineage")
sc10x.Group.St <- SetAllIdent(object=sc10x.Group.St,id=paste0("res",opt$r))
for (i in 1:Number.Clusters){
qs <- qsTable(get(paste0("qs.results.st.",j,".",i)))
assign(paste0("clust.",i),names(sc10x.Group.St@ident[sc10x.Group.St@ident==i]))
sc10x.Group <- SetIdent(object=sc10x.Group,cells.use=get(paste0("clust.",i)),ident.use=as.character(factor(qs[which.max(qs$log.fold.change),1])))
sc10x.Group.St <- SetIdent(object=sc10x.Group.St,cells.use=get(paste0("clust.",i)),ident.use=as.character(factor(qs[which.max(qs$log.fold.change),1])))
}
sc10x.Group <- StashIdent(object=sc10x.Group,save.name=paste0("St.",j))
sc10x.Group.St <- StashIdent(object=sc10x.Group.St,save.name=paste0("St.",j))
}
rm(qs)
rm(list=ls(pattern="^clust."))
#generate tSNEs
for (i in sets){
sc10x.Group <- SetAllIdent(object=sc10x.Group,id=paste0("St.",i))
sc10x.Group.St <- SetAllIdent(object=sc10x.Group.St,id=paste0("St.",i))
postscript(paste0("./analysis/",opt$g,"/global",sub.folder,"/tSNE_St.",i,".eps"))
plot <- TSNEPlot(object=sc10x.Group,pt.size=5,do.label=TRUE,label.size=10,do.return=TRUE)
plot <- plot+theme(axis.text.x=element_text(size=20),axis.text.y=element_text(size=20),axis.title.x=element_text(size=20),axis.title.y=element_text(size=20),legend.text=element_text(size=20))
plot <- plot+guides(colour=guide_legend(override.aes=list(size=10)))
plot(plot)
dev.off()
postscript(paste0("./analysis/",opt$g,"/global",sub.folder,"/tSNE_StSubPop.",i,".eps"))
plot <- TSNEPlot(object=sc10x.Group.St,pt.size=5,do.label=TRUE,label.size=10,do.return=TRUE)
plot <- plot+theme(axis.text.x=element_text(size=20),axis.text.y=element_text(size=20),axis.title.x=element_text(size=20),axis.title.y=element_text(size=20),legend.text=element_text(size=20))
plot <- plot+guides(colour=guide_legend(override.aes=list(size=10)))
plot(plot)
dev.off()
}
rm(plot)
#make generic variables of run specific variables
for (i in sets){
Group.List <- c(Group.List,paste0("St.",i))
}
rm(i)
rm(j)
rm(sets)
rm(Number.Clusters)
opt.stsubclust <- opt
rm(opt)
#save QuSAGE.EpiPop.Rda,IDepi.Rda file,Cumulative.RData files
save(list=ls(pattern="^qs."),file=paste0("./analysis/sc10x.",Project.Name,".",opt.episubclust$g,sub.file,".QuSAGE.Epi.Rda"))
rm(list=ls(pattern="^qs."))
save(sc10x.Group,file=paste0("./analysis/sc10x.",Project.Name,".",opt.episubclust$g,".cluster",sub.file,".IDepi.Rda"))
rm(sc10x.Group)
save(sc10x.Group.St,file=paste0("./analysis/sc10x.",Project.Name,".",opt.stsubclust$g,".StSubcluster",sub.file,".IDepi.Rda"))
rm(sc10x.Group.St)
rm(sub.folder)
rm(sub.file)
save.image(file=paste0("./analysis/sc10x.",Project.Name,".Cumulative.RData"))
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment