Skip to content
Snippets Groups Projects
Commit 03cc2e3f authored by Gervaise Henry's avatar Gervaise Henry :cowboy:
Browse files

Add St QuSAGE for UK clusters

parent 8fd6047e
No related merge requests found
gc()
.libPaths("/home2/ghenry/R/x86_64-pc-linux-gnu-library/3.3")
library(optparse)
library(Seurat)
library(readr)
library(readxl)
library(qusage)
#retrive command line options
option_list=list(
make_option("--p",action="store",default="Pr",type='character',help="Project Name"),
make_option("--g",action="store",default="ALL",type='character',help="Group To Analyze"),
make_option("--r",action="store",default=0.1,type='numeric',help="Resolution"),
make_option("--st",action="store",default=TRUE,type='logical',help="Analyze Data With Stress Pops Removed?"),
make_option("--s",action="store",default=5000,type='integer',help="Number Of Cells To Downsample To"),
make_option("--go",action="store",default=TRUE,type='logical',help="Correlate to GO St?"),
make_option("--lgea",action="store",default=TRUE,type='logical',help="Correlate to LGEA St?"),
make_option("--c2",action="store",default=FALSE,type='logical',help="Correlate to c2?"),
make_option("--c5",action="store",default=FALSE,type='logical',help="Correlate to c5?")
)
opt=parse_args(OptionParser(option_list=option_list))
rm(option_list)
Project.Name <- opt$p
if (opt$st==TRUE){
sub.folder <- "/NoStress"
sub.file <- ".NOStress"
} else {
sub.folder <- ""
sub.file <- ""
}
#load data
setwd("../")
if (file.exists(paste0("./analysis/sc10x.",Project.Name,".",opt$g,".UKSubcluster",sub.file,".Rda"))){
load(paste0("./analysis/sc10x.",Project.Name,".Cumulative.RData"))
load(paste0("./analysis/sc10x.",Project.Name,".",opt$g,".cluster",sub.file,".IDlineage.Rda"))
sc10x.Group <- get(paste0("sc10x.",opt$g,".cluster",sub.file,".IDlineage"))
rm(list=paste0("sc10x.",opt$g,".cluster",sub.file,".IDlineage"))
load(paste0("./analysis/sc10x.",Project.Name,".",opt$g,".UKSubcluster",sub.file,".Rda"))
#create folder structure
if (!dir.exists(paste0("./analysis/",opt$g))){
dir.create(paste0("./analysis/",opt$g))
}
if (!dir.exists(paste0("./analysis/",opt$g,"/global"))){
dir.create(paste0("./analysis/",opt$g,"/global"))
}
if (!dir.exists(paste0("./analysis/",opt$g,"/global",sub.folder))){
dir.create(paste0("./analysis/",opt$g,"/global",sub.folder))
}
if (!dir.exists(paste0("./analysis/",opt$g,"/global",sub.folder,"/correlation"))){
dir.create(paste0("./analysis/",opt$g,"/global",sub.folder,"/correlation"))
}
#load resolution
sc10x.Group <- SetAllIdent(object=sc10x.Group,id="Lineage")
sc10x.Group.UK <- SetAllIdent(object=sc10x.Group.UK,id=paste0("res",opt$r))
#downsample if number given
if (!is.na(opt$s)){
rnd <- sample(1:ncol(sc10x.Group.UK@data),opt$s)
} else {
rnd <- 1:ncol(sc10x.Group.UK@data)
}
eset <- as.data.frame(as.matrix(sc10x.Group.UK@data))
eset <- eset[,rnd]
labels <- paste0("Cluster_",as.vector(factor(sc10x.Group.UK@ident)))
labels <- labels[rnd]
rm(rnd)
#make QuSAGE labels
Number.Clusters <- length(unique(sc10x.Group.UK@ident))
C <- list()
cC <- list()
for (i in 1:Number.Clusters){
t <- labels
t[!(t %in% paste0("Cluster_",i))] <- "REST"
C[i] <- list(i=t)
rm(t)
cC[i] <- paste0("Cluster_",i,"-REST")
}
rm(labels)
#load QuSAGE genesets
sets <- NULL
if (opt$go==TRUE){
sets=c(sets,"go")
}
if (opt$lgea==TRUE){
sets=c(sets,"lgea")
}
gene.set2 <- read_delim("./genesets/DEG_C5.BP.M11704.txt","\t",escape_double=FALSE,trim_ws=TRUE,col_names=TRUE)
gene.set2 <- as.list(gene.set2[-1,])
names(gene.set2) <- "Endo"
gene.set <- c(gene.set2)
gene.set2 <- read_delim("./genesets/DEG_C5.BP.M10794.txt","\t",escape_double=FALSE,trim_ws=TRUE,col_names=TRUE)
gene.set2 <- as.list(gene.set2[-1,])
names(gene.set2) <- "SM"
gene.set <- c(gene.set,gene.set2)
gene.set2 <- read_delim("./genesets/DEG_C5.BP.M13024.txt","\t",escape_double=FALSE,trim_ws=TRUE,col_names=TRUE)
gene.set2 <- as.list(gene.set2[-1,])
names(gene.set2) <- "Fib"
gene.set.go <- c(gene.set,gene.set2)
rm(gene.set)
rm(gene.set2)
gene.set3 <- read_excel("./genesets/journal.pcbi.1004575.s026.XLSX",skip=2)
gene.set3 <- gene.set3[,c(3,9)]
gene.set2 <- gene.set3[gene.set3$`CELL TYPE`=="C1 : Proliferative Fibroblast",]
gene.set2 <- as.list(gene.set2[!(is.na(gene.set2$`hu Gene Symbol`)),1])
names(gene.set2) <- "ProlFib"
gene.set <- c(gene.set2)
gene.set2 <- gene.set3[gene.set3$`CELL TYPE`=="C2: Myofibroblast / Smooth Muscle-like",]
gene.set2 <- as.list(gene.set2[!(is.na(gene.set2$`hu Gene Symbol`)),1])
names(gene.set2) <- "MyoFibSM"
gene.set <- c(gene.set,gene.set2)
gene.set2 <- gene.set3[gene.set3$`CELL TYPE`=="C3: Pericyte",]
gene.set2 <- as.list(gene.set2[!(is.na(gene.set2$`hu Gene Symbol`)),1])
names(gene.set2) <- "Peri"
gene.set <- c(gene.set,gene.set2)
gene.set2 <- gene.set3[gene.set3$`CELL TYPE`=="C5: Matrix Fibroblast",]
gene.set2 <- as.list(gene.set2[!(is.na(gene.set2$`hu Gene Symbol`)),1])
names(gene.set2) <- "MatFib"
gene.set <- c(gene.set,gene.set2)
gene.set2 <- gene.set3[gene.set3$`CELL TYPE`=="C7: Endothelial Cells",]
gene.set2 <- as.list(gene.set2[!(is.na(gene.set2$`hu Gene Symbol`)),1])
names(gene.set2) <- "EndoC"
gene.set <- c(gene.set,gene.set2)
gene.set2 <- gene.set3[gene.set3$`CELL TYPE`=="C8: Myeloid / Immune Cells",]
gene.set2 <- as.list(gene.set2[!(is.na(gene.set2$`hu Gene Symbol`)),1])
names(gene.set2) <- "MylImm"
gene.set.lgea <- c(gene.set,gene.set2)
rm(gene.set)
rm(gene.set2)
rm(gene.set3)
gene.set.c2 <- read.gmt("./genesets/c2.all.v6.1.symbols.gmt")
gene.set.c5 <- read.gmt("./genesets/c5.all.v6.1.symbols.gmt")
#run QuSAGE
for (i in 1:Number.Clusters){
if (opt$go==TRUE){
assign(paste0("qs.results.uk.go.",i),qusage(eset,unlist(C[i]),unlist(cC[i]),gene.set.go))
}
if (opt$lgea==TRUE){
assign(paste0("qs.results.uk.lgea.",i),qusage(eset,unlist(C[i]),unlist(cC[i]),gene.set.lgea))
}
if (opt$c2==TRUE){
assign(paste0("qs.results.uk.c2.",i),qusage(eset,unlist(C[i]),unlist(cC[i]),gene.set.c2))
}
if (opt$c5==TRUE){
assign(paste0("qs.results.uk.c5.",i),qusage(eset,unlist(C[i]),unlist(cC[i]),gene.set.c5))
}
}
rm(eset)
rm(C)
rm(cC)
#generate correlation figures
if (opt$go==TRUE){
max.x.rg.go <- 0
min.x.rg.go <- 0
max.y.rg.go <- 0
for (i in 1:Number.Clusters){
qs <- get(paste0("qs.results.uk.go.",i))
if (max(qs$path.mean)>max.x.rg.go){
max.x.rg.go <- max(qs$path.mean)
if (min(qs$path.mean)<min.x.rg.go){
min.x.rg.go <- min(qs$path.mean)
}}
if (max(qs$path.PDF)>max.y.rg.go){
max.y.rg.go <- max(qs$path.PDF)
}}}
if (opt$lgea==TRUE){
max.x.rg.lgea <- 0
min.x.rg.lgea <- 0
max.y.rg.lgea <- 0
for (i in 1:Number.Clusters){
qs <- get(paste0("qs.results.uk.lgea.",i))
if (max(qs$path.mean)>max.x.rg.lgea){
max.x.rg.lgea <- max(qs$path.mean)
if (min(qs$path.mean)<min.x.rg.lgea){
min.x.rg.lgea <- min(qs$path.mean)
}}
if (max(qs$path.PDF)>max.y.rg.lgea){
max.y.rg.lgea <- max(qs$path.PDF)
}}}
for (i in 1:Number.Clusters){
if (opt$go==TRUE){
qs <- get(paste0("qs.results.uk.go.",i))
postscript(paste0("./analysis/",opt$g,"/global",sub.folder,"/correlation/QuSAGE_UK_",i,".GO.eps"))
plotDensityCurves(qs,path.index=1:3,col=1:numPathways(qs),main=paste0("Cluster ",i),xlim=c(min.x.rg.lgea-0.05,max.x.rg.lgea+0.05),ylim=c(0,50*ceiling(max.y.rg.lgea/50)),xlab="Gene Set Activation")
legend("topright",legend=c("Enod","SM","Fib"),lty=1,col=1:numPathways(qs),lwd=1,cex=1,ncol=1)
dev.off()
}
if (opt$lgea==TRUE){
qs <- get(paste0("qs.results.uk.lgea.",i))
postscript(paste0("./analysis/",opt$g,"/global",sub.folder,"/correlation/QuSAGE_UK_",i,".LGEA.eps"))
plotDensityCurves(qs,path.index=1:6,col=1:numPathways(qs),main=paste0("Cluster ",i),xlim=c(min.x.rg.lgea-0.05,max.x.rg.lgea+0.05),ylim=c(0,50*ceiling(max.y.rg.lgea/50)),xlab="Gene Set Activation")
legend("topright",legend=c("ProlFib","MyoFibSM","Peri","MatFib","EndoC","MylImm"),lty=1,col=1:numPathways(qs),lwd=1,cex=1,ncol=2)
dev.off()
}
if (opt$c2==TRUE){
qs <- get(paste0("qs.results.uk.c2.",i))
postscript(paste0("./analysis/",opt$g,"/global",sub.folder,"/correlation/QuSAGE_UK_",i,".CI.c2.eps"))
plotCIs(qs,path.index=1:numPathways(qs))
dev.off()
tab <- qsTable(qs,number=numPathways(qs),sort.by="logFC")
tab <- tab[tab$FDR<0.05,]
tab <- tab[order(-tab[,2]),]
write.table(tab,file=paste0("./analysis/",opt$g,"/global",sub.folder,"/correlation/QuSAGE_UK_",i,".Table.c2.csv"),row.names=TRUE,col.names=NA,append=FALSE,sep=",")
}
if (opt$c5==TRUE){
qs <- get(paste0("qs.results.uk.c5.",i))
postscript(paste0("./analysis/",opt$g,"/global",sub.folder,"/correlation/QuSAGE_UK_",i,".CI.c5.eps"))
plotCIs(qs,path.index=1:numPathways(qs))
dev.off()
tab <- qsTable(qs,number=numPathways(qs),sort.by="logFC")
tab <- tab[tab$FDR<0.05,]
tab <- tab[order(-tab[,2]),]
write.table(tab,file=paste0("./analysis/",opt$g,"/global",sub.folder,"/correlation/QuSAGE_UK_",i,".Table.c5.csv"),row.names=TRUE,col.names=NA,append=FALSE,sep=",")
}}
rm(max.x.rg.go)
rm(min.x.rg.go)
rm(max.y.rg.go)
rm(max.x.rg.lgea)
rm(min.x.rg.lgea)
rm(max.y.rg.lgea)
#generate ID tables
rm(qs.Cor.table)
for (j in sets){
for (i in 1:Number.Clusters){
qs <- qsTable(get(paste0("qs.results.uk.",j,".",i)),number=length(get(paste0("gene.set.",j))))
qs <- qs[(qs$FDR < 0.05) & (qs$log.fold.change > 0),1:2]
if (nrow(qs)==0){
qs <- NULL
qs$pathway.name <- "?"
qs$log.fold.change <- 0
qs <- as.data.frame(qs)
}
qs$Cluster <- i
if (i==1){
qs.Cor.table <- qs
} else {
qs.Cor.table <- rbind(qs.Cor.table,qs)
}}
rownames(qs.Cor.table) <- NULL
rm(qs)
write.table(qs.Cor.table,file=paste0("./analysis/",opt$g,"/global",sub.folder,"/correlation/QuSAGE_UK.Table.",j,".csv"),row.names=TRUE,col.names=NA,append=FALSE,sep=",")
assign(paste0("qs.Cor.table.uk.",j),qs.Cor.table)
}
rm(qs.Cor.table)
rm(list=ls(pattern="^gene.set"))
#save StPop labels
for (j in sets){
sc10x.Group <- SetAllIdent(object=sc10x.Group,id="Lineage")
sc10x.Group.UK <- SetAllIdent(object=sc10x.Group.UK,id=paste0("res",opt$r))
for (i in 1:Number.Clusters){
qs <- qsTable(get(paste0("qs.results.uk.",j,".",i)))
assign(paste0("clust.",i),names(sc10x.Group.UK@ident[sc10x.Group.UK@ident==i]))
sc10x.Group <- SetIdent(object=sc10x.Group,cells.use=get(paste0("clust.",i)),ident.use=as.character(factor(qs[which.max(qs$log.fold.change),1])))
sc10x.Group.UK <- SetIdent(object=sc10x.Group.UK,cells.use=get(paste0("clust.",i)),ident.use=as.character(factor(qs[which.max(qs$log.fold.change),1])))
}
sc10x.Group <- StashIdent(object=sc10x.Group,save.name=paste0("UK.",j))
sc10x.Group.UK <- StashIdent(object=sc10x.Group.UK,save.name=paste0("UK.",j))
}
rm(qs)
rm(list=ls(pattern="^clust."))
#generate tSNEs
for (i in sets){
sc10x.Group <- SetAllIdent(object=sc10x.Group,id=paste0("UK.",i))
sc10x.Group.UK <- SetAllIdent(object=sc10x.Group.UK,id=paste0("UK.",i))
postscript(paste0("./analysis/",opt$g,"/global",sub.folder,"/tSNE_UK.",i,".eps"))
plot <- TSNEPlot(object=sc10x.Group,pt.size=5,do.label=TRUE,label.size=10,do.return=TRUE)
plot <- plot+theme(axis.text.x=element_text(size=20),axis.text.y=element_text(size=20),axis.title.x=element_text(size=20),axis.title.y=element_text(size=20),legend.text=element_text(size=20))
plot <- plot+guides(colour=guide_legend(override.aes=list(size=10)))
plot(plot)
dev.off()
postscript(paste0("./analysis/",opt$g,"/global",sub.folder,"/tSNE_UKSubPop.",i,".eps"))
plot <- TSNEPlot(object=sc10x.Group.UK,pt.size=5,do.label=TRUE,label.size=10,do.return=TRUE)
plot <- plot+theme(axis.text.x=element_text(size=20),axis.text.y=element_text(size=20),axis.title.x=element_text(size=20),axis.title.y=element_text(size=20),legend.text=element_text(size=20))
plot <- plot+guides(colour=guide_legend(override.aes=list(size=10)))
plot(plot)
dev.off()
}
rm(plot)
#make generic variables of run specific variables
for (i in sets){
Group.List <- c(Group.List,paste0("UK.",i))
}
rm(i)
rm(j)
rm(sets)
rm(Number.Clusters)
opt.UKsubclust <- opt
rm(opt)
#save QuSAGE.UK.Rda,IDepi+st.Rda file,Cumulative.RData files
save(list=ls(pattern="^qs."),file=paste0("./analysis/sc10x.",Project.Name,".",opt.UKsubclust$g,sub.file,".QuSAGE.UK.Rda"))
rm(list=ls(pattern="^qs."))
save(sc10x.Group,file=paste0("./analysis/sc10x.",Project.Name,".",opt.UKsubclust$g,".cluster",sub.file,".IDepi+st.Rda"))
rm(sc10x.Group)
save(sc10x.Group.UK,file=paste0("./analysis/sc10x.",Project.Name,".",opt.UKsubclust$g,".UKSubcluster",sub.file,".IDst.Rda"))
rm(sc10x.Group.UK)
rm(sub.folder)
rm(sub.file)
save.image(file=paste0("./analysis/sc10x.",Project.Name,".Cumulative.RData"))
}
\ No newline at end of file
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment