Skip to content
Snippets Groups Projects
Commit ce300d01 authored by Gervaise Henry's avatar Gervaise Henry :cowboy:
Browse files

Merge branch 'refactor' into 'develop'

Refactor

See merge request !5
parents 94067e3e 1210457a
Branches
Tags
2 merge requests!6Develop,!5Refactor
Showing
with 770 additions and 36752 deletions
......@@ -8,37 +8,9 @@
#SBATCH --mail-type ALL
#SBATCH --mail-user gervaise.henry@utsouthwestern.edu
git pull origin Seurat3.0
rm ../analysis/*.rda
rm ../analysis/*.RData
rm -r ../analysis/cor/
rm -r ../analysis/qc/
rm -r ../analysis/score_id/
rm -r ../analysis/shiny/
rm -r ../analysis/vis/
module load python/3.6.4-anaconda
source activate umap
module load R/3.5.1-gccmkl
module load gdal
module load cairo/1.14.8
module load R/4.0.2-gccmkl
module load hdf5_18/1.8.17
Rscript ../r.scripts/sc-TissueMapper_RUN.R --p "$1" --s "$2"
module unload R/3.5.1-gccmkl
module load R/3.6.1-gccmkl
if [[ "$1" == "huPr_PdPgb" ]] || [[ "$1" == "huPr_PdPb" ]] || [[ "$1" == "huBl" ]]
then
Rscript ../r.scripts/SingleR.R --p "$1" --s "$2" --o "$3"
elif [[ "$1" == "muPrUr" ]]
then
Rscript ../r.scripts/huPr_muPr.R --p "$1" --r "$3"
fi
module unload R/3.5.1-gccmkl
module load R/3.6.1-gccmkl
if [[ "$1" == "huPr_PdPgb" ]]
then
Rscript ../r.scripts/diy_PdPgb.R
elif [[ "$1" == "muPrUr" ]]
then
Rscript ../r.scripts/diy_muPrUr_Epi.R
fi
Rscript ../r.scripts/sc-TissueMapper_RAW.R --p "$1" --s "$2" --m "$3"
Rscript ../r.scripts/sc-TissueMapper_ID.R --p "$1" --s "$2" --o "$4"
#!/bin/bash
#SBATCH --job-name sc10x.id
#SBATCH -p 128GB,256GB,256GBv1,384GB
#SBATCH -p 256GB,256GBv1,384GB
#SBATCH -N 1
#SBATCH -t 7-0:0:0
#SBATCH -o job_%j.out
......@@ -8,9 +8,8 @@
#SBATCH --mail-type ALL
#SBATCH --mail-user gervaise.henry@utsouthwestern.edu
module load python/3.6.4-anaconda
source activate umap
module load R/3.6.1-gccmkl
module load gdal
module load cairo/1.14.8
module load R/4.0.2-gccmkl
module load hdf5_18/1.8.17
Rscript ../r.scripts/SingleR.R --p "$1" --s "$2" --o "$3"
Rscript ../r.scripts/sc-TissueMapper_ID.R --p "$1" --s "$2" --o "$4"
......@@ -8,9 +8,8 @@
#SBATCH --mail-type ALL
#SBATCH --mail-user gervaise.henry@utsouthwestern.edu
module load python/3.6.4-anaconda
source activate umap
module load R/3.5.1-gccmkl
module load gdal
module load cairo/1.14.8
module load R/4.0.2-gccmkl
module load hdf5_18/1.8.17
Rscript ../r.scripts/sc-TissueMapper_RUN.R --p "$1" --s "$2"
Rscript ../r.scripts/sc-TissueMapper_RAW.R --p "$1" --s "$2" --m "$3"
#!/bin/bash
# Input: project name (eg. huPr_Pd)
mkdir ../analysis/DATA/10x
for i in `cat ../analysis/DATA/${1}-demultiplex.csv`; do
if [[ ${i} = *Samples* ]]
if [[ ${i} != *Samples* ]]
then
continue
else
sample=`echo ${i} | cut -f1 -d ','`
mkdir ../analysis/DATA/10x/${sample}
ln -s /work/urology/ghenry/RNA-Seq/SingleCell/PIPELINE/DATA/"${sample}"/outs/* ../analysis/DATA/10x/${sample}
ln -s /work/urology/ghenry/RNA-Seq/SingleCell/ANALYSIS/DATA/"${sample}"/outs/* ../analysis/DATA/10x/${sample}
fi
done
This source diff could not be displayed because it is too large. You can view the blob instead.
This diff is collapsed.
This source diff could not be displayed because it is too large. You can view the blob instead.
This diff is collapsed.
File deleted
"","p_val","avg_logFC","pct.1","pct.2","p_val_adj"
"JUN",0,2.78514474389602,0.953,0.609,0
"FOS",0,2.6180888667142,0.843,0.459,0
"EGR1",0,2.27744499658095,0.827,0.313,0
"ATF3",0,2.24777061964067,0.877,0.331,0
"JUNB",0,1.97088620438955,0.899,0.663,0
"GADD45B",0,1.92104125732309,0.72,0.423,0
"IER2",0,1.88242169361821,0.806,0.625,0
"ZFP36",0,1.86471330757214,0.93,0.655,0
"DNAJB1",0,1.81462475826555,0.91,0.467,0
"RHOB",0,1.70021664456101,0.72,0.179,0
"NR4A1",0,1.63532584305745,0.727,0.208,0
"UBC",0,1.6303655140661,0.991,0.936,0
File deleted
van.den.Brink1
Fos
Hspa1a
Jun
Fosb
Junb
Egr1
Hspa1b
Ubc
Zfp36
Hspb1
Hsp90aa1
Mt2
Dnajb1
Btg2
Nr4a1
Cebpd
Hspa8
Mt1
Ier2
Dnaja1
Socs3
Atf3
Jund
Cebpb
Id3
Ppp1r15a
Hspe1
Cxcl1
Dusp1
Hsp90ab1
Nfkbia
Hsph1
Samples,ALL,Pz,Tz,D17,D27,D35
D17PrPzF_Via,1,1,0,1,0,0
D17PrTzF_Via,1,0,1,1,0,0
D27PrTzF_Via,1,0,1,0,1,0
D27PrPzF_Via,1,1,0,0,1,0
D35PrPzF_Via,1,1,0,0,0,1
D35PrTzF_Via,1,0,1,0,0,1
Samples,ALL,Keep,Zone
D17PrPzF_Via,1,1,Pz
D17PrTzF_Via,1,1,Tz
D27PrPzF_Via,1,1,Pz
D27PrTzF_Via,1,1,Tz
D35PrPzF_Via,1,1,Pz
D35PrTzF_Via,1,1,Tz
gc()
library(methods)
library(optparse)
library(Seurat)
library(readr)
library(readxl)
library(fBasics)
library(pastecs)
library(qusage)
library(RColorBrewer)
library(dplyr)
library(viridis)
library(gridExtra)
library(SingleR)
library(sctransform)
library(autothresholdr)
library(ggplot2)
options(bitmapType="cairo")
option_list=list(
make_option("--p",action="store",default="Biopsy",type='character',help="Project Name"),
make_option("--s",action="store",default="hu",type='character',help="Species"),
make_option("--o",action="store",default="pr",type='character',help="Organ")
)
opt=parse_args(OptionParser(option_list=option_list))
rm(option_list)
setwd("../")
source("./r.scripts/sc-TissueMapper_functions.R")
source("./r.scripts/sc-TissueMapper_process.R")
if (opt$s == "hu"){
hpca.se <- HumanPrimaryCellAtlasData()
lin.se <- hpca.se
leu.se <- hpca.se
} else if (opt$s == "mu"){
igd.se <- ImmGenData()
#lin.se <- igd.se
#leu.se <- igd.se
lin.se <- subset(igd.se,select=(as.numeric(gsub("_","",substr(colnames(igd.se),4,10))) >= 920648 & as.numeric(gsub("_","",substr(colnames(igd.se),4,10))) <= 2112477))
lin.se <- subset(lin.se,select=!(as.numeric(gsub("_","",substr(colnames(lin.se),4,10))) %in% 1398469:1398471))
leu.se <- lin.se
}
if (opt$o == "pr" && opt$s == "hu"){
load("./genesets/Pd.shallow.Rda")
try(
if (as.numeric(substring(sc10x@version,1,1))<3){
sc10x <- UpdateSeuratObject(sc10x)
}
)
scDWSpr.se <- as.SingleCellExperiment(sc10x,assay="RNA")
scDWSpr.se$Lin[scDWSpr.se$Lin == "Unknown"] <- "Leu"
levels(scDWSpr.se$ident)[levels(scDWSpr.se$ident)=="OE2"] <- "Hillock"
levels(scDWSpr.se$ident)[levels(scDWSpr.se$ident)=="OE1"] <- "Club"
rm(sc10x)
}
load("./analysis/sc10x.raw.rda")
sc10x.se <- as.SingleCellExperiment(sc10x)
common <- intersect(rownames(sc10x.se),rownames(lin.se))
lin.se <- lin.se[common,]
sc10x.se <- sc10x.se[common,]
rm(common)
singler.lin <- SingleR(sc10x.se,ref=lin.se,method="cluster",clusters=sc10x.se$integrated_snn_res.5,labels=lin.se$label.fine,BPPARAM=MulticoreParam(workers=10))
sc10x$lin <- singler.lin$labels[match(sc10x.se$integrated_snn_res.5,singler.lin@rownames)]
#singler.lin <- SingleR(sc10x.se,ref=lin.se,method="single",labels=lin.se$label.main,BPPARAM=MulticoreParam(workers=10))
#sc10x$lin <- singler.lin$labels
sc10x$lin[sc10x$lin %in% unique(lin.se@colData[lin.se@colData[,"label.main"] %in% c(
c("DC","B_cell","Neutrophil","T_cells","Monocyte","Macrophage","NK_cell","Neutrophils","CMP","GMP","MEP","Myelocyte","Pre-B_cell_CD34-","Pro-B_cell_CD34+","Pro-Myelocyte","HSC_-G-CSF","HSC_CD34+"),
c("Macrophages","Monocytes","B cells","DC","Eosinophils","Neutrophils","T cells","ILC","NK cells","Basophils","Mast cells","Tgd","NKT","B cells, pro","Microglia")
),c("label.main","label.fine")])$label.fine] <- "Leu"
sc10x$lin[sc10x$lin %in% unique(lin.se@colData[lin.se@colData[,"label.main"] %in% c(
c("Endothelial_cells","Erythroblast","Platelets"),
c("Endothelial cells")
),c("label.main","label.fine")])$label.fine] <- "Endo"
sc10x$lin[sc10x$lin %in% unique(lin.se@colData[lin.se@colData[,"label.main"] %in% c(
c("Smooth_muscle_cells","Fibroblasts","Chondrocytes","Osteoblasts","MSC","Tissue_stem_cells","iPS_cells"),
c("Stromal cells","Fibroblasts")
),c("label.main","label.fine")])$label.fine] <- "FMSt"
sc10x$lin[sc10x$lin %in% unique(lin.se@colData[lin.se@colData[,"label.main"] %in% c(
c("Epithelial_cells","Keratinocytes","Neuroepithelial_cell"),
c("Epithelial cells")
),c("label.main","label.fine")])$label.fine] <- "Epi"
#DimPlot(sc10x,group.by="lin",reduction="umap",label=TRUE,repel=TRUE)+theme(legend.position="none")
Idents(sc10x) <- "lin"
sc10x.epi <- subset(sc10x,idents="Epi")
sc10x.fmst <- subset(sc10x,idents="FMSt")
sc10x.st <- subset(sc10x,idents=setdiff(levels(sc10x),"Epi"))
sc10x.leu <- subset(sc10x,idents="Leu")
if (opt$o == "pr" && opt$s == "hu"){
sc10x.se.epi <- as.SingleCellExperiment(sc10x.epi,assay="RNA")
scDWSpr.se.epi <- scDWSpr.se[,scDWSpr.se$Lin=="Epi",]
common <- intersect(rownames(sc10x.se.epi),rownames(scDWSpr.se.epi))
scDWSpr.se.epi <- scDWSpr.se.epi[common,]
sc10x.se.epi <- sc10x.se.epi[common,]
rm(common)
sc10x.se.fmst <- as.SingleCellExperiment(sc10x.fmst,assay="RNA")
scDWSpr.se.fmst <- scDWSpr.se[,scDWSpr.se$Merge_Epi.dws_St.go %in% c("Fib","SM"),]
common <- intersect(rownames(sc10x.se.fmst),rownames(scDWSpr.se.fmst))
scDWSpr.se.fmst <- scDWSpr.se.fmst[common,]
sc10x.se.fmst <- sc10x.se.fmst[common,]
rm(common)
singler.epi <- SingleR(sc10x.se.epi,ref=scDWSpr.se.epi,labels=scDWSpr.se.epi$ident,BPPARAM=MulticoreParam(workers=10))
sc10x.epi$scDWSpr <- singler.epi$labels
Idents(sc10x.epi) <- "scDWSpr"
singler.fmst <- SingleR(sc10x.se.fmst,ref=scDWSpr.se.fmst,labels=scDWSpr.se.fmst$ident,BPPARAM=MulticoreParam(workers=10))
sc10x.fmst$scDWSpr <- singler.fmst$labels
Idents(sc10x.fmst) <- "scDWSpr"
}
sc10x.se.leu <- as.SingleCellExperiment(sc10x.leu,assay="RNA")
leu.se <- leu.se[,leu.se$label.main %in% c("DC","B_cell","Neutrophil","T_cells","Monocyte","Macrophage","NK_cell","Neutrophils","CMP","GMP","MEP","Myelocyte","Pre-B_cell_CD34-","Pro-B_cell_CD34+","Pro-Myelocyte","Macrophages","Monocytes","B cells","Eosinophils","T cells","ILC","NK cells","Basophils","Mast cells","Tgd","NKT","B cells, pro","Microglia")]
common <- intersect(rownames(sc10x.se.leu),rownames(leu.se))
leu.se <- leu.se[common,]
sc10x.se.leu <- sc10x.se.leu[common,]
rm(common)
singler.leu <- SingleR(sc10x.se.leu,ref=leu.se,labels=leu.se$label.main,BPPARAM=MulticoreParam(workers=10))
sc10x.leu$leu <- singler.leu$labels
Idents(sc10x.leu) <- "leu"
#DimPlot(sc10x.leu,group.by="leu",reduction="umap",label=TRUE,repel=TRUE)+theme(legend.position="none")
sc10x$pops <- sc10x$lin
sc10x$pops[names(sc10x.leu$leu)] <- sc10x.leu$leu
sc10x.epi$pops <- sc10x.epi$lin
sc10x.fmst$pops <- sc10x.fmst$lin
sc10x.st$pops <- sc10x.st$lin
sc10x.leu$pops <- sc10x.leu$leu
if (opt$o == "pr" && opt$s == "hu"){
sc10x$pops[names(sc10x.epi$scDWSpr)] <- sc10x.epi$scDWSpr
sc10x.epi$pops <- sc10x.epi$scDWSpr
sc10x$pops[names(sc10x.fmst$scDWSpr)] <- sc10x.fmst$scDWSpr
sc10x.fmst$pops <- sc10x.fmst$scDWSpr
sc10x.st$pops[names(sc10x.fmst$scDWSpr)] <- sc10x.fmst$scDWSpr
sc10x$pops[names(sc10x.leu$leu)] <- sc10x.leu$leu
sc10x.st$pops[names(sc10x.fmst$scDWSpr)] <- sc10x.fmst$scDWSpr
}
#DimPlot(sc10x,group.by="pops",reduction="umap",label=TRUE,repel=TRUE)+theme(legend.position="none")
sc10x$leu <- "non-Leu"
sc10x$leu[names(sc10x.leu$leu)] <- sc10x.leu$leu
#DimPlot(sc10x,group.by="leu",reduction="umap",label=TRUE,repel=TRUE)+theme(legend.position="none")
res <- c(seq(0.1,0.5,0.1),0.75,seq(1,5,1))
try({
results <- scPC(sc10x.epi,pc=100,hpc=0.9,file="epi",print="2")
sc10x.epi <- results[[1]]
pc.use.epi <- results[[2]]
rm(results)
sc10x.epi <- scCluster(sc10x.epi,res=res,red="pca",dim=pc.use.epi,print="2",folder="epi")
})
try({
results <- scPC(sc10x.fmst,pc=100,hpc=0.9,file="fmst",print="2")
sc10x.fmst <- results[[1]]
pc.use.fmst <- results[[2]]
rm(results)
sc10x.fmst <- scCluster(sc10x.fmst,res=res,red="pca",dim=pc.use.fmst,print="2",folder="fmst")
})
try({
results <- scPC(sc10x.st,pc=100,hpc=0.9,file="st",print="2")
sc10x.st <- results[[1]]
pc.use.st <- results[[2]]
rm(results)
sc10x.st <- scCluster(sc10x.st,res=res,red="pca",dim=pc.use.st,print="2",folder="st")
})
try({
results <- scPC(sc10x.leu,pc=100,hpc=0.9,file="leu",print="2")
sc10x.leu <- results[[1]]
pc.use.leu <- results[[2]]
rm(results)
sc10x.leu <- scCluster(sc10x.leu,res=res,red="pca",dim=pc.use.leu,print="2",folder="leu")
})
if (!dir.exists(paste0("./analysis/vis/singler"))){
dir.create(paste0("./analysis/vis/singler"))
}
plot <- DimPlot(sc10x,reduction="umap",label=TRUE,repel=TRUE)+theme(legend.position="none")
postscript(paste0("./analysis/vis/singler/UMAP_all.lin.eps"))
print(plot)
dev.off()
Idents(sc10x) <- "pops"
Idents(sc10x.epi) <- "pops"
Idents(sc10x.fmst) <- "pops"
Idents(sc10x.st) <- "pops"
Idents(sc10x.leu) <- "leu"
plot <- DimPlot(sc10x,reduction="umap",label=TRUE,repel=TRUE)+theme(legend.position="none")
postscript(paste0("./analysis/vis/singler/UMAP_all.eps"))
print(plot)
dev.off()
for (i in c("epi","fmst","st","leu")){
plot <- DimPlot(get(paste0("sc10x.",i)),reduction="umap",label=TRUE,repel=TRUE)+theme(legend.position="none")
postscript(paste0("./analysis/vis/singler/UMAP_",i,".eps"))
print(plot)
dev.off()
}
rm(list=ls(pattern="^sc10x.se"))
#scShinyOutput(sc10x,anal="id")
#scShinyOutput(sc10x.epi,anal="id.epi")
#scShinyOutput(sc10x.fmst,anal="id.fmst")
#scShinyOutput(sc10x.st,anal="id.st")
#scShinyOutput(sc10x.leu,anal="id.leu")
save(list=ls(pattern="^singler"),file='./analysis/singler_objects.RData')
save(list=ls(pattern="^sc10x"),file='./analysis/sc10x.id.rda')
save(list=ls(pattern="^sc10x.epi"),file='./analysis/sc10x.epi.id.rda')
save(list=ls(pattern="^sc10x.st"),file='./analysis/sc10x.st.id.rda')
#save.image(file="./analysis/sc10x.id.RData")
This diff is collapsed.
gc()
library(optparse)
library(Seurat)
library(readr)
library(dplyr)
library(autothresholdr)
library(RColorBrewer)
library(viridis)
library(gridExtra)
library(ggplot2)
options(future.globals.maxSize= 1000000000000)
option_list=list(
make_option("--p",action="store",default="huPr_PdPb",type='character',help="Project Name"),
make_option("--s",action="store",default="hu",type='character',help="Species"),
make_option("--m",action="store",default="Patient",type='character',help="Group to merge on")
)
opt=parse_args(OptionParser(option_list=option_list))
rm(option_list)
setwd("../")
source("./r.scripts/sc-TissueMapper_functions.R")
source("./r.scripts/sc-TissueMapper_process.R")
project.name=opt$p
scFolders()
results <- scLoad(p=project.name,cellranger=4,aggr=FALSE,ncell=0,nfeat=0,seurat=TRUE)
sc10x <- results[[1]]
sc10x.groups <- results[[2]]
rm(results)
sc10x <- sc10x[as.character(unlist(sc10x.groups[sc10x.groups$ALL==1,1]))]
results <- scQC(sc10x,sp=opt$s,feature=c("nFeature_RNA"))
sc10x <- results[[1]]
counts.cell.raw <- results[[2]]
counts.gene.raw <- results[[3]]
counts.cell.filtered.1umi <- results[[4]]
counts.gene.filtered.1umi <- results[[5]]
rm(results)
results <- scQC(sc10x,sp=opt$s,feature="percent.mito")
sc10x <- results[[1]]
counts.cell.filtered.2mito <- results[[4]]
counts.gene.filtered.2mito <- results[[5]]
rm(results)
results <- scQC(sc10x,sp=opt$s,feature="percent.ribo")
sc10x <- results[[1]]
counts.cell.filtered.3ribo <- results[[4]]
counts.gene.filtered.3ribo <- results[[5]]
rm(results)
results <- scQC(sc10x,sp=opt$s,feature="nCount_RNA")
sc10x <- results[[1]]
counts.cell.filtered.4gene <- results[[4]]
counts.gene.filtered.4gene <- results[[5]]
rm(results)
#sc10x <- lapply(sc10x,function(x) subset(x,subset= nCount_RNA>2500))
#counts.cell.filtered.3gene <- lapply(sc10x,ncol)
#counts.gene.filtered.3gene <- lapply(sc10x,nrow)
counts.cell.filtered <- counts.cell.filtered.4gene
counts.gene.filtered <- counts.gene.filtered.4gene
save.image(file="./analysis/sc10x.raw_filtered.RData")
sc10x <- sc10x[as.character(unlist(sc10x.groups[sc10x.groups$Keep==1,1]))]
pc.use.prestress <- list()
for (i in names(sc10x)){
sc10x.temp <- sc10x[[i]]
sc10x.temp <- SCTransform(sc10x.temp,vars.to.regress=c("nFeature_RNA","percent.mito"),verbose=FALSE,assay="RNA")
if (ncol(sc10x.temp) > 100) {
pc.calc <- 100
} else if (ncol(sc10x.temp) <= 100) {
pc.calc <- ncol(sc10x.temp)-1
}
results <- scPC(sc10x.temp,pc=pc.calc,hpc=0.9,file=paste0(i,".pre.stress"),print="umap",assay="SCT")
sc10x.temp <- results[[1]]
pc.use.prestress.temp <- results[[2]]
rm(results)
sc10x.temp <- scCluster(sc10x.temp,res=0.5,red="pca",dim=pc.use.prestress.temp,print="umap",folder=paste0("preStress/",i),assay="SCT")
sc10x[i] <- sc10x.temp
pc.use.prestress[i] <- pc.use.prestress.temp
rm(sc10x.temp)
rm(pc.calc)
rm(pc.use.prestress.temp)
}
rm(i)
if (opt$s == "hu"){
genes.stress <- read_delim("/work/urology/ghenry/RNA-Seq/SingleCell/ANALYSIS/REF/stress/genes.deg.Stress.csv",",",escape_double=FALSE,trim_ws=TRUE,col_names=TRUE)
genes.stress <- genes.stress[1]
colnames(genes.stress) <- "scDWS.Stress"
anchor <- c("EGR1","FOS","JUN")
} else if (opt$s == "mu"){
genes.stress <- read_delim("/work/urology/ghenry/RNA-Seq/SingleCell/ANALYSIS/REF/stress/van.den.Brink1.txt",",",escape_double=FALSE,trim_ws=TRUE,col_names=TRUE)
genes.stress <- genes.stress[1]
colnames(genes.stress) <- "van.den.Brink1.Stress"
anchor <- c("Egr1","Fos","Jun")
}
results <- scScore(sc10x,score="Stress",geneset=as.list(genes.stress),cut.pt="triangle",anchor=anchor)
#sc10x.preStress <- results[[1]]
sc10x <- results[[2]]
rm(results)
counts.cell.destress <- lapply(sc10x,ncol)
rm(anchor)
# merges <- NULL
# for (i in names(counts.cell.destress[as.character(unlist(sc10x.groups[sc10x.groups$Keep==1,1]))])){
# if (counts.cell.destress[[i]]<750){
# merges <- c(merges,i)
# }
# }
# rm(i)
# if (length(merges)>1){
# sc10x.merge <- merge(x=sc10x[[merges[1]]],y=sc10x[merges[-1]])
# sc10x[merges] <- NULL
# sc10x$merge <- sc10x.merge
# rm(sc10x.merge)
# } else if (length(merges)==1){
# merges <- c(merges,names(counts.cell.destress[counts.cell.destress == min(sapply(counts.cell.destress[setdiff(as.character(unlist(sc10x.groups[sc10x.groups$Keep==1,1])),merges)],min))]))
# sc10x.merge <- merge(x=sc10x[[merges[1]]],y=sc10x[merges[-1]])
# sc10x[merges] <- NULL
# sc10x$merge <- sc10x.merge
# }
# rm(merges)
merges <- NULL
for (i in unique(sc10x.groups[[opt$m]][sc10x.groups$Keep==1])){
for (j in sc10x.groups$Samples[sc10x.groups$Keep==1]){
if (sc10x.groups[sc10x.groups$Samples==j,][[opt$m]]==i){
merges <- c(merges,j)
}
}
if (length(merges)>1){
sc10x.merge <- merge(x=sc10x[[merges[1]]],y=sc10x[merges[-1]])
} else {
sc10x.merge <- sc10x[[merges[1]]]
}
sc10x[merges] <- NULL
sc10x[i] <- sc10x.merge
merges <- NULL
rm(sc10x.merge)
}
rm(merges)
rm(i)
rm(j)
gc()
if (length(sc10x)>1){
sc10x <- scAlign(sc10x)
sc10x@project.name <- project.name
sc10x$ALL <- "ALL"
}
gc()
save.image(file="./analysis/sc10x.raw_aligned.RData")
# if (ncol(sc10x) > 100) {
# pc.calc <- 100
# } else if (ncol(sc10x) <= 100) {
# pc.calc <- ncol(sc10x)-1
# }
# results <- scPC(sc10x,pc=pc.calc,hpc=0.9,file=paste0("pre.stress"),print="umap",assay="integrated")
# sc10x <- results[[1]]
# pc.use.prestress <- results[[2]]
# rm(results)
# sc10x <- scCluster(sc10x,res=0.5,red="pca",dim=pc.use.prestress,print="umap",folder=paste0("preStress"),assay="integrated")
# rm(pc.calc)
#
# results <- scScore(list(ALL=sc10x),score="Stress",geneset=as.list(genes.stress),cut.pt="triangle",anchor=anchor)
# #sc10x.preStress <- results[[1]]$ALL
# sc10x <- results[[2]]$ALL
# #sc10x <- results[[1]]$ALL
# rm(results)
# #counts.cell.destress <- lapply(sc10x,ncol)
# counts.cell.destress <- ncol(sc10x)
# rm(anchor)
#
# #sc10x <- SCTransform(sc10x,vars.to.regress=c("nFeature_RNA","percent.mito","Stress1"),verbose=FALSE,return.only.var.genes=FALSE,assay="RNA")
#
# save.image(file="./analysis/sc10x.raw_destress.RData")
if (ncol(sc10x) > 1000) {
pc.calc <- 1000
} else if (ncol(sc10x) > 500) {
pc.calc <- 500
} else if (ncol(sc10x) > 100) {
pc.calc <- ncol(sc10x)-1
}
results <- scPC(sc10x,pc=pc.calc,hpc=0.9,file="ALL",print="umap",assay="integrated")
sc10x <- results[[1]]
pc.use.poststress <- results[[2]]
rm(results)
rm(pc.calc)
res <- c(seq(0.1,0.5,0.1),0.75,seq(1,5,1))
sc10x <- scCluster(sc10x,res=res,red="pca",dim=pc.use.poststress,print="umap",folder="ALL",assay="integrated")
DefaultAssay(object=sc10x) <- "SCT"
sc10x@meta.data <- sc10x@meta.data[,c("samples","integrated_snn_res.0.1","integrated_snn_res.0.2","integrated_snn_res.0.3","integrated_snn_res.0.4","integrated_snn_res.0.5","integrated_snn_res.0.75","integrated_snn_res.1","integrated_snn_res.2","integrated_snn_res.3","integrated_snn_res.4","integrated_snn_res.5","nCount_RNA","nFeature_RNA","percent.mito","percent.ribo","Stress1")]
saveRDS(sc10x,paste0("./analysis/",project.name,"_raw.rds"))
library(sceasy)
library(reticulate)
use_condaenv('sceasy')
convertFormat(sc10x,from="seurat",to="anndata",outFile=paste0("/project/urology/Strand_lab/shared/cellxgene/anndata/",project.name,"_raw.h5ad"),assay="SCT",main_layer="scale.data")
saveRDS(sc10x,paste0("/project/urology/Strand_lab/shared/cellxgene/seurat/",project.name,"_raw.rds"))
rm(list=ls(pattern="sc10x"))
save.image(file="./analysis/sc10x.raw.RData")
gc()
library(methods)
library(optparse)
library(Seurat)
library(readr)
library(readxl)
library(fBasics)
library(pastecs)
library(qusage)
library(RColorBrewer)
library(monocle)
library(dplyr)
library(viridis)
library(gridExtra)
library(SingleR)
library(sctransform)
library(autothresholdr)
library(ggplot2)
options(bitmapType="cairo")
options(future.globals.maxSize= 1000000000000)
option_list=list(
make_option("--p",action="store",default="muBl",type='character',help="Project Name"),
make_option("--s",action="store",default="mu",type='character',help="Species")
)
opt=parse_args(OptionParser(option_list=option_list))
rm(option_list)
setwd("../")
source("./r.scripts/sc-TissueMapper_functions.R")
source("./r.scripts/sc-TissueMapper_process.R")
project.name=opt$p
scFolders()
results <- scLoad(p=project.name,cellranger=3,aggr=FALSE,ncell=0,nfeat=0)
sc10x <- results[[1]]
sc10x.groups <- results[[2]]
rm(results)
sc10x <- sc10x[as.character(unlist(sc10x.groups[sc10x.groups$ALL==1,1]))]
results <- scQC(sc10x,sp=opt$s,feature=c("nFeature_RNA"))
sc10x <- results[[1]]
counts.cell.raw <- results[[2]]
counts.gene.raw <- results[[3]]
counts.cell.filtered.1umi <- results[[4]]
counts.gene.filtered.1umi <- results[[5]]
rm(results)
results <- scQC(sc10x,sp=opt$s,feature="percent.mito")
sc10x <- results[[1]]
counts.cell.filtered.2mito <- results[[4]]
counts.gene.filtered.2mito <- results[[5]]
rm(results)
results <- scQC(sc10x,sp=opt$s,feature="nCount_RNA")
sc10x <- results[[1]]
counts.cell.filtered.3gene <- results[[4]]
counts.gene.filtered.3gene <- results[[5]]
rm(results)
#results <- scQC(sc10x,sp=opt$s,feature="percent.ribo")
#sc10x <- results[[1]]
#counts.cell.filtered.4ribo <- results[[4]]
#counts.gene.filtered.4ribo <- results[[5]]
#rm(results)
counts.cell.filtered <- counts.cell.filtered.3gene
counts.gene.filtered <- counts.gene.filtered.3gene
# results <- scCellCycle(sc10x,sub=FALSE,sp="hu")
# sc10x <- results[[1]]
# genes.s <- results[[2]]
# genes.g2m <- results[[3]]
# rm(results)
sc10x <- sc10x[as.character(unlist(sc10x.groups[sc10x.groups$Deep==1,1]))]
merges <- NULL
for (i in names(counts.cell.filtered[as.character(unlist(sc10x.groups[sc10x.groups$Deep==1,1]))])){
if (counts.cell.filtered[[i]]<750){
merges <- c(merges,i)
}
}
rm(i)
if (length(merges)>1){
sc10x.merge <- merge(x=sc10x[[merges[1]]],y=sc10x[merges[-1]])
sc10x[merges] <- NULL
sc10x$merge <- sc10x.merge
rm(sc10x.merge)
} else if (length(merges)==1){
merges <- c(merges,names(counts.cell.filtered[counts.cell.filtered == min(sapply(counts.cell.filtered[setdiff(as.character(unlist(sc10x.groups[sc10x.groups$Deep==1,1])),merges)],min))]))
sc10x.merge <- merge(x=sc10x[[merges[1]]],y=sc10x[merges[-1]])
sc10x[merges] <- NULL
sc10x$merge <- sc10x.merge
}
rm(merges)
if (length(sc10x)>1){
sc10x <- scCCA(sc10x)
sc10x@project.name <- project.name
sc10x$ALL <- "ALL"
}
results <- scPC(sc10x,pc=100,hpc=0.9,file="pre.stress",print="2")
sc10x <- results[[1]]
pc.use.prestress <- results[[2]]
rm(results)
sc10x <- scCluster(sc10x,res=0.5,red="pca",dim=pc.use.prestress,print="2",folder="pre.stress")
if (opt$s == "hu"){
genes.stress <- read_delim("./genesets/genes.deg.Stress.csv",",",escape_double=FALSE,trim_ws=TRUE,col_names=TRUE)
genes.stress <- genes.stress[1]
colnames(genes.stress) <- "scDWS.Stress"
anchor <- c("EGR1","FOS","JUN")
} else if (opt$s == "mu"){
genes.stress <- read_delim("./genesets/van.den.Brink1.txt",",",escape_double=FALSE,trim_ws=TRUE,col_names=TRUE)
genes.stress <- genes.stress[1]
colnames(genes.stress) <- "van.den.Brink1.Stress"
anchor <- c("Egr1","Fos","Jun")
}
results <- scScore(sc10x,score="Stress",geneset=as.list(genes.stress),cut.pt="renyi",anchor=anchor)
sc10x.preStress <- results[[1]]
sc10x <- results[[2]]
rm(results)
counts.cell.destress <- as.list(table(sc10x$samples))
rm(anchor)
#if (opt$p == "muPr" || opt$p == "muPrUr"){
# hto <- as.character(unlist(sc10x.groups[sc10x.groups$HTO==1,1]))
# sc10x.hto.data <- Read10X(data.dir=paste0("./analysis/DATA/10x/",hto,"/filtered_feature_bc_matrix/"))
# sc10x.hto <- CreateSeuratObject(counts=sc10x.hto.data$`Gene Expression`,project=hto)
# sc10x.hto[["HTO"]] <- CreateAssayObject(counts=sc10x.hto.data$`Antibody Capture`)
# rm(sc10x.hto.data)
# sc10x.hto <- NormalizeData(object=sc10x.hto,assay="HTO",normalization.method="CLR")
# sc10x.hto <- HTODemux(sc10x.hto,assay="HTO",positive.quantile=.99)
# sc10x.hto <- subset(sc10x.hto,cells=substr(names(sc10x$samples)[sc10x$samples == paste0(hto,"_GEX")],start=1,stop=16))
# # HTOHeatmap(sc10x.hto)
# # plot1 <- FeatureScatter(sc10x.hto,feature1 = "hto_Anterior-Prostate",feature2 = "hto_Ventral-Prostate",group.by = "HTO_maxID")
# # plot2 <- FeatureScatter(sc10x.hto,feature1 = "hto_Anterior-Prostate",feature2 = "hto_Dorsolateral-Prostate",group.by = "HTO_maxID")
# # plot3 <- FeatureScatter(sc10x.hto,feature1 = "hto_Dorsolateral-Prostate",feature2 = "hto_Ventral-Prostate",group.by = "HTO_maxID")
# # grid.arrange(plot1,plot2,plot3,ncol=1)
# Idents(sc10x) <- "samples"
# Idents(sc10x,cells=paste0(names(sc10x.hto$HTO_maxID)[sc10x.hto$HTO_maxID=="Anterior-Prostate"],"_4")) <- paste0(hto,"_AP")
# Idents(sc10x,cells=paste0(names(sc10x.hto$HTO_maxID)[sc10x.hto$HTO_maxID=="Dorsolateral-Prostate"],"_4")) <- paste0(hto,"_DLP")
# Idents(sc10x,cells=paste0(names(sc10x.hto$HTO_maxID)[sc10x.hto$HTO_maxID=="Ventral-Prostate"],"_4")) <- paste0(hto,"_VP")
# sc10x$HTO_maxID <- Idents(sc10x)
# sc10x$samples_HTO <- Idents(sc10x)
# Idents(sc10x,cells=paste0(names(sc10x.hto$hash.ID)[sc10x.hto$hash.ID=="Anterior-Prostate"],"_4")) <- paste0(hto,"_AP")
# Idents(sc10x,cells=paste0(names(sc10x.hto$hash.ID)[sc10x.hto$hash.ID=="Dorsolateral-Prostate"],"_4")) <- paste0(hto,"_DLP")
# Idents(sc10x,cells=paste0(names(sc10x.hto$hash.ID)[sc10x.hto$hash.ID=="Ventral-Prostate"],"_4")) <- paste0(hto,"_VP")
# Idents(sc10x,cells=paste0(names(sc10x.hto$hash.ID)[sc10x.hto$hash.ID=="Negative"],"_4")) <- paste0(hto,"_negative")
# Idents(sc10x,cells=paste0(names(sc10x.hto$hash.ID)[sc10x.hto$hash.ID=="Doublet"],"_4")) <- paste0(hto,"_doublet")
# sc10x$hash.ID <- Idents(sc10x)
#}
results <- scPC(sc10x,pc=100,hpc=0.9,file="post.stress",print="2")
sc10x <- results[[1]]
pc.use.poststress <- results[[2]]
rm(results)
res <- c(seq(0.1,0.5,0.1),0.75,seq(1,5,1))
sc10x <- scCluster(sc10x,res=res,red="pca",dim=pc.use.poststress,print="2",folder="ALL")
#if (opt$p == "muPr" || opt$p == "muPrUr"){
# rm(sc10x.hto.data)
# rm(sc10x.hto)
# Idents(sc10x) <- "samples"
# sc10x.tmp <- subset(sc10x,ident=paste0(hto,"_GEX"))
# postscript(paste0("./analysis/vis/ALL/HTO_HTO_maxID.eps"))
# DimPlot(sc10x.tmp,reduction="umap",group.by="HTO_maxID")
# dev.off()
# postscript(paste0("./analysis/vis/ALL/HTO_hash.ID.eps"))
# DimPlot(sc10x.tmp,reduction="umap",group.by="hash.ID")
# dev.off()
#}
DefaultAssay(object=sc10x) <- "SCT"
#scShinyOutput(sc10x,anal="raw")
save(sc10x,file=paste0("./analysis/sc10x.raw.rda"))
save.image(file="./analysis/sc10x.raw.RData")
This diff is collapsed.
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment