Commit 087770ca authored by Gervaise Henry's avatar Gervaise Henry 🤠
Browse files

Start refactor RUN

parent 94067e3e
......@@ -8,7 +8,6 @@
#SBATCH --mail-type ALL
#SBATCH --mail-user gervaise.henry@utsouthwestern.edu
git pull origin Seurat3.0
rm ../analysis/*.rda
rm ../analysis/*.RData
rm -r ../analysis/cor/
......
......@@ -11,6 +11,6 @@
module load python/3.6.4-anaconda
source activate umap
module load R/3.5.1-gccmkl
module load R/3.6.1-gccmkl
module load hdf5_18/1.8.17
Rscript ../r.scripts/sc-TissueMapper_RUN.R --p "$1" --s "$2"
#!/bin/bash
# Input: project name (eg. huPr_Pd)
mkdir ../analysis/DATA/10x
for i in `cat ../analysis/DATA/${1}-demultiplex.csv`; do
if [[ ${i} = *Samples* ]]
if [[ ${i} != *Samples* ]]
then
continue
else
sample=`echo ${i} | cut -f1 -d ','`
mkdir ../analysis/DATA/10x/${sample}
ln -s /work/urology/ghenry/RNA-Seq/SingleCell/PIPELINE/DATA/"${sample}"/outs/* ../analysis/DATA/10x/${sample}
ln -s /work/urology/ghenry/RNA-Seq/SingleCell/ANALYSIS/DATA/"${sample}"/outs/* ../analysis/DATA/10x/${sample}
fi
done
This source diff could not be displayed because it is too large. You can view the blob instead.
This diff is collapsed.
This source diff could not be displayed because it is too large. You can view the blob instead.
This diff is collapsed.
"","p_val","avg_logFC","pct.1","pct.2","p_val_adj"
"JUN",0,2.78514474389602,0.953,0.609,0
"FOS",0,2.6180888667142,0.843,0.459,0
"EGR1",0,2.27744499658095,0.827,0.313,0
"ATF3",0,2.24777061964067,0.877,0.331,0
"JUNB",0,1.97088620438955,0.899,0.663,0
"GADD45B",0,1.92104125732309,0.72,0.423,0
"IER2",0,1.88242169361821,0.806,0.625,0
"ZFP36",0,1.86471330757214,0.93,0.655,0
"DNAJB1",0,1.81462475826555,0.91,0.467,0
"RHOB",0,1.70021664456101,0.72,0.179,0
"NR4A1",0,1.63532584305745,0.727,0.208,0
"UBC",0,1.6303655140661,0.991,0.936,0
van.den.Brink1
Fos
Hspa1a
Jun
Fosb
Junb
Egr1
Hspa1b
Ubc
Zfp36
Hspb1
Hsp90aa1
Mt2
Dnajb1
Btg2
Nr4a1
Cebpd
Hspa8
Mt1
Ier2
Dnaja1
Socs3
Atf3
Jund
Cebpb
Id3
Ppp1r15a
Hspe1
Cxcl1
Dusp1
Hsp90ab1
Nfkbia
Hsph1
Samples,ALL,Pz,Tz,D17,D27,D35
D17PrPzF_Via,1,1,0,1,0,0
D17PrTzF_Via,1,0,1,1,0,0
D27PrTzF_Via,1,0,1,0,1,0
D27PrPzF_Via,1,1,0,0,1,0
D35PrPzF_Via,1,1,0,0,0,1
D35PrTzF_Via,1,0,1,0,0,1
Samples,ALL,Keep,Zone
D17PrPzF_Via,1,1,Pz
D17PrTzF_Via,1,1,Tz
D27PrPzF_Via,1,1,Pz
D27PrTzF_Via,1,1,Tz
D35PrPzF_Via,1,1,Pz
D35PrTzF_Via,1,1,Tz
......@@ -21,8 +21,8 @@ options(bitmapType="cairo")
options(future.globals.maxSize= 1000000000000)
option_list=list(
make_option("--p",action="store",default="muBl",type='character',help="Project Name"),
make_option("--s",action="store",default="mu",type='character',help="Species")
make_option("--p",action="store",default="huPr_Pd",type='character',help="Project Name"),
make_option("--s",action="store",default="hu",type='character',help="Species")
)
opt=parse_args(OptionParser(option_list=option_list))
rm(option_list)
......@@ -37,7 +37,7 @@ project.name=opt$p
scFolders()
results <- scLoad(p=project.name,cellranger=3,aggr=FALSE,ncell=0,nfeat=0)
results <- scLoad(p=project.name,cellranger=4,aggr=FALSE,ncell=0,nfeat=0,seurat=TRUE)
sc10x <- results[[1]]
sc10x.groups <- results[[2]]
rm(results)
......@@ -79,10 +79,10 @@ counts.gene.filtered <- counts.gene.filtered.3gene
# genes.g2m <- results[[3]]
# rm(results)
sc10x <- sc10x[as.character(unlist(sc10x.groups[sc10x.groups$Deep==1,1]))]
sc10x <- sc10x[as.character(unlist(sc10x.groups[sc10x.groups$Keep==1,1]))]
merges <- NULL
for (i in names(counts.cell.filtered[as.character(unlist(sc10x.groups[sc10x.groups$Deep==1,1]))])){
for (i in names(counts.cell.filtered[as.character(unlist(sc10x.groups[sc10x.groups$Keep==1,1]))])){
if (counts.cell.filtered[[i]]<750){
merges <- c(merges,i)
}
......@@ -94,7 +94,7 @@ if (length(merges)>1){
sc10x$merge <- sc10x.merge
rm(sc10x.merge)
} else if (length(merges)==1){
merges <- c(merges,names(counts.cell.filtered[counts.cell.filtered == min(sapply(counts.cell.filtered[setdiff(as.character(unlist(sc10x.groups[sc10x.groups$Deep==1,1])),merges)],min))]))
merges <- c(merges,names(counts.cell.filtered[counts.cell.filtered == min(sapply(counts.cell.filtered[setdiff(as.character(unlist(sc10x.groups[sc10x.groups$Keep==1,1])),merges)],min))]))
sc10x.merge <- merge(x=sc10x[[merges[1]]],y=sc10x[merges[-1]])
sc10x[merges] <- NULL
sc10x$merge <- sc10x.merge
......@@ -102,7 +102,7 @@ if (length(merges)>1){
rm(merges)
if (length(sc10x)>1){
sc10x <- scCCA(sc10x)
sc10x <- scAlign(sc10x)
sc10x@project.name <- project.name
sc10x$ALL <- "ALL"
}
......@@ -115,12 +115,12 @@ rm(results)
sc10x <- scCluster(sc10x,res=0.5,red="pca",dim=pc.use.prestress,print="2",folder="pre.stress")
if (opt$s == "hu"){
genes.stress <- read_delim("./genesets/genes.deg.Stress.csv",",",escape_double=FALSE,trim_ws=TRUE,col_names=TRUE)
genes.stress <- read_delim("/work/urology/ghenry/RNA-Seq/SingleCell/ANALYSIS/REF/stress/genes.deg.Stress.csv",",",escape_double=FALSE,trim_ws=TRUE,col_names=TRUE)
genes.stress <- genes.stress[1]
colnames(genes.stress) <- "scDWS.Stress"
anchor <- c("EGR1","FOS","JUN")
} else if (opt$s == "mu"){
genes.stress <- read_delim("./genesets/van.den.Brink1.txt",",",escape_double=FALSE,trim_ws=TRUE,col_names=TRUE)
genes.stress <- read_delim("/work/urology/ghenry/RNA-Seq/SingleCell/ANALYSIS/REF/stress/van.den.Brink1.txt",",",escape_double=FALSE,trim_ws=TRUE,col_names=TRUE)
genes.stress <- genes.stress[1]
colnames(genes.stress) <- "van.den.Brink1.Stress"
anchor <- c("Egr1","Fos","Jun")
......@@ -186,5 +186,11 @@ DefaultAssay(object=sc10x) <- "SCT"
#scShinyOutput(sc10x,anal="raw")
save(sc10x,file=paste0("./analysis/sc10x.raw.rda"))
saveRDS(sc10x,paste0("/project/urology/Strand_lab/shared/cellxgene/seurat/",project.name,"_raw.rds"))
#save(sc10x,file=paste0("./analysis/sc10x.raw.rda"))
save.image(file="./analysis/sc10x.raw.RData")
library(sceasy)
library(reticulate)
use_condaenv('sceasy')
convertFormat(sc10x,from="seurat",to="anndata",outFile=paste0("/project/urology/Strand_lab/shared/cellxgene/anndata/",project.name,"_raw.h5ad"),assay="SCT",main_layer="scale.data")
......@@ -5,6 +5,8 @@
scFolders <- function(){
#Create analysis output folders
if (!dir.exists("./analysis/qc/")){
dir.create("./analysis/qc/")
}
......@@ -26,60 +28,19 @@ scFolders <- function(){
if (!dir.exists("./analysis/cor")){
dir.create("./analysis/cor")
}
if (!dir.exists("./analysis/shiny")){
dir.create("./analysis/shiny")
}
if (!dir.exists("./analysis/shiny")){
dir.create("./analysis/shiny")
}
for (i in c("raw","id","id.epi","id.fmst","id.st","id.leu")){
if (!dir.exists(paste0("./analysis/shiny/",i))){
dir.create(paste0("./analysis/shiny/",i))
}
if (!dir.exists(paste0("./analysis/shiny/",i,"/outs"))){
dir.create(paste0("./analysis/shiny/",i,"/outs"))
}
if (!dir.exists(paste0("./analysis/shiny/",i,"/outs/filtered_feature_bc_matrix"))){
dir.create(paste0("./analysis/shiny/",i,"/outs/filtered_feature_bc_matrix"))
}
if (!dir.exists(paste0("./analysis/shiny/",i,"/outs/analysis"))){
dir.create(paste0("./analysis/shiny/",i,"/outs/analysis"))
}
if (!dir.exists(paste0("./analysis/shiny/",i,"/outs/analysis/clustering"))){
dir.create(paste0("./analysis/shiny/",i,"/outs/analysis/clustering"))
}
if (!dir.exists(paste0("./analysis/shiny/",i,"/outs/analysis/diffexp"))){
dir.create(paste0("./analysis/shiny/",i,"/outs/analysis/diffexp"))
}
if (!dir.exists(paste0("./analysis/shiny/",i,"/outs/analysis/pca"))){
dir.create(paste0("./analysis/shiny/",i,"/outs/analysis/pca"))
}
if (!dir.exists(paste0("./analysis/shiny/",i,"/outs/analysis/pca/10_components"))){
dir.create(paste0("./analysis/shiny/",i,"/outs/analysis/pca/10_components"))
}
if (!dir.exists(paste0("./analysis/shiny/",i,"/outs/analysis/tsne"))){
dir.create(paste0("./analysis/shiny/",i,"/outs/analysis/tsne"))
}
if (!dir.exists(paste0("./analysis/shiny/",i,"/outs/analysis/tsne/2_components"))){
dir.create(paste0("./analysis/shiny/",i,"/outs/analysis/tsne/2_components"))
}
if (!dir.exists(paste0("./analysis/shiny/",i,"/outs/analysis/umap"))){
dir.create(paste0("./analysis/shiny/",i,"/outs/analysis/umap"))
}
if (!dir.exists(paste0("./analysis/shiny/",i,"/outs/analysis/umap/2_components"))){
dir.create(paste0("./analysis/shiny/",i,"/outs/analysis/umap/2_components"))
}
}
}
scLoad <- function(p,cellranger=3,aggr=TRUE,ncell=0,nfeat=0){
scLoad <- function(p,cellranger=3,aggr=TRUE,ncell=0,nfeat=0,seurat=FALSE){
#Load and prefilter filtered_gene_bc_matrices_mex output from cellranger
#Inputs:
#p = project name
#cellranger cellranger version number used for count/aggr, 2 or 3
#aggr = if the samples are already aggregated, TRUE if useing the output of aggr, FALSE if using outputs of each count
#cellranger = cellranger version number used for count/aggr, 2, 3, or 4
#aggr = if the samples are already aggregated, TRUE if using the output of aggr, FALSE if using outputs of each count
#ncell = minimum number of cells for initial feature filter
#nfeat = minimum number of features for initial cell filter
#seurat = if Seurat R objects per sample are already created, TRUE if using Seurat objects, FALSE if using output of count
#Outputs:
#sc10x = Seurat object list
......@@ -88,26 +49,43 @@ scLoad <- function(p,cellranger=3,aggr=TRUE,ncell=0,nfeat=0){
sc10x.groups <- read_csv(paste0("./analysis/DATA/",p,"-demultiplex.csv"))
#Load filtered_gene_bc_matrices output from cellranger
sc10x.data <- list()
sc10x <- list()
if (aggr==TRUE){
if (cellranger==2){
sc10x.data[aggr] <- Read10X(data.dir=paste0("./analysis/DATA/10x/filtered_gene_bc_matrices_mex/"))
if (seurat==FALSE) {
sc10x.data <- list()
if (aggr==TRUE){
if (cellranger==2){
sc10x.data[aggr] <- Read10X(data.dir=paste0("./analysis/DATA/10x/filtered_gene_bc_matrices_mex/"))
} else {
sc10x.data[aggr] <- Read10X(data.dir=paste0("./analysis/DATA/10x/filtered_feature_bc_matrix/"))
}
sc10x[aggr] <- new("seurat",raw.data=sc10x.data[aggr])
} else {
sc10x.data[aggr] <- Read10X(data.dir=paste0("./analysis/DATA/10x/filtered_feature_bc_matrix/"))
for (i in sc10x.groups$Samples){
if (cellranger==2){
sc10x.data[i] <- Read10X(data.dir=paste0("./analysis/DATA/10x/",i,"/filtered_gene_bc_matrices/"))
} else {
sc10x.data[i] <- Read10X(data.dir=paste0("./analysis/DATA/10x/",i,"/filtered_feature_bc_matrix/"))
}
sc10x[i] <- CreateSeuratObject(counts=sc10x.data[[i]],project=p,min.cells=ncell,min.features=nfeat)
sc10x[[i]]$samples <- i
}
}
sc10x[aggr] <- new("seurat",raw.data=sc10x.data[aggr])
} else {
for (i in sc10x.groups$Samples){
if (cellranger==2){
sc10x.data[i] <- Read10X(data.dir=paste0("./analysis/DATA/10x/",i,"/filtered_gene_bc_matrices/"))
} else {
sc10x.data[i] <- Read10X(data.dir=paste0("./analysis/DATA/10x/",i,"/filtered_feature_bc_matrix/"))
sc10x[i] <- readRDS(paste0("./analysis/DATA/10x/",i,"/",i,".rds"))
sc10x[[i]] <- sc10x[[i]]
}
}
if (length(colnames(sc10x.groups)[!(colnames(sc10x.groups) %in% c("Samples","ALL","Keep"))])!=0){
for (i in sc10x.groups$Samples){
Idents(sc10x[[i]],cells=1:ncol(sc10x[[i]])) <- i
sc10x[[i]]$samples <- Idents(sc10x[[i]])
for (j in colnames(sc10x.groups)[!(colnames(sc10x.groups) %in% c("Samples","ALL"))]){
Idents(sc10x[[i]],cells=1:ncol(sc10x[[i]])) <- sc10x.groups[sc10x.groups$Samples==i,colnames(sc10x.groups)==j]
sc10x[[i]]@meta.data <- sc10x[[i]]@meta.data[,c("nCount_RNA","nFeature_RNA","samples")]
}
sc10x[i] <- CreateSeuratObject(counts=sc10x.data[[i]],project=p,min.cells=ncell,min.features=nfeat)
sc10x[[i]]$samples <- i
}
}
......@@ -552,7 +530,7 @@ scPC <- function(sc10x,pc=50,hpc=0.9,file="pre.stress",print="tsne"){
}
scCCA <- function(sc10x.l){
scAlign <- function(sc10x.l){
for (i in 1:length(sc10x.l)){
#sc10x.l[[i]] <- NormalizeData(sc10x.l[[i]],verbose=FALSE)
gc()
......@@ -738,7 +716,7 @@ scScore <- function(sc10x,score,geneset,cut.pt=0.9,anchor=FALSE){
Idents(object=sc10x) <- "ALL"
predicate <- paste0(score,"1 >= ",cut.x)
Idents(object=sc10x, cells = WhichCells(object=sc10x,expression= predicate)) <- score
Idents(object=sc10x, cells = rownames(sc10x[[paste0(score,"1")]])[sc10x[[paste0(score,"1")]] >= cut.x]) <- score
sc10x[[score]] <- Idents(object=sc10x)
Idents(sc10x) <- score
sc10x.negative <- subset(x=sc10x,idents="ALL")
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment