diff --git a/scripts/05_female_only.R b/scripts/05_female_only.R new file mode 100644 index 0000000000000000000000000000000000000000..2a5e2eb46a758baf7a34354663c322ff552b9d90 --- /dev/null +++ b/scripts/05_female_only.R @@ -0,0 +1,178 @@ +# Separation and reprocessing of female data +# Author: John T Lafin +# Updated: 2024-10-18 + +print("Script begin at: ") +Sys.time() + +# Setup ------------------------------------------------------------------- + +# Load packages +library(dplyr) +library(Seurat) +library(intrinsicDimension) +library(ggplot2) +library(harmony) +library(future) +library(future.apply) + +# Future initialization +future_plan <- "multisession" # Can use multicore if not on Rstudio or Windows +n_workers <- 4 +plan(future_plan, workers=n_workers) +options(future.globals.maxSize = 8000 * 1024^2) +set.seed(42) + +# Create output dir +outdir <- "data_modified/05_female_only/" +dir.create(outdir, showWarnings = FALSE) + +# Create pre-processing function +preprocess <- function(x, + cluster.res = seq(0.1,1.5,0.1), + default.res = "RNA_snn_res.0.4", + batch.var = "Sample"){ + x <- x %>% + NormalizeData() %>% + FindVariableFeatures() %>% + ScaleData() %>% + RunPCA() %>% + RunHarmony(group.by.vars = batch.var) + + numPCs <- maxLikGlobalDimEst(x@reductions$harmony@cell.embeddings, + k = 10)$dim.est %>% + round() + x <- x %>% + RunUMAP(dims = 1:numPCs, + reduction = "harmony", + metric = "euclidean", + min.dist = 0.1) %>% + FindNeighbors(reduction = "harmony", + dims = 1:numPCs) %>% + FindClusters(resolution = cluster.res) %>% + SetIdent(value = default.res) + return(x) +} + +# Load data --------------------------------------------------------------- + +seu <- readRDS("data_modified/04_annotation/objects/global.rds") + +## Subset only female by lineage + +lin.list <- subset(seu, subset = Sex == "female") %>% + SplitObject(split.by="Lineage") + +## Preprocess each lineage +lin.list <- future_lapply(lin.list, preprocess, default.res="cellannotation") + +# Update annotation ------------------------------------------------------- + +## Epithelia +Idents(lin.list$Epithelia) <- "RNA_snn_res.0.5" +cluster.ids <- c("0" = "intermediate cell of transitional epithelia", + "1" = "basal cell of transitional epithelia", + "2" = "basal cell of squamous epithelia", + "3" = "intermediate cell of squamous epithelia", + "4" = "club cell", + "5" = "intermediate cell of squamous epithelia", + "6" = "intermediate cell of squamous epithelia", + "7" = "luminal cell of squamous epithelia", + "8" = "proliferative cells", + "9" = "basal cell of transitional epithelia") +lin.list$Epithelia <- RenameIdents(lin.list$Epithelia, cluster.ids) %>% + StashIdent("cellannotation") + +### Label neuroendocrine cells +lin.list$Epithelia$NE.score <- NULL +NE.markers <- strsplit("CHGA GRP CALCA SCG2 TPH1 ASCL1 SCG5 PHGR1 MIAT MAOB", split = " ") +lin.list$Epithelia <- AddModuleScore(lin.list$Epithelia, + features = NE.markers, + name = "NE.score") +lin.list$Epithelia@meta.data <- rename(lin.list$Epithelia@meta.data, NE.score = NE.score1) +lin.list$Epithelia@meta.data <- lin.list$Epithelia@meta.data %>% + mutate(cellannotation = if_else(NE.score > 0.3, "neuroendocrine cell", cellannotation)) +Idents(lin.list$Epithelia) <- "cellannotation" + +## Endothelia +Idents(lin.list$Endothelia) <- "RNA_snn_res.0.5" +cluster.ids <- c("0" = "venous endothelia", + "1" = "venous endothelia", + "2" = "venous endothelia", + "3" = "venous endothelia", + "4" = "capillary endothelia", + "5" = "arterial endothelia", + "6" = "lymphatic endothelia", + "7" = "venous endothelia") +lin.list$Endothelia <- RenameIdents(lin.list$Endothelia, cluster.ids) %>% + StashIdent("cellannotation") + +## Leukocytes +Idents(lin.list$Leukocyte) <- "RNA_snn_res.0.4" +cluster.ids <- c("0" = "macrophage", + "1" = "T cell", + "2" = "NK cell", + "3" = "mast cell", + "4" = "B cell", + "5" = "T cell") +lin.list$Leukocyte <- RenameIdents(lin.list$Leukocyte, cluster.ids) %>% + StashIdent("cellannotation") + +## Stroma +Idents(lin.list$Stroma) <- "RNA_snn_res.1" +cluster.ids <- c("0" = "smooth muscle cell", + "1" = "pericyte", + "2" = "minor interstiital fibroblast", + "3" = "major interstitial fibroblast", + "4" = "smooth muscle cell", + "5" = "smooth muscle cell", + "6" = "vascular smooth muscle cell", + "7" = "periepithelial fibroblast of vagina", + "8" = "periepithelial fibroblast of urethra", + "9" = "smooth muscle cell", + "10" = "endoneurial fibroblast", + "11" = "glial cell", + "12" = "pericyte", + "13" = "major interstitial fibroblast", + "14" = "pericyte", + "15" = "smooth muscle cell", + "16" = "glial cell") +lin.list$Stroma <- RenameIdents(lin.list$Stroma, cluster.ids) %>% + StashIdent("cellannotation") + +# Recombine --------------------------------------------------------------- + +seu <- merge(lin.list[[1]], lin.list[2:length(lin.list)]) +seu <- preprocess(seu, default.res="cellannotation") + +# Save outputs ------------------------------------------------------------ + +## Save dot plots +plotdir <- paste0(outdir, "dot_plots/") +dir.create(plotdir, showWarnings=FALSE) + +gene.list <- list( + "Epithelia" = strsplit("AKR1C1 AKR1C2 MKI67 TOP2A PIGR LCN2 KRT6A CHGA KRT5 BCAM", split=" ")[[1]], + "Endothelia" = strsplit("LYVE1 EFNB2 ACKR1 TXNIP VWF", split=" ")[[1]], + "Leukocyte" = strsplit("LYVE1 C1QA CD163 PTPRC IL7R CD8A GZMK HLA-DRA KIT MS4A1 CD79A JCHAIN GNLY GZMB", split = " ")[[1]], + "Stroma" = strsplit("C7 SFRP4 STC1 APOE APOD NGFR THY1 CD36 DES ACTG2 PRUNE2 MCAM BCAM RGS5 DLK1 MYF5 CDH19 GPM6B", split = " ")[[1]] +) +for (lin in names(lin.list)) { + DotPlot(lin.list[[lin]], features=gene.list[[lin]], cluster.idents=TRUE) + + theme(axis.text.x=element_text(angle=45, hjust=1)) + ggsave(paste0(plotdir, lin, ".png"), + height=8.5, + width=11) +} + +## Save objects +objdir <- paste0(outdir, "objects/") +dir.create(objdir, showWarnings=FALSE) + +saveRDS(seu, paste0(objdir, "global_female.rds")) +for (lin in names(lin.list)) { + saveRDS(lin.list[[lin]], paste0(objdir, lin, "_female.rds")) +} + +## Save session info +capture.output(sessionInfo(), file = paste0(outdir, "session_info.txt"))