diff --git a/scripts/05_female_only.R b/scripts/05_female_only.R
new file mode 100644
index 0000000000000000000000000000000000000000..2a5e2eb46a758baf7a34354663c322ff552b9d90
--- /dev/null
+++ b/scripts/05_female_only.R
@@ -0,0 +1,178 @@
+# Separation and reprocessing of female data
+# Author: John T Lafin
+# Updated: 2024-10-18
+
+print("Script begin at: ")
+Sys.time()
+
+# Setup -------------------------------------------------------------------
+
+# Load packages
+library(dplyr)
+library(Seurat)
+library(intrinsicDimension)
+library(ggplot2)
+library(harmony)
+library(future)
+library(future.apply)
+
+# Future initialization
+future_plan <- "multisession" # Can use multicore if not on Rstudio or Windows
+n_workers <- 4
+plan(future_plan, workers=n_workers)  
+options(future.globals.maxSize = 8000 * 1024^2)
+set.seed(42)
+
+# Create output dir
+outdir <- "data_modified/05_female_only/"
+dir.create(outdir, showWarnings = FALSE)
+
+# Create pre-processing function
+preprocess <- function(x, 
+                       cluster.res = seq(0.1,1.5,0.1), 
+                       default.res = "RNA_snn_res.0.4", 
+                       batch.var = "Sample"){
+  x <- x %>%
+    NormalizeData() %>%
+    FindVariableFeatures() %>%
+    ScaleData() %>%
+    RunPCA() %>%
+    RunHarmony(group.by.vars = batch.var)
+  
+  numPCs <- maxLikGlobalDimEst(x@reductions$harmony@cell.embeddings, 
+                               k = 10)$dim.est %>%
+    round()
+  x <- x %>%
+    RunUMAP(dims = 1:numPCs,
+            reduction = "harmony",
+            metric = "euclidean",
+            min.dist = 0.1) %>%
+    FindNeighbors(reduction = "harmony",
+                  dims = 1:numPCs) %>%
+    FindClusters(resolution = cluster.res) %>%
+    SetIdent(value = default.res)
+  return(x)
+}
+
+# Load data ---------------------------------------------------------------
+
+seu <- readRDS("data_modified/04_annotation/objects/global.rds")
+
+## Subset only female by lineage
+
+lin.list <- subset(seu, subset = Sex == "female") %>%
+  SplitObject(split.by="Lineage")
+
+## Preprocess each lineage
+lin.list <- future_lapply(lin.list, preprocess, default.res="cellannotation")
+
+# Update annotation -------------------------------------------------------
+
+## Epithelia
+Idents(lin.list$Epithelia) <- "RNA_snn_res.0.5"
+cluster.ids <- c("0" = "intermediate cell of transitional epithelia",
+                 "1" = "basal cell of transitional epithelia",
+                 "2" = "basal cell of squamous epithelia",
+                 "3" = "intermediate cell of squamous epithelia",
+                 "4" = "club cell",
+                 "5" = "intermediate cell of squamous epithelia",
+                 "6" = "intermediate cell of squamous epithelia",
+                 "7" = "luminal cell of squamous epithelia",
+                 "8" = "proliferative cells",
+                 "9" = "basal cell of transitional epithelia")
+lin.list$Epithelia <- RenameIdents(lin.list$Epithelia, cluster.ids) %>%
+  StashIdent("cellannotation")
+
+### Label neuroendocrine cells
+lin.list$Epithelia$NE.score <- NULL
+NE.markers <- strsplit("CHGA GRP CALCA SCG2 TPH1 ASCL1 SCG5 PHGR1 MIAT MAOB", split = " ")
+lin.list$Epithelia <- AddModuleScore(lin.list$Epithelia, 
+                       features = NE.markers,
+                       name = "NE.score")
+lin.list$Epithelia@meta.data <- rename(lin.list$Epithelia@meta.data, NE.score = NE.score1)
+lin.list$Epithelia@meta.data <- lin.list$Epithelia@meta.data %>%
+  mutate(cellannotation = if_else(NE.score > 0.3, "neuroendocrine cell", cellannotation))
+Idents(lin.list$Epithelia) <- "cellannotation"
+
+## Endothelia
+Idents(lin.list$Endothelia) <- "RNA_snn_res.0.5"
+cluster.ids <- c("0" = "venous endothelia",
+                 "1" = "venous endothelia",
+                 "2" = "venous endothelia",
+                 "3" = "venous endothelia",
+                 "4" = "capillary endothelia",
+                 "5" = "arterial endothelia",
+                 "6" = "lymphatic endothelia",
+                 "7" = "venous endothelia")
+lin.list$Endothelia <- RenameIdents(lin.list$Endothelia, cluster.ids) %>%
+  StashIdent("cellannotation")
+
+## Leukocytes
+Idents(lin.list$Leukocyte) <- "RNA_snn_res.0.4"
+cluster.ids <- c("0" = "macrophage",
+                 "1" = "T cell",
+                 "2" = "NK cell",
+                 "3" = "mast cell",
+                 "4" = "B cell",
+                 "5" = "T cell")
+lin.list$Leukocyte <- RenameIdents(lin.list$Leukocyte, cluster.ids) %>%
+  StashIdent("cellannotation")
+
+## Stroma
+Idents(lin.list$Stroma) <- "RNA_snn_res.1"
+cluster.ids <- c("0" = "smooth muscle cell",
+                 "1" = "pericyte",
+                 "2" = "minor interstiital fibroblast",
+                 "3" = "major interstitial fibroblast",
+                 "4" = "smooth muscle cell",
+                 "5" = "smooth muscle cell",
+                 "6" = "vascular smooth muscle cell",
+                 "7" = "periepithelial fibroblast of vagina",
+                 "8" = "periepithelial fibroblast of urethra",
+                 "9" = "smooth muscle cell",
+                 "10" = "endoneurial fibroblast",
+                 "11" = "glial cell",
+                 "12" = "pericyte",
+                 "13" = "major interstitial fibroblast",
+                 "14" = "pericyte",
+                 "15" = "smooth muscle cell",
+                 "16" = "glial cell")
+lin.list$Stroma <- RenameIdents(lin.list$Stroma, cluster.ids) %>%
+  StashIdent("cellannotation")
+
+# Recombine ---------------------------------------------------------------
+
+seu <- merge(lin.list[[1]], lin.list[2:length(lin.list)])
+seu <- preprocess(seu, default.res="cellannotation")
+
+# Save outputs ------------------------------------------------------------
+
+## Save dot plots
+plotdir <- paste0(outdir, "dot_plots/")
+dir.create(plotdir, showWarnings=FALSE)
+
+gene.list <- list(
+  "Epithelia" = strsplit("AKR1C1 AKR1C2 MKI67 TOP2A PIGR LCN2 KRT6A CHGA KRT5 BCAM", split=" ")[[1]],
+  "Endothelia" = strsplit("LYVE1 EFNB2 ACKR1 TXNIP VWF", split=" ")[[1]],
+  "Leukocyte" = strsplit("LYVE1 C1QA CD163 PTPRC IL7R CD8A GZMK HLA-DRA KIT MS4A1 CD79A JCHAIN GNLY GZMB", split = " ")[[1]],
+  "Stroma" = strsplit("C7 SFRP4 STC1 APOE APOD NGFR THY1 CD36 DES ACTG2 PRUNE2 MCAM BCAM RGS5 DLK1 MYF5 CDH19 GPM6B", split = " ")[[1]]
+)
+for (lin in names(lin.list)) {
+  DotPlot(lin.list[[lin]], features=gene.list[[lin]], cluster.idents=TRUE) +
+    theme(axis.text.x=element_text(angle=45, hjust=1))
+  ggsave(paste0(plotdir, lin, ".png"),
+         height=8.5,
+         width=11)
+}
+
+## Save objects
+objdir <- paste0(outdir, "objects/")
+dir.create(objdir, showWarnings=FALSE)
+
+saveRDS(seu, paste0(objdir, "global_female.rds"))
+for (lin in names(lin.list)) {
+  saveRDS(lin.list[[lin]], paste0(objdir, lin, "_female.rds"))
+}
+
+## Save session info
+capture.output(sessionInfo(), file = paste0(outdir, "session_info.txt"))