From 61f7a461c3dcc3f68dd2ddcf4816fad803b5be99 Mon Sep 17 00:00:00 2001
From: John Lafin <john.lafin@utsouthwestern.edu>
Date: Mon, 13 Jan 2025 15:55:57 -0600
Subject: [PATCH] Add pseudobulk DEA

---
 scripts/06_pseudobulk.R | 101 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 101 insertions(+)
 create mode 100644 scripts/06_pseudobulk.R

diff --git a/scripts/06_pseudobulk.R b/scripts/06_pseudobulk.R
new file mode 100644
index 0000000..357a0ad
--- /dev/null
+++ b/scripts/06_pseudobulk.R
@@ -0,0 +1,101 @@
+# Pseudobulk comparison of male vs female urethra
+# Author: John T Lafin
+# Updated: 2025-01-10
+
+# Load packages
+library(dplyr)
+library(ggplot2)
+library(Seurat)
+library(edgeR)
+library(future)
+library(future.apply)
+
+# Set up parallelization
+future_plan <- "multisession" # Can use multicore if not on Rstudio or Windows
+n_workers <- 4
+plan(future_plan, workers=n_workers)  
+options(future.globals.maxSize = 8000 * 1024^2)
+set.seed(42)
+
+# Create output dir
+outdir <- "data_modified/06_pseudobulk/"
+dir.create(outdir, showWarnings = FALSE)
+
+# Load integrated data
+seu <- readRDS("data_modified/04_annotation/objects/global.rds")
+
+# Fix names and create combined column
+seu$cellannotation <- make.names(seu$cellannotation)
+seu$cellannotation_sex <- paste(seu$cellannotation, seu$Sex, sep="_")
+
+# Calculate pseudobulk as summed counts per Sample and cellannotation
+pb <- Seurat2PB(seu, sample="Sample", cluster="cellannotation_sex")
+
+# Check library sizes
+lib.size.cutoff <- 5e4
+d <- pb$samples %>%
+  mutate(sample_cluster = paste(sample, cluster, sep="_"),
+         remove = lib.size < lib.size.cutoff)
+ggplot(d, aes(x=cluster, y=lib.size, color=remove)) + 
+  geom_point() + 
+  scale_y_log10() + 
+  geom_hline(yintercept=lib.size.cutoff, linetype="dotted") +
+  theme(axis.text.x = element_text(angle=45, hjust=1))
+ggsave(paste0(outdir, "lib_size_cutoff.png"), width=10, height=10/1.62)
+
+# Filter cells and genes
+keep.samples <- pb$samples$lib.size > lib.size.cutoff
+pb <- pb[, keep.samples]
+keep.genes <- filterByExpr(pb, 
+                           group = pb$samples$cluster,
+                           min.count = 10,
+                           min.total.count = 20)
+pb <- pb[keep.genes, ,keep=FALSE]
+
+# Normalize
+pb <- normLibSizes(pb)
+
+# Create a design matrix
+cluster <- as.factor(pb$samples$cluster)
+samples <- pb$samples$sample
+design <- model.matrix(~0+cluster)
+colnames(design) <- gsub("(samples|cluster)", "", colnames(design))
+
+# Estimate dispersion
+pb <- estimateDisp(pb, design, robust = TRUE)
+
+# Fit model
+fit <- glmQLFit(pb, design, robust = TRUE)
+
+# Test major iFib and club cells
+contr <- makeContrasts(
+  major.interstitial.fibroblast_male-major.interstitial.fibroblast_female,
+    club.epithelia.of.urethra_male-club.epithelia.of.urethra_female,
+  levels=fit$design
+)
+
+# Perform statistical tests
+qlf <- list()
+for (i in 1:ncol(contr)) {
+  qlf[[i]] <- glmQLFTest(fit, contrast = contr[,i])
+  names(qlf)[[i]] <- qlf[[i]]$comparison %>%
+    sub("-1\\*", "", .) %>%
+    sub("_f.*$", "", .)
+}
+
+# Save results
+resdir <- paste0(outdir, "DEA/")
+dir.create(resdir, showWarnings=FALSE)
+for (i in seq_along(qlf)) {
+  fn <- names(qlf)[i] %>%
+    gsub("\\.", "_", .) %>%
+    paste0(".csv")
+  topTags(qlf[[i]], n=Inf, sort.by="logFC", p.value=0.05) %>%
+    write.csv(paste0(resdir, fn))
+}
+
+# Save DGEList
+saveRDS(pb, paste0(outdir, "dgelist.rds"))
+
+# Save session info
+capture.output(sessionInfo(), file = paste0(outdir, "sessionInfo.txt"))
-- 
GitLab