From 61f7a461c3dcc3f68dd2ddcf4816fad803b5be99 Mon Sep 17 00:00:00 2001 From: John Lafin <john.lafin@utsouthwestern.edu> Date: Mon, 13 Jan 2025 15:55:57 -0600 Subject: [PATCH] Add pseudobulk DEA --- scripts/06_pseudobulk.R | 101 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 101 insertions(+) create mode 100644 scripts/06_pseudobulk.R diff --git a/scripts/06_pseudobulk.R b/scripts/06_pseudobulk.R new file mode 100644 index 0000000..357a0ad --- /dev/null +++ b/scripts/06_pseudobulk.R @@ -0,0 +1,101 @@ +# Pseudobulk comparison of male vs female urethra +# Author: John T Lafin +# Updated: 2025-01-10 + +# Load packages +library(dplyr) +library(ggplot2) +library(Seurat) +library(edgeR) +library(future) +library(future.apply) + +# Set up parallelization +future_plan <- "multisession" # Can use multicore if not on Rstudio or Windows +n_workers <- 4 +plan(future_plan, workers=n_workers) +options(future.globals.maxSize = 8000 * 1024^2) +set.seed(42) + +# Create output dir +outdir <- "data_modified/06_pseudobulk/" +dir.create(outdir, showWarnings = FALSE) + +# Load integrated data +seu <- readRDS("data_modified/04_annotation/objects/global.rds") + +# Fix names and create combined column +seu$cellannotation <- make.names(seu$cellannotation) +seu$cellannotation_sex <- paste(seu$cellannotation, seu$Sex, sep="_") + +# Calculate pseudobulk as summed counts per Sample and cellannotation +pb <- Seurat2PB(seu, sample="Sample", cluster="cellannotation_sex") + +# Check library sizes +lib.size.cutoff <- 5e4 +d <- pb$samples %>% + mutate(sample_cluster = paste(sample, cluster, sep="_"), + remove = lib.size < lib.size.cutoff) +ggplot(d, aes(x=cluster, y=lib.size, color=remove)) + + geom_point() + + scale_y_log10() + + geom_hline(yintercept=lib.size.cutoff, linetype="dotted") + + theme(axis.text.x = element_text(angle=45, hjust=1)) +ggsave(paste0(outdir, "lib_size_cutoff.png"), width=10, height=10/1.62) + +# Filter cells and genes +keep.samples <- pb$samples$lib.size > lib.size.cutoff +pb <- pb[, keep.samples] +keep.genes <- filterByExpr(pb, + group = pb$samples$cluster, + min.count = 10, + min.total.count = 20) +pb <- pb[keep.genes, ,keep=FALSE] + +# Normalize +pb <- normLibSizes(pb) + +# Create a design matrix +cluster <- as.factor(pb$samples$cluster) +samples <- pb$samples$sample +design <- model.matrix(~0+cluster) +colnames(design) <- gsub("(samples|cluster)", "", colnames(design)) + +# Estimate dispersion +pb <- estimateDisp(pb, design, robust = TRUE) + +# Fit model +fit <- glmQLFit(pb, design, robust = TRUE) + +# Test major iFib and club cells +contr <- makeContrasts( + major.interstitial.fibroblast_male-major.interstitial.fibroblast_female, + club.epithelia.of.urethra_male-club.epithelia.of.urethra_female, + levels=fit$design +) + +# Perform statistical tests +qlf <- list() +for (i in 1:ncol(contr)) { + qlf[[i]] <- glmQLFTest(fit, contrast = contr[,i]) + names(qlf)[[i]] <- qlf[[i]]$comparison %>% + sub("-1\\*", "", .) %>% + sub("_f.*$", "", .) +} + +# Save results +resdir <- paste0(outdir, "DEA/") +dir.create(resdir, showWarnings=FALSE) +for (i in seq_along(qlf)) { + fn <- names(qlf)[i] %>% + gsub("\\.", "_", .) %>% + paste0(".csv") + topTags(qlf[[i]], n=Inf, sort.by="logFC", p.value=0.05) %>% + write.csv(paste0(resdir, fn)) +} + +# Save DGEList +saveRDS(pb, paste0(outdir, "dgelist.rds")) + +# Save session info +capture.output(sessionInfo(), file = paste0(outdir, "sessionInfo.txt")) -- GitLab