Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • StrandLab/scrna_urethra
Show changes
Commits on Source (5)
options(repos = c(CRAN = "https://p3m.dev/cran/__linux__/centos7/latest"))
source("renv/activate.R")
# Mac config files
.DS_Store
._.DS_Store
._*
# R files
# Keep files necessary for renv bootstrapping
......
# Pseudobulk comparison of male vs female urethra
# Author: John T Lafin
# Updated: 2025-01-10
# Load packages
library(dplyr)
library(ggplot2)
library(Seurat)
library(edgeR)
library(future)
library(future.apply)
# Set up parallelization
future_plan <- "multisession" # Can use multicore if not on Rstudio or Windows
n_workers <- 4
plan(future_plan, workers=n_workers)
options(future.globals.maxSize = 8000 * 1024^2)
set.seed(42)
# Create output dir
outdir <- "data_modified/06_pseudobulk/"
dir.create(outdir, showWarnings = FALSE)
# Load integrated data
seu <- readRDS("data_modified/04_annotation/objects/global.rds")
# Fix names and create combined column
seu$cellannotation <- make.names(seu$cellannotation)
seu$cellannotation_sex <- paste(seu$cellannotation, seu$Sex, sep="_")
# Calculate pseudobulk as summed counts per Sample and cellannotation
pb <- Seurat2PB(seu, sample="Sample", cluster="cellannotation_sex")
# Check library sizes
lib.size.cutoff <- 5e4
d <- pb$samples %>%
mutate(sample_cluster = paste(sample, cluster, sep="_"),
remove = lib.size < lib.size.cutoff)
ggplot(d, aes(x=cluster, y=lib.size, color=remove)) +
geom_point() +
scale_y_log10() +
geom_hline(yintercept=lib.size.cutoff, linetype="dotted") +
theme(axis.text.x = element_text(angle=45, hjust=1))
ggsave(paste0(outdir, "lib_size_cutoff.png"), width=10, height=10/1.62)
# Filter cells and genes
keep.samples <- pb$samples$lib.size > lib.size.cutoff
pb <- pb[, keep.samples]
keep.genes <- filterByExpr(pb,
group = pb$samples$cluster,
min.count = 10,
min.total.count = 20)
pb <- pb[keep.genes, ,keep=FALSE]
# Normalize
pb <- normLibSizes(pb)
# Create a design matrix
cluster <- as.factor(pb$samples$cluster)
samples <- pb$samples$sample
design <- model.matrix(~0+cluster)
colnames(design) <- gsub("(samples|cluster)", "", colnames(design))
# Estimate dispersion
pb <- estimateDisp(pb, design, robust = TRUE)
# Fit model
fit <- glmQLFit(pb, design, robust = TRUE)
# Test major iFib and club cells
contr <- makeContrasts(
major.interstitial.fibroblast_male-major.interstitial.fibroblast_female,
club.epithelia.of.urethra_male-club.epithelia.of.urethra_female,
levels=fit$design
)
# Perform statistical tests
qlf <- list()
for (i in 1:ncol(contr)) {
qlf[[i]] <- glmQLFTest(fit, contrast = contr[,i])
names(qlf)[[i]] <- qlf[[i]]$comparison %>%
sub("-1\\*", "", .) %>%
sub("_f.*$", "", .)
}
# Save results
resdir <- paste0(outdir, "DEA/")
dir.create(resdir, showWarnings=FALSE)
for (i in seq_along(qlf)) {
fn <- names(qlf)[i] %>%
gsub("\\.", "_", .) %>%
paste0(".csv")
topTags(qlf[[i]], n=Inf, sort.by="logFC", p.value=0.05) %>%
write.csv(paste0(resdir, fn))
}
# Save DGEList
saveRDS(pb, paste0(outdir, "dgelist.rds"))
# Save session info
capture.output(sessionInfo(), file = paste0(outdir, "sessionInfo.txt"))
#!/usr/bin/env python3
# Run initial analysis of Xenium data
# Author: John T Lafin
# Updated: 2024-12-27
# Import modules
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import scanpy as sc
import spatialdata as sd
from spatialdata_io import xenium
import sopa
# Set parameters
xenium_data='/archive/urology/Strand_lab/shared/xenium/output-XETG00248__0034053__D202__20240425__175357'
zarr_dir='data_modified/spatialdata_zarr/'
sample_name='D202'
outdir='data_modified/xenium_'+sample_name+'/'
os.makedirs(outdir, exist_ok=True)
# Load data and create zarr store
zarr_store=zarr_dir+sample_name+'.zarr'
sdata = xenium(xenium_data, n_jobs=8)
os.makedirs(zarr_dir, exist_ok=True)
sdata.write(zarr_store)
sdata = sd.read_zarr(zarr_store)
# Pull the anndata object and calculate QC metrics
adata = sdata['table']
sc.pp.calculate_qc_metrics(adata, percent_top=(10, 20, 50, 150), inplace=True)
# Save plots
fig, axs = plt.subplots(1, 4, figsize=(15, 4))
axs[0].set_title("Total transcripts per cell")
sns.histplot(
adata.obs["total_counts"],
binwidth=5,
kde=False,
ax=axs[0],
)
axs[1].set_title("Unique transcripts per cell")
sns.histplot(
adata.obs["n_genes_by_counts"],
binwidth=5,
kde=False,
ax=axs[1],
)
axs[2].set_title("Area of segmented cells")
sns.histplot(
adata.obs["cell_area"],
binwidth=5,
kde=False,
ax=axs[2],
)
axs[3].set_title("Nucleus ratio")
sns.histplot(
adata.obs["nucleus_area"] / adata.obs["cell_area"],
kde=False,
ax=axs[3],
)
plt.savefig(fname=outdir+'qc_plots.png')
# Basic filtering
sc.pp.filter_cells(adata, min_counts=10)
sc.pp.filter_genes(adata, min_cells=5)
# Save raw counts
adata.layers['counts'] = adata.X.copy()
# Pre-processing
sc.pp.normalize_total(adata)
sc.pp.log1p(adata)
sc.pp.pca(adata)
sc.pp.neighbors(adata)
sc.tl.umap(adata)
## Leiden clustering at 15 resolutions
res = np.arange(0.1, 1.6, 0.1)
res = np.round(res, 1)
for i in res:
sc.tl.leiden(
adata,
resolution=i,
key_added="leiden_res_" + str(i),
flavor='igraph',
n_iterations=2
)
# Create UMAP plots
grps=[ 'leiden_res_'+str(x) for x in res ]
sc.pl.umap(
adata,
color=grps,
ncols=3,
legend_loc='on data'
)
plt.savefig(fname=outdir+'umap_leiden.png')
# Check gene expression
genes=['EPCAM','KRT5','NKX3-1','PIGR','AKR1C2','DCN','ACTA2','DES','CDH19','PECAM1','PTPRC']
sc.pl.umap(
adata,
color=genes,
ncols=3
)
plt.savefig(fname=outdir+'umap_lineage_markers.png')
# Save modified data
adata.write_h5ad(outdir+sample_name+'_annotated.h5ad')
sopa.io.write_cell_categories(outdir, adata)
os.rename(src=outdir+'analysis.zarr.zip', dst=outdir+sample_name+'_annot_analysis.zarr.zip')
#!/usr/bin/env python3
# Run initial analysis of Xenium data
# Author: John T Lafin
# Updated: 2024-11-26
# Import modules
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import scanpy as sc
import spatialdata as sd
from spatialdata_io import xenium
import sopa
# Set parameters
xenium_data='/archive/urology/Strand_lab/shared/xenium/output-XETG00248__0034066__D216__20240425__175357/output-XETG00248__0034066__D216__20240425__175357'
zarr_dir='data_modified/spatialdata_zarr/'
sample_name='D216'
outdir='data_modified/xenium_D216/'
os.makedirs(outdir, exist_ok=True)
# Load data and create zarr store
zarr_store=zarr_dir+sample_name+'.zarr'
sdata = xenium(xenium_data, n_jobs=8)
os.makedirs(zarr_dir, exist_ok=True)
sdata.write(zarr_store)
sdata = sd.read_zarr(zarr_store)
# Pull the anndata object and calculate QC metrics
adata = sdata['table']
sc.pp.calculate_qc_metrics(adata, percent_top=(10, 20, 50, 150), inplace=True)
# Save plots
fig, axs = plt.subplots(1, 4, figsize=(15, 4))
axs[0].set_title("Total transcripts per cell")
sns.histplot(
adata.obs["total_counts"],
binwidth=5,
kde=False,
ax=axs[0],
)
axs[1].set_title("Unique transcripts per cell")
sns.histplot(
adata.obs["n_genes_by_counts"],
binwidth=5,
kde=False,
ax=axs[1],
)
axs[2].set_title("Area of segmented cells")
sns.histplot(
adata.obs["cell_area"],
binwidth=5,
kde=False,
ax=axs[2],
)
axs[3].set_title("Nucleus ratio")
sns.histplot(
adata.obs["nucleus_area"] / adata.obs["cell_area"],
kde=False,
ax=axs[3],
)
plt.savefig(fname=outdir+'qc_plots.png')
# Basic filtering
sc.pp.filter_cells(adata, min_counts=10)
sc.pp.filter_genes(adata, min_cells=5)
# Save raw counts
adata.layers['counts'] = adata.X.copy()
# Pre-processing
sc.pp.normalize_total(adata)
sc.pp.log1p(adata)
sc.pp.pca(adata)
sc.pp.neighbors(adata)
sc.tl.umap(adata)
## Leiden clustering at 15 resolutions
res = np.arange(0.1, 1.6, 0.1)
res = np.round(res, 1)
for i in res:
sc.tl.leiden(
adata,
resolution=i,
key_added="leiden_res_" + str(i),
flavor='igraph',
n_iterations=2
)
# Create UMAP plots
umap_dir=outdir+'umaps/'
os.makedirs(umap_dir, exist_ok=True)
for i in res:
sc.pl.umap(
adata,
color='leiden_res_'+str(i),
save=umap_dir+'leiden_res_'+str(i)+'.png'
)
# Check gene expression
genes=['EPCAM','KRT5','NKX3-1','PIGR','AKR1C2','DCN','ACTA2','DES','CDH19','PECAM1','PTPRC']
sc.pl.umap(
adata,
color=genes,
save=umap_dir+'lineage_markers.png'
)
# Save modified data
adata.write_h5ad(outdir+sample_name+'_annotated.h5ad')
sopa.io.write_cell_categories(outdir, adata)
os.rename(src=outdir+'analysis.zarr.zip', dst=outdir+sample_name+'_annot_analysis.zarr.zip')
#!/usr/bin/env python3
# Run initial analysis of Xenium data
# Author: John T Lafin
# Updated: 2024-11-26
# Import modules
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import scanpy as sc
import spatialdata as sd
from spatialdata_io import xenium
import sopa
# Set parameters
xenium_data='/archive/urology/Strand_lab/shared/xenium/output-XETG00248__0033934__D330-FLUT__20240718__174131'
zarr_dir='data_modified/spatialdata_zarr/'
sample_name='D330'
outdir='data_modified/xenium_'+sample_name+'/'
os.makedirs(outdir, exist_ok=True)
# Load data and create zarr store
zarr_store=zarr_dir+sample_name+'.zarr'
sdata = xenium(xenium_data, n_jobs=8)
os.makedirs(zarr_dir, exist_ok=True)
sdata.write(zarr_store)
sdata = sd.read_zarr(zarr_store)
# Pull the anndata object and calculate QC metrics
adata = sdata['table']
sc.pp.calculate_qc_metrics(adata, percent_top=(10, 20, 50, 150), inplace=True)
# Save plots
fig, axs = plt.subplots(1, 4, figsize=(15, 4))
axs[0].set_title("Total transcripts per cell")
sns.histplot(
adata.obs["total_counts"],
binwidth=5,
kde=False,
ax=axs[0],
)
axs[1].set_title("Unique transcripts per cell")
sns.histplot(
adata.obs["n_genes_by_counts"],
binwidth=5,
kde=False,
ax=axs[1],
)
axs[2].set_title("Area of segmented cells")
sns.histplot(
adata.obs["cell_area"],
binwidth=5,
kde=False,
ax=axs[2],
)
axs[3].set_title("Nucleus ratio")
sns.histplot(
adata.obs["nucleus_area"] / adata.obs["cell_area"],
kde=False,
ax=axs[3],
)
plt.savefig(fname=outdir+'qc_plots.png')
# Basic filtering
sc.pp.filter_cells(adata, min_counts=10)
sc.pp.filter_genes(adata, min_cells=5)
# Save raw counts
adata.layers['counts'] = adata.X.copy()
# Pre-processing
sc.pp.normalize_total(adata)
sc.pp.log1p(adata)
sc.pp.pca(adata)
sc.pp.neighbors(adata)
sc.tl.umap(adata)
## Leiden clustering at 15 resolutions
res = np.arange(0.1, 1.6, 0.1)
res = np.round(res, 1)
for i in res:
sc.tl.leiden(
adata,
resolution=i,
key_added="leiden_res_" + str(i),
flavor='igraph',
n_iterations=2
)
# Create UMAP plots
grps=[ 'leiden_res_'+str(x) for x in res ]
sc.pl.umap(
adata,
color=grps,
ncols=3,
legend_loc='on data'
)
plt.savefig(fname=outdir+'umap_leiden.png')
# Check gene expression
genes=['EPCAM','KRT5','NKX3-1','PIGR','AKR1C2','DCN','ACTA2','DES','CDH19','PECAM1','PTPRC']
sc.pl.umap(
adata,
color=genes,
ncols=3
)
plt.savefig(fname=outdir+'umap_lineage_markers.png')
# Save modified data
adata.write_h5ad(outdir+sample_name+'_annotated.h5ad')
sopa.io.write_cell_categories(outdir, adata)
os.rename(src=outdir+'analysis.zarr.zip', dst=outdir+sample_name+'_annot_analysis.zarr.zip')
#!/usr/bin/env python3
# Run initial analysis of Xenium data
# Author: John T Lafin
# Updated: 2024-11-26
# Import modules
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import scanpy as sc
import spatialdata as sd
from spatialdata_io import xenium
import sopa
# Set parameters
xenium_data='/archive/urology/Strand_lab/shared/xenium/output-XETG00248__0033827__D93-MULT__20240718__174131'
zarr_dir='data_modified/spatialdata_zarr/'
sample_name='D93'
outdir='data_modified/xenium_'+sample_name+'/'
os.makedirs(outdir, exist_ok=True)
# Load data and create zarr store
zarr_store=zarr_dir+sample_name+'.zarr'
sdata = xenium(xenium_data, n_jobs=8)
os.makedirs(zarr_dir, exist_ok=True)
sdata.write(zarr_store)
sdata = sd.read_zarr(zarr_store)
# Pull the anndata object and calculate QC metrics
adata = sdata['table']
sc.pp.calculate_qc_metrics(adata, percent_top=(10, 20, 50, 150), inplace=True)
# Save plots
fig, axs = plt.subplots(1, 4, figsize=(15, 4))
axs[0].set_title("Total transcripts per cell")
sns.histplot(
adata.obs["total_counts"],
binwidth=5,
kde=False,
ax=axs[0],
)
axs[1].set_title("Unique transcripts per cell")
sns.histplot(
adata.obs["n_genes_by_counts"],
binwidth=5,
kde=False,
ax=axs[1],
)
axs[2].set_title("Area of segmented cells")
sns.histplot(
adata.obs["cell_area"],
binwidth=5,
kde=False,
ax=axs[2],
)
axs[3].set_title("Nucleus ratio")
sns.histplot(
adata.obs["nucleus_area"] / adata.obs["cell_area"],
kde=False,
ax=axs[3],
)
plt.savefig(fname=outdir+'qc_plots.png')
# Basic filtering
sc.pp.filter_cells(adata, min_counts=10)
sc.pp.filter_genes(adata, min_cells=5)
# Save raw counts
adata.layers['counts'] = adata.X.copy()
# Pre-processing
sc.pp.normalize_total(adata)
sc.pp.log1p(adata)
sc.pp.pca(adata)
sc.pp.neighbors(adata)
sc.tl.umap(adata)
## Leiden clustering at 15 resolutions
res = np.arange(0.1, 1.6, 0.1)
res = np.round(res, 1)
for i in res:
sc.tl.leiden(
adata,
resolution=i,
key_added="leiden_res_" + str(i),
flavor='igraph',
n_iterations=2
)
# Create UMAP plots
grps=[ 'leiden_res_'+str(x) for x in res ]
sc.pl.umap(
adata,
color=grps,
ncols=3,
legend_loc='on data'
)
plt.savefig(fname=outdir+'umap_leiden.png')
# Check gene expression
genes=['EPCAM','KRT5','NKX3-1','PIGR','AKR1C2','DCN','ACTA2','DES','CDH19','PECAM1','PTPRC']
sc.pl.umap(
adata,
color=genes,
ncols=3
)
plt.savefig(fname=outdir+'umap_lineage_markers.png')
# Save modified data
adata.write_h5ad(outdir+sample_name+'_annotated.h5ad')
sopa.io.write_cell_categories(outdir, adata)
os.rename(src=outdir+'analysis.zarr.zip', dst=outdir+sample_name+'_annot_analysis.zarr.zip')