Skip to content
Snippets Groups Projects
Commit 97f23cc9 authored by John Lafin's avatar John Lafin
Browse files

Include prelim analysis script for Xe D202

parent d375e704
Branches
No related merge requests found
#!/usr/bin/env python3
# Run initial analysis of Xenium data
# Author: John T Lafin
# Updated: 2024-12-27
# Import modules
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import scanpy as sc
import spatialdata as sd
from spatialdata_io import xenium
import sopa
# Set parameters
xenium_data='/archive/urology/Strand_lab/shared/xenium/output-XETG00248__0034053__D202__20240425__175357'
zarr_dir='data_modified/spatialdata_zarr/'
sample_name='D202'
outdir='data_modified/xenium_'+sample_name+'/'
os.makedirs(outdir, exist_ok=True)
# Load data and create zarr store
zarr_store=zarr_dir+sample_name+'.zarr'
sdata = xenium(xenium_data, n_jobs=8)
os.makedirs(zarr_dir, exist_ok=True)
sdata.write(zarr_store)
sdata = sd.read_zarr(zarr_store)
# Pull the anndata object and calculate QC metrics
adata = sdata['table']
sc.pp.calculate_qc_metrics(adata, percent_top=(10, 20, 50, 150), inplace=True)
# Save plots
fig, axs = plt.subplots(1, 4, figsize=(15, 4))
axs[0].set_title("Total transcripts per cell")
sns.histplot(
adata.obs["total_counts"],
binwidth=5,
kde=False,
ax=axs[0],
)
axs[1].set_title("Unique transcripts per cell")
sns.histplot(
adata.obs["n_genes_by_counts"],
binwidth=5,
kde=False,
ax=axs[1],
)
axs[2].set_title("Area of segmented cells")
sns.histplot(
adata.obs["cell_area"],
binwidth=5,
kde=False,
ax=axs[2],
)
axs[3].set_title("Nucleus ratio")
sns.histplot(
adata.obs["nucleus_area"] / adata.obs["cell_area"],
kde=False,
ax=axs[3],
)
plt.savefig(fname=outdir+'qc_plots.png')
# Basic filtering
sc.pp.filter_cells(adata, min_counts=10)
sc.pp.filter_genes(adata, min_cells=5)
# Save raw counts
adata.layers['counts'] = adata.X.copy()
# Pre-processing
sc.pp.normalize_total(adata)
sc.pp.log1p(adata)
sc.pp.pca(adata)
sc.pp.neighbors(adata)
sc.tl.umap(adata)
## Leiden clustering at 15 resolutions
res = np.arange(0.1, 1.6, 0.1)
res = np.round(res, 1)
for i in res:
sc.tl.leiden(
adata,
resolution=i,
key_added="leiden_res_" + str(i),
flavor='igraph',
n_iterations=2
)
# Create UMAP plots
grps=[ 'leiden_res_'+str(x) for x in res ]
sc.pl.umap(
adata,
color=grps,
ncols=3,
legend_loc='on data'
)
plt.savefig(fname=outdir+'umap_leiden.png')
# Check gene expression
genes=['EPCAM','KRT5','NKX3-1','PIGR','AKR1C2','DCN','ACTA2','DES','CDH19','PECAM1','PTPRC']
sc.pl.umap(
adata,
color=genes,
ncols=3
)
plt.savefig(fname=outdir+'umap_lineage_markers.png')
# Save modified data
adata.write_h5ad(outdir+sample_name+'_annotated.h5ad')
sopa.io.write_cell_categories(outdir, adata)
os.rename(src=outdir+'analysis.zarr.zip', dst=outdir+sample_name+'_annot_analysis.zarr.zip')
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment