Skip to content
Snippets Groups Projects
Commit 8404afb1 authored by John Lafin's avatar John Lafin
Browse files

Add initial xenium analysis scripts

parent 903db20e
Branches
No related merge requests found
#!/usr/bin/env python3
# Run initial analysis of Xenium data
# Author: John T Lafin
# Updated: 2024-11-26
# Import modules
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import scanpy as sc
import spatialdata as sd
from spatialdata_io import xenium
import sopa
# Set parameters
xenium_data='/archive/urology/Strand_lab/shared/xenium/output-XETG00248__0034066__D216__20240425__175357/output-XETG00248__0034066__D216__20240425__175357'
zarr_dir='data_modified/spatialdata_zarr/'
sample_name='D216'
outdir='data_modified/xenium_D216/'
os.makedirs(outdir, exist_ok=True)
# Load data and create zarr store
zarr_store=zarr_dir+sample_name+'.zarr'
sdata = xenium(xenium_data, n_jobs=8)
os.makedirs(zarr_dir, exist_ok=True)
sdata.write(zarr_store)
sdata = sd.read_zarr(zarr_store)
# Pull the anndata object and calculate QC metrics
adata = sdata['table']
sc.pp.calculate_qc_metrics(adata, percent_top=(10, 20, 50, 150), inplace=True)
# Save plots
fig, axs = plt.subplots(1, 4, figsize=(15, 4))
axs[0].set_title("Total transcripts per cell")
sns.histplot(
adata.obs["total_counts"],
binwidth=5,
kde=False,
ax=axs[0],
)
axs[1].set_title("Unique transcripts per cell")
sns.histplot(
adata.obs["n_genes_by_counts"],
binwidth=5,
kde=False,
ax=axs[1],
)
axs[2].set_title("Area of segmented cells")
sns.histplot(
adata.obs["cell_area"],
binwidth=5,
kde=False,
ax=axs[2],
)
axs[3].set_title("Nucleus ratio")
sns.histplot(
adata.obs["nucleus_area"] / adata.obs["cell_area"],
kde=False,
ax=axs[3],
)
plt.savefig(fname=outdir+'qc_plots.png')
# Basic filtering
sc.pp.filter_cells(adata, min_counts=10)
sc.pp.filter_genes(adata, min_cells=5)
# Save raw counts
adata.layers['counts'] = adata.X.copy()
# Pre-processing
sc.pp.normalize_total(adata)
sc.pp.log1p(adata)
sc.pp.pca(adata)
sc.pp.neighbors(adata)
sc.tl.umap(adata)
## Leiden clustering at 15 resolutions
res = np.arange(0.1, 1.6, 0.1)
res = np.round(res, 1)
for i in res:
sc.tl.leiden(
adata,
resolution=i,
key_added="leiden_res_" + str(i),
flavor='igraph',
n_iterations=2
)
# Create UMAP plots
umap_dir=outdir+'umaps/'
os.makedirs(umap_dir, exist_ok=True)
for i in res:
sc.pl.umap(
adata,
color='leiden_res_'+str(i),
save=umap_dir+'leiden_res_'+str(i)+'.png'
)
# Check gene expression
genes=['EPCAM','KRT5','NKX3-1','PIGR','AKR1C2','DCN','ACTA2','DES','CDH19','PECAM1','PTPRC']
sc.pl.umap(
adata,
color=genes,
save=umap_dir+'lineage_markers.png'
)
# Save modified data
adata.write_h5ad(outdir+sample_name+'_annotated.h5ad')
sopa.io.write_cell_categories(outdir, adata)
os.rename(src=outdir+'analysis.zarr.zip', dst=outdir+sample_name+'_annot_analysis.zarr.zip')
#!/usr/bin/env python3
# Run initial analysis of Xenium data
# Author: John T Lafin
# Updated: 2024-11-26
# Import modules
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import scanpy as sc
import spatialdata as sd
from spatialdata_io import xenium
import sopa
# Set parameters
xenium_data='/archive/urology/Strand_lab/shared/xenium/output-XETG00248__0033934__D330-FLUT__20240718__174131'
zarr_dir='data_modified/spatialdata_zarr/'
sample_name='D330'
outdir='data_modified/xenium_'+sample_name+'/'
os.makedirs(outdir, exist_ok=True)
# Load data and create zarr store
zarr_store=zarr_dir+sample_name+'.zarr'
sdata = xenium(xenium_data, n_jobs=8)
os.makedirs(zarr_dir, exist_ok=True)
sdata.write(zarr_store)
sdata = sd.read_zarr(zarr_store)
# Pull the anndata object and calculate QC metrics
adata = sdata['table']
sc.pp.calculate_qc_metrics(adata, percent_top=(10, 20, 50, 150), inplace=True)
# Save plots
fig, axs = plt.subplots(1, 4, figsize=(15, 4))
axs[0].set_title("Total transcripts per cell")
sns.histplot(
adata.obs["total_counts"],
binwidth=5,
kde=False,
ax=axs[0],
)
axs[1].set_title("Unique transcripts per cell")
sns.histplot(
adata.obs["n_genes_by_counts"],
binwidth=5,
kde=False,
ax=axs[1],
)
axs[2].set_title("Area of segmented cells")
sns.histplot(
adata.obs["cell_area"],
binwidth=5,
kde=False,
ax=axs[2],
)
axs[3].set_title("Nucleus ratio")
sns.histplot(
adata.obs["nucleus_area"] / adata.obs["cell_area"],
kde=False,
ax=axs[3],
)
plt.savefig(fname=outdir+'qc_plots.png')
# Basic filtering
sc.pp.filter_cells(adata, min_counts=10)
sc.pp.filter_genes(adata, min_cells=5)
# Save raw counts
adata.layers['counts'] = adata.X.copy()
# Pre-processing
sc.pp.normalize_total(adata)
sc.pp.log1p(adata)
sc.pp.pca(adata)
sc.pp.neighbors(adata)
sc.tl.umap(adata)
## Leiden clustering at 15 resolutions
res = np.arange(0.1, 1.6, 0.1)
res = np.round(res, 1)
for i in res:
sc.tl.leiden(
adata,
resolution=i,
key_added="leiden_res_" + str(i),
flavor='igraph',
n_iterations=2
)
# Create UMAP plots
grps=[ 'leiden_res_'+str(x) for x in res ]
sc.pl.umap(
adata,
color=grps,
ncols=3,
legend_loc='on data'
)
plt.savefig(fname=outdir+'umap_leiden.png')
# Check gene expression
genes=['EPCAM','KRT5','NKX3-1','PIGR','AKR1C2','DCN','ACTA2','DES','CDH19','PECAM1','PTPRC']
sc.pl.umap(
adata,
color=genes,
ncols=3
)
plt.savefig(fname=outdir+'umap_lineage_markers.png')
# Save modified data
adata.write_h5ad(outdir+sample_name+'_annotated.h5ad')
sopa.io.write_cell_categories(outdir, adata)
os.rename(src=outdir+'analysis.zarr.zip', dst=outdir+sample_name+'_annot_analysis.zarr.zip')
#!/usr/bin/env python3
# Run initial analysis of Xenium data
# Author: John T Lafin
# Updated: 2024-11-26
# Import modules
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import scanpy as sc
import spatialdata as sd
from spatialdata_io import xenium
import sopa
# Set parameters
xenium_data='/archive/urology/Strand_lab/shared/xenium/output-XETG00248__0033827__D93-MULT__20240718__174131'
zarr_dir='data_modified/spatialdata_zarr/'
sample_name='D93'
outdir='data_modified/xenium_'+sample_name+'/'
os.makedirs(outdir, exist_ok=True)
# Load data and create zarr store
zarr_store=zarr_dir+sample_name+'.zarr'
sdata = xenium(xenium_data, n_jobs=8)
os.makedirs(zarr_dir, exist_ok=True)
sdata.write(zarr_store)
sdata = sd.read_zarr(zarr_store)
# Pull the anndata object and calculate QC metrics
adata = sdata['table']
sc.pp.calculate_qc_metrics(adata, percent_top=(10, 20, 50, 150), inplace=True)
# Save plots
fig, axs = plt.subplots(1, 4, figsize=(15, 4))
axs[0].set_title("Total transcripts per cell")
sns.histplot(
adata.obs["total_counts"],
binwidth=5,
kde=False,
ax=axs[0],
)
axs[1].set_title("Unique transcripts per cell")
sns.histplot(
adata.obs["n_genes_by_counts"],
binwidth=5,
kde=False,
ax=axs[1],
)
axs[2].set_title("Area of segmented cells")
sns.histplot(
adata.obs["cell_area"],
binwidth=5,
kde=False,
ax=axs[2],
)
axs[3].set_title("Nucleus ratio")
sns.histplot(
adata.obs["nucleus_area"] / adata.obs["cell_area"],
kde=False,
ax=axs[3],
)
plt.savefig(fname=outdir+'qc_plots.png')
# Basic filtering
sc.pp.filter_cells(adata, min_counts=10)
sc.pp.filter_genes(adata, min_cells=5)
# Save raw counts
adata.layers['counts'] = adata.X.copy()
# Pre-processing
sc.pp.normalize_total(adata)
sc.pp.log1p(adata)
sc.pp.pca(adata)
sc.pp.neighbors(adata)
sc.tl.umap(adata)
## Leiden clustering at 15 resolutions
res = np.arange(0.1, 1.6, 0.1)
res = np.round(res, 1)
for i in res:
sc.tl.leiden(
adata,
resolution=i,
key_added="leiden_res_" + str(i),
flavor='igraph',
n_iterations=2
)
# Create UMAP plots
grps=[ 'leiden_res_'+str(x) for x in res ]
sc.pl.umap(
adata,
color=grps,
ncols=3,
legend_loc='on data'
)
plt.savefig(fname=outdir+'umap_leiden.png')
# Check gene expression
genes=['EPCAM','KRT5','NKX3-1','PIGR','AKR1C2','DCN','ACTA2','DES','CDH19','PECAM1','PTPRC']
sc.pl.umap(
adata,
color=genes,
ncols=3
)
plt.savefig(fname=outdir+'umap_lineage_markers.png')
# Save modified data
adata.write_h5ad(outdir+sample_name+'_annotated.h5ad')
sopa.io.write_cell_categories(outdir, adata)
os.rename(src=outdir+'analysis.zarr.zip', dst=outdir+sample_name+'_annot_analysis.zarr.zip')
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment