diff --git a/scripts/02_xenium_annotation.py b/scripts/02_xenium_annotation.py
new file mode 100644
index 0000000000000000000000000000000000000000..9cb290529372e3982213f0596987a34f772942f8
--- /dev/null
+++ b/scripts/02_xenium_annotation.py
@@ -0,0 +1,630 @@
+# Annotate Xenium samples
+# Author: John T Lafin
+# Updated: 2025-03-18
+
+# Load modules
+import os
+import numpy as np
+import pandas as pd
+import matplotlib.pyplot as plt
+import scanpy as sc
+import spatialdata as sd
+from sopa.io import write_cell_categories
+
+# Create output directory
+outdir = 'data_modified/02_xenium_annotation/'
+os.makedirs(outdir, exist_ok=True)
+
+# Set plotting params
+sc.set_figure_params(dpi=300,dpi_save=300)
+
+# Prepare pre-processing function
+def preprocess(adata, res):
+    adata.X = adata.layers['counts'].copy()
+    sc.pp.normalize_total(adata)
+    sc.pp.log1p(adata)
+    sc.pp.pca(adata)
+    sc.pp.neighbors(adata)
+    sc.tl.umap(adata)
+
+    for i in res:
+        sc.tl.leiden(
+            adata,
+            resolution=i,
+            key_added='leiden_res_' + str(i),
+            flavor='igraph',
+            n_iterations=2
+        )
+    return adata
+
+# Create list of metrics to save to XE analysis.zarr.zip
+res = np.arange(0.1, 1.6, 0.1)
+res = np.round(res, 1)
+grps=[ 'leiden_res_' + str(i) for i in res ]
+grps.extend(['lineage', 'cellannotation', 'cell_id'])
+
+# Define neuroendocrine score cutoffs
+ne_genes=['CHGA', 'ASCL1', 'CALCA', 'GRP', 'SCG2', 'TPH1']
+ne_cutoffs={'D93': 0.3,
+            'D202': 0.5,
+            'D216': 0.3,
+            'D330': 0.25}
+
+# Annotate D93
+donor = 'D93'
+adata = sc.read_h5ad('data_modified/xenium_'+donor+'/'+donor+'_annotated.h5ad')
+datadir = outdir+donor+'/'
+os.makedirs(datadir, exist_ok=True)
+
+## Annotate lineages
+labels = dict()
+for i in range(len(adata.obs['leiden_res_0.2'].unique())):
+    match i:
+        case 0|2:
+            labels[str(i)] = 'Epithelia'
+        case 4:
+            labels[str(i)] = 'Endothelia'
+        case 1|6:
+            labels[str(i)] = 'Leukocyte'
+        case _:
+            labels[str(i)] = 'Stroma'
+adata.obs['lineage'] = adata.obs['leiden_res_0.2'].map(labels)
+
+## Split lineages
+lins = ['Epithelia', 'Stroma', 'Leukocyte', 'Endothelia']
+lin_list = dict(zip(lins, [ adata[adata.obs['lineage'] == x ].copy() for x in lins]))
+
+## Preprocess lineages
+for k in lin_list.keys():
+    lin_list[k] = preprocess(lin_list[k], res)
+    
+## Annotate epithelia
+labels = dict()
+for i in range(len(lin_list['Epithelia'].obs['leiden_res_0.5'].unique())):
+    match i:
+        case 0:
+            labels[str(i)] = 'basal epithelia of prostate'
+        case 1|2|4:
+            labels[str(i)] = 'transitional intermediate epithelia of urethra'
+        case 3|6:
+            labels[str(i)] = 'transitional luminal epithelia of urethra'
+        case 5:
+            labels[str(i)] = 'luminal epithelia of prostate'
+lin_list['Epithelia'].obs['cellannotation'] = lin_list['Epithelia'].obs['leiden_res_0.5'].map(labels)
+
+## Annotate stroma
+labels = dict()
+for i in range(len(lin_list['Stroma'].obs['leiden_res_1.0'].unique())):
+    match i:
+        case 0|1|3|4|11:
+            labels[str(i)] = 'fibroblast'
+        case 2|10:
+            labels[str(i)] = 'vascular smooth muscle cell'
+        case 5|6|7:
+            labels[str(i)] = 'smooth muscle cell'
+        case 8:
+            labels[str(i)] = 'satellie glial cell'
+        case 9:
+            labels[str(i)] = 'pericyte'
+lin_list['Stroma'].obs['cellannotation'] = lin_list['Stroma'].obs['leiden_res_1.0'].map(labels)
+
+## Annotate leukocytes
+labels = dict()
+for i in range(len(lin_list['Leukocyte'].obs['leiden_res_0.5'].unique())):
+    match i:
+        case 0:
+            labels[str(i)] = 'T cell'
+        case 1|2|3|4:
+            labels[str(i)] = 'macrophage'
+        case 5:
+            labels[str(i)] = 'mast cell'
+        case 6:
+            labels[str(i)] = 'B cell'
+        case 7:
+            labels[str(i)] = 'plasma cell'
+lin_list['Leukocyte'].obs['cellannotation'] = lin_list['Leukocyte'].obs['leiden_res_0.5'].map(labels)
+
+## Annotate endothelia
+labels = dict()
+for i in range(len(lin_list['Endothelia'].obs['leiden_res_0.7'].unique())):
+    match i:
+        case 6:
+            labels[str(i)] = 'lymphatic endothelial cell'
+        case _:
+            labels[str(i)] = 'endothelial cell'
+lin_list['Endothelia'].obs['cellannotation'] = lin_list['Endothelia'].obs['leiden_res_0.7'].map(labels)
+
+## Cast labels to global object
+labels = pd.concat([ x.obs[['cell_id', 'cellannotation']] for x in list(lin_list.values()) ])
+adata.obs = adata.obs.merge(labels, how='left', on='cell_id')
+
+## Assign neuroendocrine cells
+sc.tl.score_genes(
+    adata,
+    gene_list=ne_genes,
+    score_name='NE_score',
+)
+
+adata.obs['cellannotation'] = np.where(
+    (adata.obs['lineage'] == 'Epithelia') & (adata.obs['NE_score'] > ne_cutoffs[donor]), 'neuroendocrine cell', adata.obs['cellannotation']
+    )
+
+## Save data
+sc.pl.umap(adata, color='lineage')
+plt.savefig(fname=datadir+donor+'_umap_lineage.png', bbox_inches='tight')
+
+sc.pl.umap(adata, color='cellannotation')
+plt.savefig(fname=datadir+donor+'_umap_cellannotation.png', bbox_inches='tight')
+
+adata.write_h5ad(datadir+donor+'_annot.h5ad')
+for k,v in lin_list.items():
+    v.write_h5ad(datadir+'/'+k+'.h5ad')
+
+### Load in full (not filtered) dataset and merge labels
+sdata = sd.read_zarr('data_modified/spatialdata_zarr/'+donor+'.zarr')
+sdata['table'].obs = pd.merge(
+    left=sdata['table'].obs,
+    right=adata.obs[grps],
+    how='left',
+    on='cell_id'
+)
+
+### Change NaNs to 'missing'
+for g in grps:
+    if g == 'cell_id':
+        continue
+    if 'missing' not in sdata['table'].obs[g].cat.categories:
+        sdata['table'].obs[g] = sdata['table'].obs[g].cat.add_categories(['missing'])
+    sdata['table'].obs[g] = sdata['table'].obs[g].fillna('missing')
+
+### Save XE analysis.zarr.zip file
+write_cell_categories(datadir+donor+'_analysis.zarr.zip', sdata['table'], is_dir=False)
+
+
+
+
+
+# Annotate D202
+donor = 'D202'
+adata = sc.read_h5ad('data_modified/xenium_'+donor+'/'+donor+'_annotated.h5ad')
+datadir = outdir+donor+'/'
+os.makedirs(datadir, exist_ok=True)
+
+## Annotate lineages
+labels = dict()
+for i in range(len(adata.obs['leiden_res_0.4'].unique())):
+    match i:
+        case 0|1|2:
+            labels[str(i)] = 'Epithelia'
+        case 5:
+            labels[str(i)] = 'Endothelia'
+        case 3|6:
+            labels[str(i)] = 'Leukocyte'
+        case 4|7|8|9:
+            labels[str(i)] = 'Stroma'
+adata.obs['lineage'] = adata.obs['leiden_res_0.4'].map(labels)
+
+## Split lineages
+lin_list = dict(zip(lins, [ adata[adata.obs['lineage'] == x ].copy() for x in lins]))
+
+## Preprocess lineages
+for k in lin_list.keys():
+    lin_list[k] = preprocess(lin_list[k], res)
+
+## Label epithelia
+labels = dict()
+for i in range(len(lin_list['Epithelia'].obs['leiden_res_0.8'].unique())):
+    match i:
+        case 0|1|4:
+            labels[str(i)] = 'luminal epithelia of prostate'
+        case 2|8:
+            labels[str(i)] = 'transitional luminal epithelia of urethra'
+        case 5|6|7:
+            labels[str(i)] = 'basal epithelia of prostate'
+        case 3:
+            labels[str(i)] = 'missing'
+lin_list['Epithelia'].obs['cellannotation'] = lin_list['Epithelia'].obs['leiden_res_0.8'].map(labels)
+
+## Label endothelia
+labels = dict()
+for i in range(len(lin_list['Endothelia'].obs['leiden_res_0.6'].unique())):
+    match i:
+        case 0:
+            labels[str(i)] = 'pericyte'
+        case 1:
+            labels[str(i)] = 'vascular smooth muscle cell'
+        case 2|3|4|5|6:
+            labels[str(i)] = 'endothelial cell'
+lin_list['Endothelia'].obs['cellannotation'] = lin_list['Endothelia'].obs['leiden_res_0.6'].map(labels)
+
+## Label leukocytes
+labels = dict()
+for i in range(len(lin_list['Leukocyte'].obs['leiden_res_1.5'].unique())):
+    match i:
+        case 12|13|14|15:
+            labels[str(i)] = 'T cell'
+        case 0|1|2|3|4|5|6|7|8|9|10|11:
+            labels[str(i)] = 'macrophage'
+        case 16:
+            labels[str(i)] = 'mast cell'
+        case 17:
+            labels[str(i)] = 'B cell'
+        case 19:
+            labels[str(i)] = 'plasma cell'
+        case 18:
+            labels[str(i)] = 'lymphatic endothelial cell'
+lin_list['Leukocyte'].obs['cellannotation'] = lin_list['Leukocyte'].obs['leiden_res_1.5'].map(labels)
+
+## Label stroma
+labels = dict()
+for i in range(len(lin_list['Stroma'].obs['leiden_res_0.6'].unique())):
+    match i:
+        case 0|1|2|3:
+            labels[str(i)] = 'vascular smooth muscle cell'
+        case 4|5|7:
+            labels[str(i)] = 'fibroblast'
+        case 6:
+            labels[str(i)] = 'smooth muscle cell'
+        case 8:
+            labels[str(i)] = 'satellite glial cell'
+lin_list['Stroma'].obs['cellannotation'] = lin_list['Stroma'].obs['leiden_res_0.6'].map(labels)
+
+## Cast labels to global object
+labels = pd.concat([ x.obs[['cell_id', 'cellannotation']] for x in list(lin_list.values()) ])
+adata.obs = adata.obs.merge(labels, how='left', on='cell_id')
+
+## Fix mislabeled lineages
+adata.obs['lineage'] = adata.obs['lineage'].case_when(
+    [
+        (adata.obs['cellannotation'] == 'lymphatic endothelial cell', 'Endothelia'),
+        (adata.obs['cellannotation'] == 'vascular smooth muscle cell', 'Stroma'),
+        (adata.obs['cellannotation'] == 'pericyte', 'Stroma')
+    ]
+)
+
+## Fix mislabeled epithelial cell types
+sc.tl.leiden(
+    adata,
+    resolution=2.3,
+    key_added='leiden_res_2.3',
+    flavor='igraph',
+    n_iterations=2
+)
+adata.obs['cellannotation'] = pd.Categorical(adata.obs['cellannotation'].case_when([
+    (adata.obs['leiden_res_2.3'].isin(['0','1','2','3','4']), 'luminal epithelia of prostate'),
+    (adata.obs['leiden_res_2.3'] == '8', 'basal epithelia of prostate'),
+    (adata.obs['leiden_res_2.3'].isin(['5','18']), 'transitional luminal epithelia of urethra'),
+    (adata.obs['leiden_res_2.3'] == '10', 'transitional intermediate epithelia of urethra')
+]))
+
+## Assign neuroendocrine cells
+sc.tl.score_genes(
+    adata,
+    gene_list=ne_genes,
+    score_name='NE_score',
+)
+
+adata.obs['cellannotation'] = np.where(
+    (adata.obs['lineage'] == 'Epithelia') & (adata.obs['NE_score'] > ne_cutoffs[donor]), 'neuroendocrine cell', adata.obs['cellannotation']
+    )
+
+## Save data
+sc.pl.umap(adata, color='lineage')
+plt.savefig(fname=datadir+donor+'_umap_lineage.png', bbox_inches='tight')
+
+sc.pl.umap(adata, color='cellannotation')
+plt.savefig(fname=datadir+donor+'_umap_cellannotation.png', bbox_inches='tight')
+
+adata.write_h5ad(datadir+donor+'_annot.h5ad')
+for k,v in lin_list.items():
+    v.write_h5ad(datadir+'/'+k+'.h5ad')
+
+### Load in full (not filtered) dataset and merge labels
+sdata = sd.read_zarr('data_modified/spatialdata_zarr/'+donor+'.zarr')
+sdata['table'].obs = pd.merge(
+    left=sdata['table'].obs,
+    right=adata.obs[grps],
+    how='left',
+    on='cell_id'
+)
+
+### Change NaNs to 'missing'
+for g in grps:
+    if g == 'cell_id':
+        continue
+    if 'missing' not in sdata['table'].obs[g].cat.categories:
+        sdata['table'].obs[g] = sdata['table'].obs[g].cat.add_categories(['missing'])
+    sdata['table'].obs[g] = sdata['table'].obs[g].fillna('missing')
+
+### Save XE analysis.zarr.zip file
+write_cell_categories(datadir+donor+'_analysis.zarr.zip', sdata['table'], is_dir=False)
+
+
+
+
+
+# Annotate D216
+donor = 'D216'
+adata = sc.read_h5ad('data_modified/xenium_'+donor+'/'+donor+'_annotated.h5ad')
+datadir = outdir+donor+'/'
+os.makedirs(datadir, exist_ok=True)
+
+## Annotate lineages
+labels = dict()
+for i in range(len(adata.obs['leiden_res_1.4'].unique())):
+    match i:
+        case 0|2|6|7|18|19:
+            labels[str(i)] = 'Epithelia'
+        case 4|17|21:
+            labels[str(i)] = 'Endothelia'
+        case 1|3|8|15|19:
+            labels[str(i)] = 'Leukocyte'
+        case _:
+            labels[str(i)] = 'Stroma'
+adata.obs['lineage'] = adata.obs['leiden_res_1.4'].map(labels)
+
+## Split lineages
+lin_list = dict(zip(lins, [ adata[adata.obs['lineage'] == x ].copy() for x in lins]))
+
+## Preprocess lineages
+for k in lin_list.keys():
+    lin_list[k] = preprocess(lin_list[k], res)
+
+## Annotate epithelia
+labels = dict()
+for i in range(len(lin_list['Epithelia'].obs['leiden_res_0.9'].unique())):
+    match i:
+        case 0|1|2|3|12:
+            labels[str(i)] = 'squamous luminal epithelia of urethra'
+        case 7|8|9|10|11:
+            labels[str(i)] = 'squamous basal epithelia of urethra'
+        case 4|5:
+            labels[str(i)] = 'transitional luminal epithelia of urethra'
+        case 6:
+            labels[str(1)] = 'transitional intermediate epithelia of urethra'
+lin_list['Epithelia'].obs['cellannotation'] = lin_list['Epithelia'].obs['leiden_res_0.9'].map(labels)
+
+## Annotate stroma
+labels = dict()
+for i in range(len(lin_list['Stroma'].obs['leiden_res_0.8'].unique())):
+    match i:
+        case 0|1|2|3|5:
+            labels[str(i)] = 'fibroblast'
+        case 4|7:
+            labels[str(i)] = 'vascular smooth muscle cell'
+        case 6:
+            labels[str(i)] = 'pericyte'
+        case 8:
+            labels[str(i)] = 'smooth muscle cell'
+        case 9:
+            labels[str(i)] = 'satellite glial cell'
+lin_list['Stroma'].obs['cellannotation'] = lin_list['Stroma'].obs['leiden_res_0.8'].map(labels)
+
+## Annotate leukocytes
+labels = dict()
+for i in range(len(lin_list['Leukocyte'].obs['leiden_res_0.7'].unique())):
+    match i:
+        case 0:
+            labels[str(i)] = 'mast cell'
+        case 2|3:
+            labels[str(i)] = 'T cell'
+        case 4|5|6:
+            labels[str(i)] = 'macrophage'
+        case 7:
+            labels[str(i)] = 'plasma cell'
+        case 8:
+            labels[str(i)] = 'dendritic cell'
+        case 9:
+            labels[str(i)] = 'B cell'
+        case 1:
+            labels[str(i)] = 'missing'
+lin_list['Leukocyte'].obs['cellannotation'] = lin_list['Leukocyte'].obs['leiden_res_0.7'].map(labels)
+
+## Annotate endothelia
+labels = dict()
+for i in range(len(lin_list['Endothelia'].obs['leiden_res_0.4'].unique())):
+    match i:
+        case 3:
+            labels[str(i)] = 'lymphatic endothelial cell'
+        case 0|1|2:
+            labels[str(i)] = 'endothelial cell'
+lin_list['Endothelia'].obs['cellannotation'] = lin_list['Endothelia'].obs['leiden_res_0.4'].map(labels)
+
+## Cast labels to global object
+labels = pd.concat([ x.obs[['cell_id', 'cellannotation']] for x in list(lin_list.values()) ])
+adata.obs = adata.obs.merge(labels, how='left', on='cell_id')
+
+## Correct labels
+adata.obs['cellannotation'] = pd.Categorical(
+    adata.obs['cellannotation'].case_when([
+        (adata.obs['leiden_res_1.5'] == '0', 'squamous luminal epithelia of urethra'),
+        (adata.obs['leiden_res_1.5'] == '1', 'squamous intermediate epithelia of urethra'),
+        (adata.obs['leiden_res_1.5'] == '21', 'squamous basal epithelia of urethra'),
+        (adata.obs['leiden_res_1.5'] == '10', 'transitional luminal epithelia of urethra'),
+        (adata.obs['leiden_res_1.5'] == '11', 'transitional intermediate epithelia of urethra')
+    ])
+)
+
+## Assign neuroendocrine cells
+sc.tl.score_genes(
+    adata,
+    gene_list=ne_genes,
+    score_name='NE_score',
+)
+
+adata.obs['cellannotation'] = np.where(
+    (adata.obs['lineage'] == 'Epithelia') & (adata.obs['NE_score'] > ne_cutoffs[donor]), 'neuroendocrine cell', adata.obs['cellannotation']
+    )
+
+## Save data
+sc.pl.umap(adata, color='lineage')
+plt.savefig(fname=datadir+donor+'_umap_lineage.png', bbox_inches='tight')
+
+sc.pl.umap(adata, color='cellannotation')
+plt.savefig(fname=datadir+donor+'_umap_cellannotation.png', bbox_inches='tight')
+
+adata.write_h5ad(datadir+donor+'_annot.h5ad')
+for k,v in lin_list.items():
+    v.write_h5ad(datadir+'/'+k+'.h5ad')
+
+### Load in full (not filtered) dataset and merge labels
+sdata = sd.read_zarr('data_modified/spatialdata_zarr/'+donor+'.zarr')
+sdata['table'].obs = pd.merge(
+    left=sdata['table'].obs,
+    right=adata.obs[grps],
+    how='left',
+    on='cell_id'
+)
+
+### Change NaNs to 'missing'
+for g in grps:
+    if g == 'cell_id':
+        continue
+    if 'missing' not in sdata['table'].obs[g].cat.categories:
+        sdata['table'].obs[g] = sdata['table'].obs[g].cat.add_categories(['missing'])
+    sdata['table'].obs[g] = sdata['table'].obs[g].fillna('missing')
+
+### Save XE analysis.zarr.zip file
+write_cell_categories(datadir+donor+'_analysis.zarr.zip', sdata['table'], is_dir=False)
+
+
+
+
+# Annotate D330
+donor = 'D330'
+adata = sc.read_h5ad('data_modified/xenium_'+donor+'/'+donor+'_annotated.h5ad')
+datadir = outdir+donor+'/'
+os.makedirs(datadir, exist_ok=True)
+
+## Annotate lineages
+labels = dict()
+for i in range(len(adata.obs['leiden_res_0.9'].unique())):
+    match i:
+        case 8|13|14|15|16:
+            labels[str(i)] = 'Epithelia'
+        case 12|17:
+            labels[str(i)] = 'Endothelia'
+        case 7|9|11|18:
+            labels[str(i)] = 'Leukocyte'
+        case _:
+            labels[str(i)] = 'Stroma'
+adata.obs['lineage'] = adata.obs['leiden_res_0.9'].map(labels)
+
+## Split lineages
+lin_list = dict(zip(lins, [ adata[adata.obs['lineage'] == x ].copy() for x in lins]))
+
+## Preprocess lineages
+for k in lin_list.keys():
+    lin_list[k] = preprocess(lin_list[k], res)
+
+
+## Annotate epithelia
+labels = dict()
+for i in range(len(lin_list['Epithelia'].obs['leiden_res_1.2'].unique())):
+    match i:
+        case 11:
+            labels[str(i)] = 'squamous basal epithelia'
+        case 12|14:
+            labels[str(i)] = 'squamous luminal epithelia'
+        case 6|8|9:
+            labels[str(i)] = 'transitional intermediate epithelia of urethra'
+        case 0|1|2|3|4|5|7:
+            labels[str(i)] = 'transitional luminal epithelia of urethra'
+        case 10|13:
+            labels[str(i)] = 'missing'
+lin_list['Epithelia'].obs['cellannotation'] = lin_list['Epithelia'].obs['leiden_res_1.2'].map(labels)
+
+## Annotate stroma
+labels = dict()
+for i in range(len(lin_list['Stroma'].obs['leiden_res_1.2'].unique())):
+    match i:
+        case 6|7|10|12|13|14|15:
+            labels[str(i)] = 'fibroblast'
+        case 9:
+            labels[str(i)] = 'vascular smooth muscle cell'
+        case 0|2|3|4|16:
+            labels[str(i)] = 'smooth muscle cell'
+        case 11:
+            labels[str(i)] = 'satellite glial cell'
+        case 8:
+            labels[str(i)] = 'pericyte'
+        case 1|5:
+            labels[str(i)] = 'smooth muscle cell'
+lin_list['Stroma'].obs['cellannotation'] = lin_list['Stroma'].obs['leiden_res_1.2'].map(labels)
+
+## Annotate leukocytes
+labels = dict()
+for i in range(len(lin_list['Leukocyte'].obs['leiden_res_1.0'].unique())):
+    match i:
+        case 4|8|13:
+            labels[str(i)] = 'T cell'
+        case 6:
+            labels[str(i)] = 'NK cell'
+        case 1|2|3|5|7|9:
+            labels[str(i)] = 'macrophage'
+        case 10:
+            labels[str(i)] = 'dendritic cell'
+        case 0:
+            labels[str(i)] = 'mast cell'
+        case 12:
+            labels[str(i)] = 'B cell'
+        case 11:
+            labels[str(i)] = 'plasma cell'
+lin_list['Leukocyte'].obs['cellannotation'] = lin_list['Leukocyte'].obs['leiden_res_1.0'].map(labels)
+
+## Annotate endothelia
+labels = dict()
+for i in range(len(lin_list['Endothelia'].obs['leiden_res_0.3'].unique())):
+    match i:
+        case 0:
+            labels[str(i)] = 'lymphatic endothelial cell'
+        case 1|2|3:
+            labels[str(i)] = 'endothelial cell'
+lin_list['Endothelia'].obs['cellannotation'] = lin_list['Endothelia'].obs['leiden_res_0.3'].map(labels)
+
+## Cast labels to global object
+labels = pd.concat([ x.obs[['cell_id', 'cellannotation']] for x in list(lin_list.values()) ])
+adata.obs = adata.obs.merge(labels, how='left', on='cell_id')
+
+## Assign neuroendocrine cells
+sc.tl.score_genes(
+    adata,
+    gene_list=ne_genes,
+    score_name='NE_score',
+)
+
+adata.obs['cellannotation'] = np.where(
+    (adata.obs['lineage'] == 'Epithelia') & (adata.obs['NE_score'] > ne_cutoffs[donor]), 'neuroendocrine cell', adata.obs['cellannotation']
+    )
+
+
+## Save data
+sc.pl.umap(adata, color='lineage')
+plt.savefig(fname=datadir+donor+'_umap_lineage.png', bbox_inches='tight')
+
+sc.pl.umap(adata, color='cellannotation')
+plt.savefig(fname=datadir+donor+'_umap_cellannotation.png', bbox_inches='tight')
+
+adata.write_h5ad(datadir+donor+'_annot.h5ad')
+for k,v in lin_list.items():
+    v.write_h5ad(datadir+'/'+k+'.h5ad')
+
+### Load in full (not filtered) dataset and merge labels
+sdata = sd.read_zarr('data_modified/spatialdata_zarr/'+donor+'.zarr')
+sdata['table'].obs = pd.merge(
+    left=sdata['table'].obs,
+    right=adata.obs[grps],
+    how='left',
+    on='cell_id'
+)
+
+### Change NaNs to 'missing'
+for g in grps:
+    if g == 'cell_id':
+        continue
+    if 'missing' not in sdata['table'].obs[g].cat.categories:
+        sdata['table'].obs[g] = sdata['table'].obs[g].cat.add_categories(['missing'])
+    sdata['table'].obs[g] = sdata['table'].obs[g].fillna('missing')
+
+### Save XE analysis.zarr.zip file
+write_cell_categories(datadir+donor+'_analysis.zarr.zip', sdata['table'], is_dir=False)
diff --git a/scripts/04_annotation.R b/scripts/04_annotation.R
index 5e98f6efc245af2e1c189a0916d781b9b5bbcfb3..969128080c04adb3354e2a7abfeb660676b160f7 100644
--- a/scripts/04_annotation.R
+++ b/scripts/04_annotation.R
@@ -110,9 +110,9 @@ annot.list$Leukocyte <- c("T cell",
                           "dendritic cell",
                           "plasma cell")
 annot.list$Stroma <- c("pericyte",
-                       "major interstitial fibroblast",
+                       "interstitial fibroblast of urethra",
                        "smooth muscle cell",
-                       "minor interstitial fibroblast",
+                       "interstitial fibroblast of vagina",
                        "smooth muscle cell",
                        "smooth muscle cell",
                        "periepithelial fibroblast of vagina",
@@ -121,11 +121,11 @@ annot.list$Stroma <- c("pericyte",
                        "periepithelial fibroblast of urethra",
                        "stressed cell",
                        "pericyte",
-                       "satellite glial cell",
+                       "Schwann cell",
                        "smooth muscle cell",
                        "endoneurial fibroblast",
                        "pericyte",
-                       "satellite glial cell")
+                       "Schwann cell")
 
 ## Set epithelia and stroma to higher resolutions
 Idents(lin.list$Epithelia) <- "RNA_snn_res.1.5"
@@ -168,6 +168,14 @@ fib_rename <- WhichCells(lin.list$Stroma,
 levels(lin.list$Stroma$cellannotation) <- c(levels(lin.list$Stroma$cellannotation), "periepithelial fibroblast of prostate")
 lin.list$Stroma@meta.data[fib_rename, "cellannotation"] <- "periepithelial fibroblast of prostate"
 
+## Split interstitial fibroblasts by sex
+fib_rename <- WhichCells(lin.list$Stroma, 
+                         idents=c("interstitial fibroblast of vagina", "interstitial fibroblast of urethra"), 
+                         expression = Sex == "male")
+levels(lin.list$Stroma$cellannotation) <- c(levels(lin.list$Stroma$cellannotation), "interstitial fibroblast of prostate")
+lin.list$Stroma@meta.data[fib_rename, "cellannotation"] <- "interstitial fibroblast of prostate"
+
+
 ## Reset idents
 Idents(lin.list$Epithelia) <- "cellannotation"
 Idents(lin.list$Stroma) <- "cellannotation"
diff --git a/scripts/05_female_only.R b/scripts/05_female_only.R
index fd6a7e7d4835b551b9bbb4b4f0221e6ec39195d9..7f8212c7750b0a600ca1fc2fc8869e6ef06b9e5b 100644
--- a/scripts/05_female_only.R
+++ b/scripts/05_female_only.R
@@ -127,10 +127,10 @@ lin.list$Leukocyte <- RenameIdents(lin.list$Leukocyte, cluster.ids) %>%
 
 ## Stroma
 Idents(lin.list$Stroma) <- "RNA_snn_res.1"
-cluster.ids <- c("0" = "minor interstitial fibroblast",
+cluster.ids <- c("0" = "interstitial fibroblast of vagina",
                  "1" = "pericyte",
                  "2" = "smooth muscle cell",
-                 "3" = "major interstitial fibroblast",
+                 "3" = "interstitial fibroblast of urethra",
                  "4" = "smooth muscle cell",
                  "5" = "smooth muscle cell",
                  "6" = "vascular smooth muscle cell",
@@ -138,11 +138,11 @@ cluster.ids <- c("0" = "minor interstitial fibroblast",
                  "8" = "periepithelial fibroblast of urethra",
                  "9" = "smooth muscle cell",
                  "10" = "endoneurial fibroblast",
-                 "11" = "satellite glial cell",
+                 "11" = "Schwann cell",
                  "12" = "pericyte",
                  "13" = "pericyte",
                  "14" = "smooth muscle cell",
-                 "15" = "satellite glial cell")
+                 "15" = "Schwann cell")
 lin.list$Stroma <- RenameIdents(lin.list$Stroma, cluster.ids) %>%
   StashIdent("cellannotation")