Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
S
scRNA_urethra
Manage
Activity
Members
Labels
Plan
Issues
0
Issue boards
Milestones
Iterations
Wiki
Requirements
Code
Merge requests
0
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Locked files
Build
Pipelines
Jobs
Pipeline schedules
Test cases
Artifacts
Deploy
Releases
Package Registry
Container Registry
Operate
Environments
Terraform modules
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Code review analytics
Issue analytics
Insights
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Strand Lab
scRNA_urethra
Commits
e72babe6
Commit
e72babe6
authored
6 months ago
by
John Lafin
Browse files
Options
Downloads
Patches
Plain Diff
Update lineage filtering; move stress filter to annot script
parent
e0e9f125
Branches
Branches containing commit
No related merge requests found
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
scripts/03_lineage_filtering.R
+51
-82
51 additions, 82 deletions
scripts/03_lineage_filtering.R
scripts/04_annotation.R
+232
-0
232 additions, 0 deletions
scripts/04_annotation.R
with
283 additions
and
82 deletions
scripts/03_lineage_filtering.R
+
51
−
82
View file @
e72babe6
# Lineage-dependent filtering
# Author: John T Lafin
# Updated: 2024-0
9
-1
7
# Updated: 2024-
1
0-1
5
print
(
"Script begin at: "
)
Sys.time
()
...
...
@@ -28,6 +28,33 @@ set.seed(42)
outdir
<-
"data_modified/03_lineage_filtering/"
dir.create
(
outdir
,
showWarnings
=
FALSE
)
# Create pre-processing function
preprocess
<-
function
(
x
,
cluster.res
=
seq
(
0.1
,
1.5
,
0.1
),
default.res
=
"RNA_snn_res.0.4"
,
batch.var
=
"Sample"
){
x
<-
x
%>%
NormalizeData
()
%>%
FindVariableFeatures
()
%>%
ScaleData
()
%>%
RunPCA
()
%>%
RunHarmony
(
group.by.vars
=
batch.var
)
numPCs
<-
maxLikGlobalDimEst
(
x
@
reductions
$
harmony
@
cell.embeddings
,
k
=
10
)
$
dim.est
%>%
round
()
x
<-
x
%>%
RunUMAP
(
dims
=
1
:
numPCs
,
reduction
=
"harmony"
,
metric
=
"euclidean"
,
min.dist
=
0.1
)
%>%
FindNeighbors
(
reduction
=
"harmony"
,
dims
=
1
:
numPCs
)
%>%
FindClusters
(
resolution
=
cluster.res
)
%>%
SetIdent
(
value
=
default.res
)
return
(
x
)
}
# Load data ---------------------------------------------------------------
sc.h
<-
readRDS
(
"data_modified/02_preprocess/processed_object.rds"
)
...
...
@@ -78,30 +105,8 @@ ggsave(paste0(outdir, "dotplot_global_lineage.png"))
lin.list
<-
SplitObject
(
sc.h
,
split.by
=
"Lineage"
)
lin.list
[[
"Putative doublet"
]]
<-
NULL
# Remove doublets
res
<-
seq
(
0.1
,
1
,
0.1
)
lin.list
<-
lin.list
%>%
future_lapply
(
function
(
x
){
x
<-
NormalizeData
(
x
)
%>%
FindVariableFeatures
()
%>%
ScaleData
()
%>%
RunPCA
()
%>%
RunHarmony
(
group.by.vars
=
"Sample"
)
numPCs
<-
maxLikGlobalDimEst
(
x
@
reductions
$
harmony
@
cell.embeddings
,
k
=
10
)
$
dim.est
%>%
round
()
x
<-
x
%>%
RunUMAP
(
dims
=
1
:
numPCs
,
reduction
=
"harmony"
,
metric
=
"euclidean"
,
min.dist
=
0.1
)
%>%
FindNeighbors
(
reduction
=
"harmony"
,
dims
=
1
:
numPCs
)
%>%
FindClusters
(
resolution
=
res
)
%>%
SetIdent
(
value
=
default.res
)
return
(
x
)},
future.seed
=
TRUE
)
res
<-
seq
(
0.1
,
1.5
,
0.1
)
lin.list
<-
future_lapply
(
lin.list
,
preprocess
,
future.seed
=
TRUE
)
# Save UMAP of each subset
for
(
lin
in
names
(
lin.list
)
){
...
...
@@ -109,10 +114,11 @@ for ( lin in names(lin.list) ){
ggsave
(
paste0
(
umapdir
,
lin
,
"_prefilter.png"
))
}
# Lineage-dependent filtering ---------------------------------------------
## Identify and remove clusters enriched for likely doublets
dbltdir
<-
paste0
(
outdir
,
"doublets/"
)
dir.create
(
dbltdir
,
showWarnings
=
FALSE
)
#### Save doublet plots of each lineage
dblts
<-
list
()
...
...
@@ -125,20 +131,22 @@ for (lin in names(lin.list)) {
ggplot
(
dblts
[[
lin
]],
aes
(
.data
[[
default.res
]],
fraction
,
fill
=
scDblFinder.class
))
+
geom_col
()
+
geom_hline
(
yintercept
=
0.5
,
linetype
=
"dashed"
)
ggsave
(
paste0
(
ou
tdir
,
"dblts_"
,
lin
,
".png"
))
ggsave
(
paste0
(
dbl
tdir
,
lin
,
"
_pass1
.png"
))
}
### Remove clusters with >50% of cells classed as doublets
### Remove clusters with >
=
50% of cells classed as doublets
lin.list.filt
<-
list
()
keep
<-
list
()
for
(
lin
in
names
(
lin.list
))
{
keep
[[
lin
]]
<-
dblts
[[
lin
]]
%>%
filter
(
scDblFinder.class
==
"singlet"
&
fraction
>
0.5
)
%>%
pull
(
"RNA_snn_res.0.4"
)
pull
(
.data
[[
default.res
]]
)
lin.list.filt
[[
lin
]]
<-
subset
(
lin.list
[[
lin
]],
idents
=
keep
[[
lin
]])
}
## Library size filtering
ncountdir
<-
paste0
(
outdir
,
"ncount/"
)
dir.create
(
ncountdir
,
showWarnings
=
FALSE
)
### Calculate thresholds
...
...
@@ -161,7 +169,7 @@ rownames(mt.dt) <- mt.dt$lineage
mt.dt
$
lineage
<-
NULL
### Save thresholds
write.csv
(
mt.dt
,
paste0
(
outdir
,
"nCount_cutoffs.csv"
))
write.csv
(
mt.dt
,
paste0
(
nc
ou
n
tdir
,
"nCount_cutoffs.csv"
))
### Save violin plots with cutoffs
for
(
g
in
names
(
lin.list.filt
))
{
...
...
@@ -169,7 +177,7 @@ for (g in names(lin.list.filt)) {
geom_hline
(
yintercept
=
mt.dt
[
g
,
"cutoff"
],
linetype
=
"dashed"
)
+
scale_y_log10
()
+
NoLegend
()
ggsave
(
paste0
(
outdir
,
paste0
(
"vln_nCount_cutoffs_"
,
g
,
"
.png"
))
)
ggsave
(
paste0
(
nc
ou
n
tdir
,
g
,
"_nCount
.png"
))
}
### Apply library size filter
...
...
@@ -187,37 +195,16 @@ tibble("Lineage" = names(cells.old),
"Filtered"
=
unlist
(
cells.new
))
%>%
mutate
(
"Difference"
=
Original
-
Filtered
,
"Fraction_removed"
=
Difference
/
Original
)
%>%
write.csv
(
paste0
(
outdir
,
"fraction_removed_nCount.csv"
))
write.csv
(
paste0
(
nc
ou
n
tdir
,
"fraction_removed_nCount.csv"
))
# Recluster ---------------------------------------------------------------
lin.list
<-
lin.list.filt
%>%
future_lapply
(
function
(
x
){
x
<-
NormalizeData
(
x
)
%>%
FindVariableFeatures
()
%>%
ScaleData
()
%>%
RunPCA
()
%>%
RunHarmony
(
group.by.vars
=
"Sample"
)
numPCs
<-
maxLikGlobalDimEst
(
x
@
reductions
$
harmony
@
cell.embeddings
,
k
=
10
)
$
dim.est
%>%
round
()
x
<-
x
%>%
RunUMAP
(
dims
=
1
:
numPCs
,
reduction
=
"harmony"
,
metric
=
"euclidean"
,
min.dist
=
0.1
)
%>%
FindNeighbors
(
reduction
=
"harmony"
,
dims
=
1
:
numPCs
)
%>%
FindClusters
(
resolution
=
res
)
%>%
SetIdent
(
value
=
default.res
)
return
(
x
)},
future.seed
=
TRUE
)
lin.list
<-
future_lapply
(
lin.list.filt
,
preprocess
,
future.seed
=
TRUE
)
# Doublet removal, second round -------------------------------------------
## Create plots of doublets
dblts
<-
list
()
for
(
lin
in
names
(
lin.list
))
{
dblts
[[
lin
]]
<-
lin.list
[[
lin
]]
@
meta.data
%>%
...
...
@@ -225,46 +212,28 @@ for (lin in names(lin.list)) {
summarize
(
count
=
n
())
%>%
group_by
(
.data
[[
default.res
]])
%>%
mutate
(
fraction
=
count
/
sum
(
count
))
ggplot
(
dblts
[[
lin
]],
aes
(
.data
[[
default.res
]],
fraction
,
fill
=
scDblFinder.class
))
+
geom_col
()
+
geom_hline
(
yintercept
=
0.5
,
linetype
=
"dashed"
)
ggsave
(
paste0
(
dbltdir
,
lin
,
"_pass2.png"
))
}
## Remove doublets
keep
<-
list
()
for
(
lin
in
names
(
lin.list
))
{
keep
[[
lin
]]
<-
dblts
[[
lin
]]
%>%
filter
(
scDblFinder.class
==
"singlet"
&
fraction
>
0.5
)
%>%
pull
(
"RNA_snn_res.0.4"
)
lin.list
[[
lin
]]
<-
subset
(
lin.list
[[
lin
]],
idents
=
keep
[[
lin
]])
pull
(
.data
[[
default.res
]]
)
lin.list
.filt
[[
lin
]]
<-
subset
(
lin.list
[[
lin
]],
idents
=
keep
[[
lin
]])
}
## Recluster again
lin.list
<-
lin.list
%>%
future_lapply
(
function
(
x
){
x
<-
NormalizeData
(
x
)
%>%
FindVariableFeatures
()
%>%
ScaleData
()
%>%
RunPCA
()
%>%
RunHarmony
(
group.by.vars
=
"Sample"
)
numPCs
<-
maxLikGlobalDimEst
(
x
@
reductions
$
harmony
@
cell.embeddings
,
k
=
10
)
$
dim.est
%>%
round
()
x
<-
x
%>%
RunUMAP
(
dims
=
1
:
numPCs
,
reduction
=
"harmony"
,
metric
=
"euclidean"
,
min.dist
=
0.1
)
%>%
FindNeighbors
(
reduction
=
"harmony"
,
dims
=
1
:
numPCs
)
%>%
FindClusters
(
resolution
=
res
)
%>%
SetIdent
(
value
=
default.res
)
return
(
x
)},
future.seed
=
TRUE
)
## Only recluster epithelia as the other lineages didn't have more doublets
lin.list
$
Epithelia
<-
preprocess
(
lin.list.filt
$
Epithelia
)
# Save outputs ------------------------------------------------------------
## Save UMAPs and objects
objdir
<-
paste0
(
outdir
,
"objects/"
)
dir.create
(
objdir
,
showWarnings
=
FALSE
)
for
(
lin
in
names
(
lin.list
))
{
...
...
This diff is collapsed.
Click to expand it.
scripts/04_annotation.R
0 → 100644
+
232
−
0
View file @
e72babe6
# Annotation and stress score filtering
# Author: John T Lafin
# Updated: 2024-10-15
print
(
"Script begin at: "
)
Sys.time
()
# Setup -------------------------------------------------------------------
# Load packages
library
(
dplyr
)
library
(
Seurat
)
library
(
intrinsicDimension
)
library
(
ggplot2
)
library
(
harmony
)
library
(
future
)
library
(
future.apply
)
# Future initialization
future_plan
<-
"multisession"
# Can use multicore if not on Rstudio or Windows
n_workers
<-
4
plan
(
future_plan
,
workers
=
n_workers
)
options
(
future.globals.maxSize
=
8000
*
1024
^
2
)
set.seed
(
42
)
# Create output dir
outdir
<-
"data_modified/04_annotation/"
dir.create
(
outdir
,
showWarnings
=
FALSE
)
# Create pre-processing function
preprocess
<-
function
(
x
,
cluster.res
=
seq
(
0.1
,
1.5
,
0.1
),
default.res
=
"RNA_snn_res.0.4"
,
batch.var
=
"Sample"
){
x
<-
x
%>%
NormalizeData
()
%>%
FindVariableFeatures
()
%>%
ScaleData
()
%>%
RunPCA
()
%>%
RunHarmony
(
group.by.vars
=
batch.var
)
numPCs
<-
maxLikGlobalDimEst
(
x
@
reductions
$
harmony
@
cell.embeddings
,
k
=
10
)
$
dim.est
%>%
round
()
x
<-
x
%>%
RunUMAP
(
dims
=
1
:
numPCs
,
reduction
=
"harmony"
,
metric
=
"euclidean"
,
min.dist
=
0.1
)
%>%
FindNeighbors
(
reduction
=
"harmony"
,
dims
=
1
:
numPCs
)
%>%
FindClusters
(
resolution
=
cluster.res
)
%>%
SetIdent
(
value
=
default.res
)
return
(
x
)
}
# Load data ---------------------------------------------------------------
lin.list
<-
list
()
lins
<-
list.files
(
'data_modified/03_lineage_filtering/objects/'
)
%>%
sub
(
".rds"
,
""
,
.
)
for
(
lin
in
lins
)
{
lin.list
[[
lin
]]
<-
readRDS
(
paste0
(
"data_modified/03_lineage_filtering/objects/"
,
lin
,
".rds"
))
}
# Annotation --------------------------------------------------------------
annotdir
<-
paste0
(
outdir
,
"annotations_first_pass/"
)
dir.create
(
annotdir
,
showWarnings
=
FALSE
)
## Create list of labels
annot.list
<-
list
()
annot.list
$
Endothelia
<-
c
(
"Venous endothelia"
,
"Venous endothelia"
,
"Capillary endothelia"
,
"Capillary endothelia"
,
"Arterial endothelia"
,
"Lymphatic endothelia"
)
annot.list
$
Epithelia
<-
c
(
"transitional basal epithelia of urethra"
,
"basal epithelia of prostate"
,
"transitional basal epithelia of urethra"
,
"squamous intermediate epithelia of urethra"
,
"basal epithelia of prostate"
,
"transitional intermediate epithelia of urethra"
,
"squamous intermediate epithelia of urethra"
,
"luminal epithelia of prostate"
,
"club epithelia of urethra"
,
"transitional intermediate epithelia of urethra"
,
"club epithelia of urethra"
,
"stressed cells"
,
"stressed cells"
,
"squamous intermediate epithelia of urethra"
,
"squamous intermediate epithelia of urethra"
,
"basal epithelia of prostate"
,
"transitional basal epithelia of urethra"
,
"basal epithelia of prostate"
,
"proliferative cells"
,
"squamous luminal epithelia of urethra"
,
"stressed cells"
,
"transitional basal epithelia of urethra"
,
"basal epithelia of prostate"
)
annot.list
$
Leukocyte
<-
c
(
"T cell"
,
"Macrophage"
,
"Mast cell"
,
"T cell"
,
"T cell"
,
"NK cell"
,
"Proliferating leukocyte"
,
"B cell"
,
"Dendritic cell"
,
"Plasma cell"
)
annot.list
$
Stroma
<-
c
(
"pericyte"
,
"major interstitial fibroblast"
,
"smooth muscle"
,
"minor interstitial fibroblast"
,
"smooth muscle"
,
"smooth muscle"
,
"periepithelial fibroblast of vagina"
,
"vascular smooth muscle"
,
"smooth muscle"
,
"periepithelial fibroblast of urethra"
,
"stressed cell"
,
"pericyte"
,
"glia"
,
"smooth muscle"
,
"endoneurial fibroblast"
,
"pericyte"
,
"glia"
)
## Set epithelia and stroma to higher resolutions
Idents
(
lin.list
$
Epithelia
)
<-
"RNA_snn_res.1.5"
Idents
(
lin.list
$
Stroma
)
<-
"RNA_snn_res.1"
## Add the labels
for
(
lin
in
lins
)
{
names
(
annot.list
[[
lin
]])
<-
levels
(
lin.list
[[
lin
]])
lin.list
[[
lin
]]
<-
RenameIdents
(
lin.list
[[
lin
]],
annot.list
[[
lin
]])
%>%
StashIdent
(
"cellannotation"
)
}
## Add neuroendocrine cells
NE.markers
<-
strsplit
(
"CHGA GRP CALCA SCG2 TPH1 ASCL1 SCG5 PHGR1 MIAT MAOB"
,
split
=
" "
)
lin.list
$
Epithelia
<-
AddModuleScore
(
lin.list
$
Epithelia
,
features
=
NE.markers
,
name
=
"NE.score"
)
lin.list
$
Epithelia
@
meta.data
<-
rename
(
lin.list
$
Epithelia
@
meta.data
,
NE.score
=
NE.score1
)
VlnPlot
(
lin.list
$
Epithelia
,
"NE.score"
,
group.by
=
"orig.ident"
)
+
NoLegend
()
+
geom_hline
(
yintercept
=
0.3
,
linetype
=
"dashed"
)
ggsave
(
paste0
(
annotdir
,
"NE_score_cutoff.png"
))
lin.list
$
Epithelia
@
meta.data
<-
lin.list
$
Epithelia
@
meta.data
%>%
mutate
(
cellannotation
=
if_else
(
NE.score
>
0.3
,
"Neuroendocrine cell"
,
cellannotation
))
## Add ionocytes
VlnPlot
(
lin.list
$
Epithelia
,
"FOXI1"
,
group.by
=
"orig.ident"
)
+
NoLegend
()
+
geom_hline
(
yintercept
=
0.3
,
linetype
=
"dashed"
)
ggsave
(
paste0
(
annotdir
,
"FOXI1_score_cutoff.png"
))
ionocyte.barcodes
<-
WhichCells
(
lin.list
$
Epithelia
,
expression
=
FOXI1
>
0.3
)
lin.list
$
Epithelia
@
meta.data
[
ionocyte.barcodes
,
"cellannotation"
]
<-
"Ionocyte"
## Save updates to object
lin.list
$
Epithelia
<-
StashIdent
(
lin.list
$
Epithelia
,
"cellannotation"
)
## Split periepithelial fibroblasts by sex
fib_rename
<-
WhichCells
(
lin.list
$
Stroma
,
idents
=
"periepithelial fibroblast of vagina"
,
expression
=
Sex
==
"male"
)
levels
(
lin.list
$
Stroma
$
cellannotation
)
<-
c
(
levels
(
lin.list
$
Stroma
$
cellannotation
),
"periepithelial fibroblast of prostate"
)
lin.list
$
Stroma
@
meta.data
[
fib_rename
,
"cellannotation"
]
<-
"periepithelial fibroblast of prostate"
Idents
(
stroma
)
<-
"cellannotation"
## Save labeled UMAP plots
for
(
lin
in
lins
)
{
DimPlot
(
lin.list
[[
lin
]])
ggsave
(
paste0
(
annotdir
,
lin
,
"_umap.png"
))
}
# Stress score filtering --------------------------------------------------
stressdir
<-
paste0
(
outdir
,
"stress/"
)
dir.create
(
stressdir
,
showWarnings
=
FALSE
)
## Produce violin plots of stress scores
lin.list
<-
lapply
(
lin.list
,
SetIdent
,
value
=
"RNA_snn_res.1"
)
### Check epithelia at higher resolution to capture other populations
Idents
(
lin.list
$
Epithelia
)
<-
"RNA_snn_res.1.5"
for
(
lin
in
names
(
lin.list
))
{
VlnPlot
(
lin.list
[[
lin
]],
"Stress.score"
,
pt.size
=
0
)
+
NoLegend
()
ggsave
(
paste0
(
stressdir
,
lin
,
"_pre-filter.png"
),
width
=
11
,
height
=
8.5
)
}
## Define stressed clusters based on violin plots
stressed.cells
<-
list
()
stressed.cells
$
Epithelia
<-
c
(
11
,
12
,
20
)
stressed.cells
$
Stroma
<-
c
(
2
,
10
,
15
)
stressed.cells
$
Endothelia
<-
c
(
4
,
10
)
stressed.cells
$
Leukocyte
<-
c
(
4
,
9
)
## Remove stressed clusters and reprocess
for
(
lin
in
names
(
lin.list
))
{
lin.list
[[
lin
]]
<-
subset
(
lin.list
[[
lin
]],
idents
=
stressed.cells
[[
lin
]],
invert
=
TRUE
)
}
lin.list
<-
lapply
(
lin.list
,
preprocess
)
### TO-DO
# Second round of stress filtering on stroma
# Revisit epithelia to relabel
# Make sure that ionocytes and NE cells are present in epi labels
# Produce post-filter stress plots
for
(
lin
in
names
(
lin.list
))
{
VlnPlot
(
lin.list
[[
lin
]],
"Stress.score"
,
pt.size
=
0
)
+
NoLegend
()
ggsave
(
paste0
(
stressdir
,
lin
,
"_first_filter.png"
),
width
=
11
,
height
=
8.5
)
}
This diff is collapsed.
Click to expand it.
Preview
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment