-
Notifications
You must be signed in to change notification settings - Fork 127
/
singleCell_doublets.R
85 lines (59 loc) · 3.08 KB
/
singleCell_doublets.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
# filter out doublets: DoubletFinder
# setwd("~/Desktop/demo/single_cell_doublets/scripts")
# load libraries
library(Seurat)
library(ggplot2)
library(tidyverse)
library(DoubletFinder)
# create counts matrix
cts <- ReadMtx(mtx = '/Users/kr/Desktop/demo/single_cell_doublets/data/raw_feature_bc_matrix/matrix.mtx.gz',
features = '/Users/kr/Desktop/demo/single_cell_doublets/data/raw_feature_bc_matrix/features.tsv.gz',
cells = '/Users/kr/Desktop/demo/single_cell_doublets/data/raw_feature_bc_matrix/barcodes.tsv.gz')
cts[1:10,1:10]
# create Seurat object
pbmc.seurat <- CreateSeuratObject(counts = cts)
str(pbmc.seurat)
# QC and Filtering
# explore QC
pbmc.seurat$mitoPercent <- PercentageFeatureSet(pbmc.seurat, pattern = '^MT-')
pbmc.seurat.filtered <- subset(pbmc.seurat, subset = nCount_RNA > 800 &
nFeature_RNA > 500 &
mitoPercent < 10)
pbmc.seurat
pbmc.seurat.filtered
# pre-process standard workflow
pbmc.seurat.filtered <- NormalizeData(object = pbmc.seurat.filtered)
pbmc.seurat.filtered <- FindVariableFeatures(object = pbmc.seurat.filtered)
pbmc.seurat.filtered <- ScaleData(object = pbmc.seurat.filtered)
pbmc.seurat.filtered <- RunPCA(object = pbmc.seurat.filtered)
ElbowPlot(pbmc.seurat.filtered)
pbmc.seurat.filtered <- FindNeighbors(object = pbmc.seurat.filtered, dims = 1:20)
pbmc.seurat.filtered <- FindClusters(object = pbmc.seurat.filtered)
pbmc.seurat.filtered <- RunUMAP(object = pbmc.seurat.filtered, dims = 1:20)
## pK Identification (no ground-truth) ---------------------------------------------------------------------------------------
sweep.res.list_pbmc <- paramSweep_v3(pbmc.seurat.filtered, PCs = 1:20, sct = FALSE)
sweep.stats_pbmc <- summarizeSweep(sweep.res.list_pbmc, GT = FALSE)
bcmvn_pbmc <- find.pK(sweep.stats_pbmc)
ggplot(bcmvn_pbmc, aes(pK, BCmetric, group = 1)) +
geom_point() +
geom_line()
pK <- bcmvn_pbmc %>% # select the pK that corresponds to max bcmvn to optimize doublet detection
filter(BCmetric == max(BCmetric)) %>%
select(pK)
pK <- as.numeric(as.character(pK[[1]]))
## Homotypic Doublet Proportion Estimate -------------------------------------------------------------------------------------
annotations <- pbmc.seurat.filtered@meta.data$seurat_clusters
homotypic.prop <- modelHomotypic(annotations) ## ex: annotations <- seu_kidney@meta.data$ClusteringResults
nExp_poi <- round(0.076*nrow(pbmc.seurat.filtered@meta.data)) ## Assuming 7.5% doublet formation rate - tailor for your dataset
nExp_poi.adj <- round(nExp_poi*(1-homotypic.prop))
# run doubletFinder
pbmc.seurat.filtered <- doubletFinder_v3(pbmc.seurat.filtered,
PCs = 1:20,
pN = 0.25,
pK = pK,
nExp = nExp_poi.adj,
reuse.pANN = FALSE, sct = FALSE)
# visualize doublets
DimPlot(pbmc.seurat.filtered, reduction = 'umap', group.by = "DF.classifications_0.25_0.21_691")
# number of singlets and doublets
table(pbmc.seurat.filtered@meta.data$DF.classifications_0.25_0.21_691)