Seurat 官网单细胞教程四 (SCTransform 使用及 SCTransform V2 版本)

JunJunLab 老俊俊的生信笔记 2022-08-27

收录于合集 #单细胞笔记 21个

昨日月

1引言

前面已经涉及到使用 SCTransform 来代替原来的 NormalizeData,FindVariableFeatures和ScaleData 三个函数,今天我们来详细看看使用的方法以及最新 2022 年 6 月份 更新的 V2 版本使用介绍。

2V1 版本使用示例

首先读取数据:

library(Seurat)
library(ggplot2)
library(sctransform)

# load data
pbmc_data <- Read10X(data.dir = "./hg19/")
pbmc <- CreateSeuratObject(counts = pbmc_data)

# store mitochondrial percentage in object meta data
pbmc <- PercentageFeatureSet(pbmc, pattern = "^MT-", col.name = "percent.mt")

运行 SCTransform:

# run sctransform
pbmc <- SCTransform(pbmc,
                    vars.to.regress = "percent.mt", verbose = FALSE)

使用 glmGamPoi 提高速度:

需要提前安装一下:

# installglmGamPoi
BiocManager::install("glmGamPoi")

pbmc <- SCTransform(pbmc, method = "glmGamPoi",
                    vars.to.regress = "percent.mt",
                    verbose = FALSE)

常规流程:

# These are now standard steps
# in the Seurat workflow for visualization and clustering
pbmc <- RunPCA(pbmc, verbose = FALSE)
pbmc <- RunUMAP(pbmc, dims = 1:30, verbose = FALSE)

pbmc <- FindNeighbors(pbmc, dims = 1:30, verbose = FALSE)
pbmc <- FindClusters(pbmc, verbose = FALSE)

DimPlot(pbmc, label = TRUE) + NoLegend()

小提琴可视化:

# These are now standard steps in the Seurat workflow for visualization and clustering
# Visualize canonical marker genes as violin plots.
VlnPlot(pbmc, features = c("CD8A", "GZMK", "CCL5", "S100A4",
                           "ANXA1", "CCR7", "ISG15", "CD3D"),
        pt.size = 0.2, ncol = 4)

umap 图展示基因表达:

# Visualize canonical marker genes on the sctransform embedding.
FeaturePlot(pbmc, features = c("CD8A", "GZMK", "CCL5",
                               "S100A4", "ANXA1", "CCR7"),
            pt.size = 0.2,
            ncol = 3)

3V2 版本使用示例

V2 版本 提升了速度和减少了内存的消耗,安装如下:

# install glmGamPoi
if (!requireNamespace("BiocManager", quietly = TRUE))
  install.packages("BiocManager")

BiocManager::install("glmGamPoi")
# install sctransform from Github
devtools::install_github("satijalab/sctransform", ref = "develop")

测试代码:

准备测试数据:

library(Seurat)
library(SeuratData)
library(patchwork)
library(dplyr)
library(ggplot2)

library(Seurat)
library(SeuratData)
library(patchwork)
library(dplyr)
library(ggplot2)

# load dataset
ifnb <- UpdateSeuratObject(LoadData("ifnb"))

# split the dataset into a list of two seurat objects (stim and CTRL)
ifnb.list <- SplitObject(ifnb, split.by = "stim")

ctrl <- ifnb.list[["CTRL"]]
stim <- ifnb.list[["STIM"]]

单样本使用测试:(vst.flavor = "v2")

# normalize and run dimensionality reduction on control dataset
ctrl <- SCTransform(ctrl, vst.flavor = "v2", verbose = FALSE) %>%
  RunPCA(npcs = 30, verbose = FALSE) %>%
  RunUMAP(reduction = "pca", dims = 1:30, verbose = FALSE) %>%
  FindNeighbors(reduction = "pca", dims = 1:30, verbose = FALSE) %>%
  FindClusters(resolution = 0.7, verbose = FALSE)

p1 <- DimPlot(ctrl, label = T, repel = T) +
  ggtitle("Unsupervised clustering")

p2 <- DimPlot(ctrl, label = T, repel = T,
              group.by = "seurat_annotations") +
  ggtitle("Annotated celltypes")

p1 | p2

数据整合测试代码:整合

# Perform integration using pearson residuals
stim <- SCTransform(stim, vst.flavor = "v2", verbose = FALSE) %>%
  RunPCA(npcs = 30, verbose = FALSE)

ifnb.list <- list(ctrl = ctrl, stim = stim)

features <- SelectIntegrationFeatures(object.list = ifnb.list, nfeatures = 3000)

ifnb.list <- PrepSCTIntegration(object.list = ifnb.list, anchor.features = features)

immune.anchors <- FindIntegrationAnchors(object.list = ifnb.list,
                                         normalization.method = "SCT",
                                         anchor.features = features)

immune.combined.sct <- IntegrateData(anchorset = immune.anchors,
                                     normalization.method = "SCT")

下游常规流程:

immune.combined.sct <- RunPCA(immune.combined.sct, verbose = FALSE)
immune.combined.sct <- RunUMAP(immune.combined.sct, reduction = "pca", dims = 1:30, verbose = FALSE)
immune.combined.sct <- FindNeighbors(immune.combined.sct, reduction = "pca", dims = 1:30)
immune.combined.sct <- FindClusters(immune.combined.sct, resolution = 0.3)

# plot
p1 <- DimPlot(immune.combined.sct, reduction = "umap",
              group.by = "stim")

p2 <- DimPlot(immune.combined.sct, reduction = "umap",
              group.by = "seurat_clusters", label = TRUE,
              repel = TRUE)

p3 <- DimPlot(immune.combined.sct, reduction = "umap",
              group.by = "seurat_annotations", label = TRUE,
              repel = TRUE)

p1 | p2 | p3

寻找不同条件下的 Marker 基因:

添加分组:

# find marker gene
immune.combined.sct$celltype.stim <- paste(immune.combined.sct$seurat_annotations,
                                           immune.combined.sct$stim,
                                           sep = "_")

Idents(immune.combined.sct) <- "celltype.stim"

使用 SCT Assay 里面的正则负二项模型矫正过后的 counts 数据来计算差异基因:

################################
# find marker gene
immune.combined.sct$celltype.stim <- paste(immune.combined.sct$seurat_annotations,
                                           immune.combined.sct$stim,
                                           sep = "_")

# set current Idents
Idents(immune.combined.sct) <- "celltype.stim"

immune.combined.sct <- PrepSCTFindMarkers(immune.combined.sct)
## Found 2 SCT models. Recorrecting SCT counts using minimum median counts: 1665

# diff test
b.interferon.response <- FindMarkers(immune.combined.sct,
                                     assay = "SCT",
                                     ident.1 = "B_STIM",
                                     ident.2 = "B_CTRL",
                                     verbose = FALSE)

# check
head(b.interferon.response, n = 5)
# p_val avg_log2FC pct.1 pct.2     p_val_adj
# ISG15 1.505744e-159   3.508576 0.998 0.229 2.003543e-155
# IFIT3 4.175136e-154   2.567244 0.961 0.052 5.555437e-150
# IFI6  2.492667e-153   2.459369 0.965 0.076 3.316742e-149
# ISG20 9.512332e-152   2.560476 1.000 0.666 1.265711e-147
# IFIT1 2.444608e-139   2.132262 0.904 0.029 3.252795e-135

如果在 PrepSCTFindMarkers 之后你想提取子集然后寻找差异基因,需要加上 recorrect_umi = FALSE 参数:

# subset of the original object after running PrepSCTFindMarkers()
immune.combined.sct.subset <- subset(immune.combined.sct,
                                     idents = c("B_STIM", "B_CTRL"))

# add recorrect_umi = FALSE
b.interferon.response.subset <- FindMarkers(immune.combined.sct.subset,
                                            assay = "SCT", ident.1 = "B_STIM",
                                            ident.2 = "B_CTRL",
                                            verbose = FALSE,
                                            recorrect_umi = FALSE)

可视化:

# plot
Idents(immune.combined.sct) <- "seurat_annotations"

DefaultAssay(immune.combined.sct) <- "SCT"

FeaturePlot(immune.combined.sct,
            features = c("CD3D", "GNLY", "IFI6"),
            split.by = "stim",
            max.cutoff = 3,
            cols = c("grey", "red"))