其他
Seurat 官网单细胞教程四 (SCTransform 使用及 SCTransform V2 版本)
昨日月
1引言
前面已经涉及到使用 SCTransform 来代替原来的 NormalizeData
,FindVariableFeatures
和ScaleData
三个函数,今天我们来详细看看使用的方法以及最新 2022 年 6 月份 更新的 V2 版本使用介绍。
2V1 版本使用示例
首先读取数据:
library(Seurat)
library(ggplot2)
library(sctransform)
# load data
pbmc_data <- Read10X(data.dir = "./hg19/")
pbmc <- CreateSeuratObject(counts = pbmc_data)
# store mitochondrial percentage in object meta data
pbmc <- PercentageFeatureSet(pbmc, pattern = "^MT-", col.name = "percent.mt")
运行 SCTransform:
# run sctransform
pbmc <- SCTransform(pbmc,
vars.to.regress = "percent.mt", verbose = FALSE)
使用 glmGamPoi 提高速度:
需要提前安装一下:
# installglmGamPoi
BiocManager::install("glmGamPoi")
pbmc <- SCTransform(pbmc, method = "glmGamPoi",
vars.to.regress = "percent.mt",
verbose = FALSE)
常规流程:
# These are now standard steps
# in the Seurat workflow for visualization and clustering
pbmc <- RunPCA(pbmc, verbose = FALSE)
pbmc <- RunUMAP(pbmc, dims = 1:30, verbose = FALSE)
pbmc <- FindNeighbors(pbmc, dims = 1:30, verbose = FALSE)
pbmc <- FindClusters(pbmc, verbose = FALSE)
DimPlot(pbmc, label = TRUE) + NoLegend()
小提琴可视化:
# These are now standard steps in the Seurat workflow for visualization and clustering
# Visualize canonical marker genes as violin plots.
VlnPlot(pbmc, features = c("CD8A", "GZMK", "CCL5", "S100A4",
"ANXA1", "CCR7", "ISG15", "CD3D"),
pt.size = 0.2, ncol = 4)
umap 图展示基因表达:
# Visualize canonical marker genes on the sctransform embedding.
FeaturePlot(pbmc, features = c("CD8A", "GZMK", "CCL5",
"S100A4", "ANXA1", "CCR7"),
pt.size = 0.2,
ncol = 3)
3V2 版本使用示例
V2 版本 提升了速度和减少了内存的消耗,安装如下:
# install glmGamPoi
if (!requireNamespace("BiocManager", quietly = TRUE))
install.packages("BiocManager")
BiocManager::install("glmGamPoi")
# install sctransform from Github
devtools::install_github("satijalab/sctransform", ref = "develop")
测试代码:
准备测试数据:
library(Seurat)
library(SeuratData)
library(patchwork)
library(dplyr)
library(ggplot2)
library(Seurat)
library(SeuratData)
library(patchwork)
library(dplyr)
library(ggplot2)
# load dataset
ifnb <- UpdateSeuratObject(LoadData("ifnb"))
# split the dataset into a list of two seurat objects (stim and CTRL)
ifnb.list <- SplitObject(ifnb, split.by = "stim")
ctrl <- ifnb.list[["CTRL"]]
stim <- ifnb.list[["STIM"]]
单样本使用测试:(vst.flavor = "v2")
# normalize and run dimensionality reduction on control dataset
ctrl <- SCTransform(ctrl, vst.flavor = "v2", verbose = FALSE) %>%
RunPCA(npcs = 30, verbose = FALSE) %>%
RunUMAP(reduction = "pca", dims = 1:30, verbose = FALSE) %>%
FindNeighbors(reduction = "pca", dims = 1:30, verbose = FALSE) %>%
FindClusters(resolution = 0.7, verbose = FALSE)
p1 <- DimPlot(ctrl, label = T, repel = T) +
ggtitle("Unsupervised clustering")
p2 <- DimPlot(ctrl, label = T, repel = T,
group.by = "seurat_annotations") +
ggtitle("Annotated celltypes")
p1 | p2
数据整合测试代码:整合
# Perform integration using pearson residuals
stim <- SCTransform(stim, vst.flavor = "v2", verbose = FALSE) %>%
RunPCA(npcs = 30, verbose = FALSE)
ifnb.list <- list(ctrl = ctrl, stim = stim)
features <- SelectIntegrationFeatures(object.list = ifnb.list, nfeatures = 3000)
ifnb.list <- PrepSCTIntegration(object.list = ifnb.list, anchor.features = features)
immune.anchors <- FindIntegrationAnchors(object.list = ifnb.list,
normalization.method = "SCT",
anchor.features = features)
immune.combined.sct <- IntegrateData(anchorset = immune.anchors,
normalization.method = "SCT")
下游常规流程:
immune.combined.sct <- RunPCA(immune.combined.sct, verbose = FALSE)
immune.combined.sct <- RunUMAP(immune.combined.sct, reduction = "pca", dims = 1:30, verbose = FALSE)
immune.combined.sct <- FindNeighbors(immune.combined.sct, reduction = "pca", dims = 1:30)
immune.combined.sct <- FindClusters(immune.combined.sct, resolution = 0.3)
# plot
p1 <- DimPlot(immune.combined.sct, reduction = "umap",
group.by = "stim")
p2 <- DimPlot(immune.combined.sct, reduction = "umap",
group.by = "seurat_clusters", label = TRUE,
repel = TRUE)
p3 <- DimPlot(immune.combined.sct, reduction = "umap",
group.by = "seurat_annotations", label = TRUE,
repel = TRUE)
p1 | p2 | p3
寻找不同条件下的 Marker 基因:
添加分组:
# find marker gene
immune.combined.sct$celltype.stim <- paste(immune.combined.sct$seurat_annotations,
immune.combined.sct$stim,
sep = "_")
Idents(immune.combined.sct) <- "celltype.stim"
使用 SCT Assay
里面的正则负二项模型矫正过后
的 counts 数据来计算差异基因:
################################
# find marker gene
immune.combined.sct$celltype.stim <- paste(immune.combined.sct$seurat_annotations,
immune.combined.sct$stim,
sep = "_")
# set current Idents
Idents(immune.combined.sct) <- "celltype.stim"
immune.combined.sct <- PrepSCTFindMarkers(immune.combined.sct)
## Found 2 SCT models. Recorrecting SCT counts using minimum median counts: 1665
# diff test
b.interferon.response <- FindMarkers(immune.combined.sct,
assay = "SCT",
ident.1 = "B_STIM",
ident.2 = "B_CTRL",
verbose = FALSE)
# check
head(b.interferon.response, n = 5)
# p_val avg_log2FC pct.1 pct.2 p_val_adj
# ISG15 1.505744e-159 3.508576 0.998 0.229 2.003543e-155
# IFIT3 4.175136e-154 2.567244 0.961 0.052 5.555437e-150
# IFI6 2.492667e-153 2.459369 0.965 0.076 3.316742e-149
# ISG20 9.512332e-152 2.560476 1.000 0.666 1.265711e-147
# IFIT1 2.444608e-139 2.132262 0.904 0.029 3.252795e-135
如果在 PrepSCTFindMarkers 之后你想提取子集然后寻找差异基因,需要加上 recorrect_umi = FALSE
参数:
# subset of the original object after running PrepSCTFindMarkers()
immune.combined.sct.subset <- subset(immune.combined.sct,
idents = c("B_STIM", "B_CTRL"))
# add recorrect_umi = FALSE
b.interferon.response.subset <- FindMarkers(immune.combined.sct.subset,
assay = "SCT", ident.1 = "B_STIM",
ident.2 = "B_CTRL",
verbose = FALSE,
recorrect_umi = FALSE)
可视化:
# plot
Idents(immune.combined.sct) <- "seurat_annotations"
DefaultAssay(immune.combined.sct) <- "SCT"
FeaturePlot(immune.combined.sct,
features = c("CD3D", "GNLY", "IFI6"),
split.by = "stim",
max.cutoff = 3,
cols = c("grey", "red"))
4结尾
大家可以自行尝试一下。
欢迎加入生信交流群。加我微信我也拉你进 微信群聊 老俊俊生信交流群
(微信交流群需收取20元入群费用(防止骗子和便于管理)
)。
老俊俊微信:
知识星球:
所以今天你学习了吗?
今天的分享就到这里了,敬请期待下一篇!
最后欢迎大家分享转发,您的点赞是对我的鼓励和肯定!
如果觉得对您帮助很大,赏杯快乐水喝喝吧!
往期回顾
◀Seurat 官网单细胞教程三 (RPCA 快速整合数据)
◀...