其他
单细胞转录组高级分析八:整合V(D)J数据
单细胞转录组基础分析专题
逆转录的cDNA经过扩增后可以一分为二,一部分做5'端scRNA测序,另一部分用富集引物扩增V(D)J序列测序。
scRepertoire简介
library(devtools)
#正式版,需要4.0版R语言支持
install_github("ncborcherding/scRepertoire")
#开发版,适用4.0版以下的R语言
install_github("ncborcherding/scRepertoire@dev")
library(Seurat)
library(scRepertoire)
library(tidyverse)
library(patchwork)
rm(list=ls())
dir.create("VDJ")
#加载scRepertoire自带的contig_annotations文件
data("contig_list")
查看contig_list[[1]]的前5行,可以发现这是标准的contig注释数据
combined <- combineTCR(contig_list,
samples = c("PY", "PY", "PX", "PX", "PZ","PZ"),
ID = c("P", "T", "P", "T", "P", "T"),
cells ="T-AB")
#参数说明:
#samples:用于指定样本名称的参数,因为contig_list包含了6个样本的VDJ数据,所以这里传递了一个6个字符元素的向量;
#ID:用于指定样本名称之外的其他分类信息,例如分组信息;
#cells:指定VDJ类型,"T-AB"代表TCR的α和β链配对,"T-GD"代表γ和δ配对;
#还有removeNA, removeMulti, filterMulti都选择了默认值,有兴趣了解的可以查看帮助文档
exportTable:默认赋值F,函数分析结果为图形;改为T之后函数分析结果输出为表格。
cloneCall:可选"aa", "nt", "gene", "gene+nt",代表用CDR3氨基酸序列、CDR3核苷酸序列、VDJ基因还是VDJ基因+CDR3核苷酸序列中的哪一种来定义克隆型。以下分析主要用"aa"定义clonotype,大家可以根据自己的需要选择其他类型。
##展示每个样本的克隆型数量
p1 <- quantContig(combined, cloneCall="gene+nt", scale = F)
p2 <- quantContig(combined, cloneCall="gene+nt", scale = T)
plotc = p1 + p2
ggsave('VDJ/quantContig.png', plotc, width = 8, height = 4)
#设置exportTable = T,则输出表格而非图形
quantContig(combined, cloneCall="gene+nt", scale = T, exportTable = T
# contigs values total scaled
# 1 2692 PY_P 3208 83.91521
# 2 1513 PY_T 3119 48.50914
# 3 823 PX_P 1068 77.05993
# 4 928 PX_T 1678 55.30393
# 5 1147 PZ_P 1434 79.98605
# 6 764 PZ_T 2768 27.60116
##CDR3序列的长度分布,“aa”代表统计氨基酸序列长度
p1 <- lengthContig(combined, cloneCall="aa", chains = "combined")
p2 <- lengthContig(combined, cloneCall="aa", chains = "single")
plotc = p1 + p2
ggsave('VDJ/lengthContig.png', plotc, width = 8, height = 4)
##对比两个样本的克隆型
p1 = compareClonotypes(combined, numbers = 10, samples = c("PX_P", "PX_T"),
cloneCall="aa", graph = "alluvial")
ggsave('VDJ/compareClonotypes.png', p1, width = 8, height = 4)
##Clonal Space Homeostasis,这个不知道怎么翻译
clonalHomeostasis(combined, cloneCall = "aa")
#此分析将克隆型按其相对丰度分为rare, small, medium, large, hyperexpanded
#5大类,并统计各个类别的占比
##克隆型分类占比,与上一个分析相似,只是分类方法调整了
clonalProportion(combined, cloneCall = "aa")
##Overlap Analysis,分析样本相似性
clonalOverlap(combined, cloneCall = "aa", method = "morisita")
##克隆多样性指数
clonalDiversity(combined, cloneCall = "aa", group = "samples")
seurat <- get(load("Data/VDJ/seurat2.rda"))
#查看UMAP图
DimPlot(seurat, label = T) + NoLegend()
#查看原始seurat的meta.data有哪些内容
names(seurat@meta.data)
# [1] "nCount_RNA" "nFeature_RNA" "integrated_snn_res.0.5" "seurat_clusters"
# [5] "Patient" "Type" "RawBarcode"
##将VDJ数据添加到seurat对象的meta.data中
seurat <- combineExpression(combined, seurat, cloneCall="aa", groupBy = "sample")
#查看添加VDJ数据后seurat的meta.data有哪些内容
names(seurat@meta.data)
[1] "nCount_RNA" "nFeature_RNA" "integrated_snn_res.0.5"
[4] "seurat_clusters" "Patient" "Type"
[7] "RawBarcode" "barcode" "CTgene"
[10] "CTnt" "CTaa" "CTstrict"
[13] "Frequency" "cloneType"
#可以发现导入了双链的相关信息,增加了克隆型的频率信息(Frequency),并且将频率划分了层次(cloneType)
##UMAP图展示cloneType的分布
seurat$cloneType <- factor(seurat$cloneType, levels = c("Hyperexpanded (100 < X <= 500)",
"Large (20 < X <= 100)", "Medium (5 < X <= 20)",
"Small (1 < X <= 5)", "Single (0 < X <= 1)", NA))
colorblind_vector <- colorRampPalette(c("#FF4B20", "#FFB433", "#C6FDEC", "#7AC5FF", "#0348A6"))
DimPlot(seurat, group.by = "cloneType") +
scale_color_manual(values = colorblind_vector(5), na.value="grey")
##UMAP图展示特定克隆型的分布
seurat <- highlightClonotypes(seurat, cloneCall= "aa",
sequence = c("CAVNGGSQGNLIF_CSAEREDTDTQYF", "NA_CATSATLRVVAEKLFF"))
DimPlot(seurat, group.by = "highlight")
##桑基图展示特定克隆型的来源
alluvialClonotypes(seurat, cloneCall = "aa",
y.axes = c("Patient", "cluster", "Type"),
color = "CAVNGGSQGNLIF_CSAEREDTDTQYF") +
scale_fill_manual(values = c("grey", colorblind_vector(1)))
##桑基图展示克隆型在病例-cluster-组织类型之间的关系
alluvialClonotypes(seurat, cloneCall = "aa",
y.axes = c("Patient", "cluster", "Type"),
color = "cluster")
##按cluster分析克隆型
combined2 <- expression2List(seurat, group = "cluster")
p1 = clonalDiversity(combined2, cloneCall = "aa") + ggtitle("clonalDiversity")
p2 = clonalHomeostasis(combined2, cloneCall = "aa") + ggtitle("clonalHomeostasis")
p3 = clonalProportion(combined2, cloneCall = "aa") + ggtitle("clonalProportion")
p4 = clonalOverlap(combined2, cloneCall="aa", method="overlap") + ggtitle("clonalOverlap")
plotc = (p2|p3)/(p1|p4)
ggsave('VDJ/clonal.png', plotc, width = 10, height = 8)
本教程的目的在于把单细胞分析流程串起来,适合有一定R语言基础的朋友参考。如果您学习本教程有一定困难,可以分享此篇文章到朋友圈,截图微信发给Kinesin(文末二维码),我会抽时间组群直播讲解单细胞数据分析的全过程。本专题所用的软件、数据、代码脚本和中间结果,我打包放在了百度云上,需要的朋友添加Kinesin微信索取。