ProjecTILs系列教程（一）：MC38 TILs克隆型分析

生信菜鸟团 2022-08-20

The following article is from 生信宝库 Author Immugent

说在前面

Immugent在上一期推文中：ProjecTILs：一站式解决肿瘤和感染模型中T细胞的注释，大致介绍了ProjecTILs的背景知识和功能，从本期开始将通过实操对ProjecTILs的分析技巧进行演示。

在这期推文中，Immugent将通过代码演示建立一个整合scRNA-seq分析工作的流程，来解读MC38小鼠结肠癌模型中肿瘤浸润T细胞的转录和克隆分型情况。

要想复现下面代码需要安装下列三个R包：Seurat，ProjecTILs，scRepertoire。

代码实现

首先是需要准备输入的单细胞转录组数据，这里用的是示例数据，大家也可以直接用自己的数据。

library(gridExtra)
library(ggplot2)
library(plotly)
library(ProjecTILs)
library(scRepertoire)

files <- c("E-MTAB-7919.processed.1.zip", "E-MTAB-7919.processed.2.zip", "E-MTAB-7919.processed.3.zip")
matrix_dir <- "./input/Xiong_TIL/matrix"
system(sprintf("mkdir -p %s", matrix_dir))

for (i in 1:length(files)) {
    data_path <- sprintf("https://www.ebi.ac.uk/arrayexpress/files/E-MTAB-7919/%s",
        files[i])
    system(sprintf("curl %s --output %s/data.zip", data_path, matrix_dir))
    system(sprintf("unzip -o %s/data.zip -d %s", matrix_dir, matrix_dir))
}
system(sprintf("rm %s/data.zip", matrix_dir))

projectID <- "Xiong_TIL"
libIDtoSampleID <- c("Mouse 1", "Mouse 2", "Mouse 3", "Mouse 4")
names(libIDtoSampleID) <- 4:7

exp_mat <- Read10X(matrix_dir)
querydata <- CreateSeuratObject(counts = exp_mat, project = projectID, min.cells = 3,
    min.features = 50)
querydata$Sample <- substring(colnames(querydata), 18)
table(querydata$Sample)

querydata$SampleLabel <- factor(querydata$Sample, levels = c(4:7), labels = libIDtoSampleID)
table(querydata$SampleLabel)
#Mouse 1 Mouse 2 Mouse 3 Mouse 4 
#1759    1746    1291    1386

接下来还需要相应的TCR数据。

data_path <- "https://www.ebi.ac.uk/arrayexpress/files/E-MTAB-7918/E-MTAB-7918.processed.1.zip"
tcr_dir <- "./input/Xiong_TIL/TCR"

system(sprintf("mkdir -p %s", tcr_dir))
system(sprintf("curl %s --output %s/data.zip", data_path, tcr_dir))
system(sprintf("unzip -o %s/data.zip -d %s", tcr_dir, tcr_dir))

libIDtoSampleID_VDJ <- 4:7
names(libIDtoSampleID_VDJ) <- 35:38

vdj.list <- list()
for (i in 1:length(libIDtoSampleID_VDJ)) {
    s <- names(libIDtoSampleID_VDJ)[i]
    vdj.list[[i]] <- read.csv(sprintf("%s/filtered_contig_annotations_%s.csv", tcr_dir,
        s), as.is = T)

    # Rename barcodes to match scRNA-seq suffixes
    vdj.list[[i]]$barcode <- sub("\\d$", "", vdj.list[[i]]$barcode)
    vdj.list[[i]]$barcode <- paste0(vdj.list[[i]]$barcode, libIDtoSampleID_VDJ[i])
    vdj.list[[i]]$raw_clonotype_id <- paste0(vdj.list[[i]]$raw_clonotype_id, "-",
        libIDtoSampleID_VDJ[i])
    vdj.list[[i]]$SampleLabel <- libIDtoSampleID_VDJ[i]

}

# Using parameters removeNA=T and removeMulti=T will remove cells with multiple
# a-b combinations
combined <- combineTCR(vdj.list, samples = libIDtoSampleID_VDJ, ID = names(libIDtoSampleID_VDJ),
    cells = "T-AB", removeNA = T, removeMulti = T)

for (i in seq_along(combined)) {
    combined[[i]] <- stripBarcode(combined[[i]], column = 1, connector = "_", num_connects = 3)
}

querydata <- combineExpression(combined, querydata, cloneCall = "gene", groupBy = "none")

数据准备完成后，下面就开始运行ProjecTILs。

ref <- load.reference.map()
query.projected <- make.projection(querydata, ref = ref, ncores = 2)

p1 <- plot.projection(ref)
p2 <- plot.projection(ref, query.projected, linesize = 0.5, pointsize = 0.5)
grid.arrange(p1, p2, ncol = 2)

plots <- list()

sample_names <- unique(query.projected$SampleLabel)
for (sample_i in seq_along(sample_names)) {
    sample <- sample_names[sample_i]
    plots[[sample_i]] <- plot.projection(ref, query.projected[, query.projected$SampleLabel ==
        sample]) + ggtitle(sample)
}

grid.arrange(grobs = plots, ncol = 2)

query.projected <- cellstate.predict(ref = ref, query = query.projected)
table(query.projected$functional.cluster)  #Cell state assignment is stored in the 'functional.cluster' metadata field
plot.statepred.composition(ref, query.projected, metric = "Percent")
plot.states.radar(ref = ref, query.projected, min.cells = 30)

levs <- c("Hyperexpanded (100 < X <= 500)", "Large (20 < X <= 100)", "Medium (5 < X <= 20)",
    "Small (1 < X <= 5)", "Single (0 < X <= 1)", NA)
palette <- colorRampPalette(c("#FF4B20", "#FFB433", "#C6FDEC", "#7AC5FF", "#0348A6"))

query.projected$cloneType <- factor(query.projected$cloneType, levels = levs)
DimPlot(query.projected, group.by = "cloneType") + scale_color_manual(values = c(palette(5)),
    na.value = "grey")

clone_call = "CTaa"  #select column/variable to use as clonotypes ID, in this case CTaa, the paired TCR CDR3 aminoacid sequences
cutoff <- 35  #Min cells for clonotype

clonotypeSizes <- sort(table(query.projected[[clone_call]])[table(query.projected[[clone_call]]) >
    cutoff], decreasing = T)

bigClonotypes <- names(clonotypeSizes)

plots <- list()

for (i in 1:length(bigClonotypes)) {
    ctype <- bigClonotypes[i]

    plots[[i]] <- plot.projection(ref, query.projected[, which(query.projected[[clone_call]] ==
        ctype)]) + ggtitle(sprintf("%s - size %i", ctype, clonotypeSizes[ctype]))
}

grid.arrange(grobs = plots, ncol = 2)

meta <- melt(table(query.projected@meta.data[!is.na(query.projected@meta.data$Frequency),
    c("functional.cluster", "cloneType")]), varnames = c("functional.cluster", "cloneType"))
ggplot(data = meta, aes(x = functional.cluster, y = value, fill = cloneType)) + geom_bar(stat = "identity") +
    theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)) + scale_fill_manual(values = c(palette(5)),
    na.value = "grey")

meta.list <- expression2List(query.projected, group = "functional.cluster")
clonalOverlap(meta.list, cloneCall = "gene", method = "morisita") + theme(axis.text.x = element_text(angle = 90,
    hjust = 1, vjust = 0.5))

compareClonotypes(meta.list, numbers = 6, samples = c("CD8_EffectorMemory", "CD8_Tpex",
    "CD8_Tex", "CD8_NaiveLike", "CD8_EarlyActiv"), cloneCall = "aa", graph = "alluvial") +
    theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))

clone_call = "CTaa"  #select column/variable to use as clonotypes ID, in this case CTaa, the paired TCR CDR3 aminoacid sequences
cutoff <- 35  #Min cells for clonotype

clonotypeSizes <- sort(table(query.projected[[clone_call]])[table(query.projected[[clone_call]]) >
    cutoff], decreasing = T)

bigClonotypes <- names(clonotypeSizes)

plots <- list()

for (i in 1:length(bigClonotypes)) {
    ctype <- bigClonotypes[i]

    plots[[i]] <- plot.projection(ref, query.projected[, which(query.projected[[clone_call]] ==
        ctype)]) + ggtitle(sprintf("%s - size %i", ctype, clonotypeSizes[ctype]))
}

grid.arrange(grobs = plots, ncol = 2)

小结

通过这个示例数据（MC38 TILs）在参考图谱上的投射显示，这些肿瘤浸润的T细胞主要由耗竭的(CD8_Tex)、耗竭前体的CD8 T细胞(CD8_Tpex)和典型的高免疫原性（热肿瘤）的Tregs组成。此外，大多数扩展克隆被发现跨越CD8_Tex和CD8_Tpex状态。这是预期的，因为在肿瘤中持续的抗原刺激驱动免疫主导的肿瘤反应性克隆走向衰竭分化路径。在TIL状态注释参考图谱的背景下，结合投射物和scRepertoire简化了单细胞表达数据和克隆型分析的联合分析。

好啦，ProjecTILs第一期的实操部分到这就结束啦，欢迎大家继续关注！

又忘关摄像头了！村官和妇女主任激情戏再度上演……

注意！2024心理咨询培训已开启，大专及以上学历均可参与！

央视热播剧《我的阿勒泰》爆火，马伊琍一句话惊醒千万父母：妈妈什么性格，孩子什么命！

西安8家物业服务企业违法违规被处罚！

复旦陈果：孤独是一个人的狂欢，在你寂寞时请关注这些公众号充实自己

ProjecTILs系列教程（一）：MC38 TILs克隆型分析

说在前面

代码实现

小结

您可能也对以下帖子感兴趣

又忘关摄像头了！村官和妇女主任激情戏再度上演……

注意！2024心理咨询培训已开启，大专及以上学历均可参与！

央视热播剧《我的阿勒泰》爆火，马伊琍一句话惊醒千万父母：妈妈什么性格，孩子什么命！

西安8家物业服务企业违法违规被处罚！

复旦陈果：孤独是一个人的狂欢，在你寂寞时请关注这些公众号充实自己

生成图片，分享到微信朋友圈

ProjecTILs系列教程（一）：MC38 TILs克隆型分析

说在前面

代码实现

小结

您可能也对以下帖子感兴趣