其他
单细胞RNA-seq揭示TNBC的异质性(图表复现04)
后起之秀奔涌而至,欢迎大家在《生信技能树》的舞台分享自己的心得体会!(文末有惊喜)
前面的单细胞RNA-seq揭示TNBC的异质性(图表复现03)教程里面我们一起复现了文章“ Unravelling subclonal heterogeneity and aggressive disease states in TNBC through single-cell RNA-seq ” 的Figure 2 ,这周继续来复现一下Figure 3的相关内容
复现图表
Fig4a
代码重复
1.操作准备流程
## 70个基因预后特征(PS), 49个基因转移负担特征(MBS), 354个基因残留肿瘤特征(RTS)
mammaprint_long <- read.table("mammaprint_sig_new.txt", header = TRUE, sep = "\t")
mammaprint <- apply(mammaprint_long, 2, function(x){return(intersect(x, rownames(mat_ct)))})[,1]
mammaprint_avg_exprs <- apply(mat_ct[match(mammaprint, rownames(mat_ct)),], 2, mean)
all.equal(names(mammaprint_avg_exprs), colnames(mat_ct))
mammaprint_avg_exprs <- mammaprint_avg_exprs[which(pd_ct$cell_types_cl_all == "epithelial")]
all.equal(colnames(HSMM_allepith_clustering), names(mammaprint_avg_exprs))
pData(HSMM_allepith_clustering)$mammaprint_avg_exprs <- mammaprint_avg_exprs
zenawerb_long <- read.table("werb_49_metastasis_sig.txt", header = TRUE, sep = "\t")
zenawerb <- apply(zenawerb_long, 2, function(x){return(intersect(x, rownames(mat_ct)))})[,1]
zenawerb_avg_exprs <- apply(mat_ct[match(zenawerb, rownames(mat_ct)),], 2, mean)
all.equal(names(zenawerb_avg_exprs), colnames(mat_ct))
zenawerb_avg_exprs <- zenawerb_avg_exprs[which(pd_ct$cell_types_cl_all == "epithelial")]
all.equal(colnames(HSMM_allepith_clustering), names(zenawerb_avg_exprs))
pData(HSMM_allepith_clustering)$zenawerb_avg_exprs <- zenawerb_avg_exprs
artega_long <- read.table("artega_sig.txt", header = TRUE, sep = "\t")
artega <- apply(artega_long, 2, function(x){return(intersect(x, rownames(mat_ct)))})[,1]
artega_avg_exprs <- apply(mat_ct[match(artega, rownames(mat_ct)),], 2, mean)
all.equal(names(artega_avg_exprs), colnames(mat_ct))
artega_avg_exprs <- artega_avg_exprs[which(pd_ct$cell_types_cl_all == "epithelial")]
all.equal(colnames(HSMM_allepith_clustering), names(artega_avg_exprs))
pData(HSMM_allepith_clustering)$artega_avg_exprs <- artega_avg_exprs
2.可视化输入文件准备
#预后特征热图
prognosis_sig <- cbind(mammaprint_avg_exprs, zenawerb_avg_exprs, artega_avg_exprs)
colnames(prognosis_sig) <- c("PS", "MBS", "RTS")
#热图输入文件准备
prognosis_epith_pat <- list()
for (i in 1:length(patients_now)) {
prognosis_epith_pat[[i]] <- t(prognosis_sig)[,which(pd_ct_epith$patient == patients_now[i])]
}
names(prognosis_epith_pat) <- patients_now
for (i in 1:length(patients_now)) {
print(all.equal(colnames(prognosis_epith_pat[[1]]), rownames(pds_epith_ct[[1]])))
print(all.equal(names(clusterings_sep_allepith[[1]]), colnames(prognosis_epith_pat[[1]])))
}
3.可视化
#热图可视化
ht_sep_prognosis <-
Heatmap(prognosis_epith_pat[[1]],
cluster_rows = FALSE,
col = colorRamp2(c(-0.2, 0.2, 1), c("blue","white", "red")),
show_column_names = FALSE,
column_title = patients_now[1],
top_annotation = ha_lehman_epith_pat[[1]],
column_title_gp = gpar(fontsize = 12),
show_row_names = FALSE,
name = patients_now[1],
show_heatmap_legend = FALSE,
heatmap_legend_param = list(title_gp = gpar(fontsize = 9), labels_gp = gpar(fontsize = 9))) +
Heatmap(prognosis_epith_pat[[2]],
col = colorRamp2(c(-0.2, 0.2, 1), c("blue","white", "red")),
cluster_rows = FALSE,
show_column_names = FALSE,
column_title = patients_now[2],
column_title_gp = gpar(fontsize = 12),
top_annotation = ha_lehman_epith_pat[[2]],
name = patients_now[2],
show_row_names = FALSE,
show_heatmap_legend = FALSE,
heatmap_legend_param = list(title_gp = gpar(fontsize = 9), labels_gp = gpar(fontsize = 9))) +
Heatmap(prognosis_epith_pat[[3]],
col = colorRamp2(c(-0.2, 0.2, 1), c("blue","white", "red")),
cluster_rows = FALSE,
show_column_names = FALSE,
column_title = patients_now[3],
column_title_gp = gpar(fontsize = 12),
top_annotation = ha_lehman_epith_pat[[3]],
name = patients_now[3],
show_row_names = FALSE,
show_heatmap_legend = FALSE,
heatmap_legend_param = list(title_gp = gpar(fontsize = 9), labels_gp = gpar(fontsize = 9))) +
Heatmap(prognosis_epith_pat[[4]],
col = colorRamp2(c(-0.2, 0.2, 1), c("blue","white", "red")),
cluster_rows = FALSE,
show_column_names = FALSE,
column_title = patients_now[4],
column_title_gp = gpar(fontsize = 12),
top_annotation = ha_lehman_epith_pat[[4]],
name = patients_now[4],
show_row_names = FALSE,
show_heatmap_legend = FALSE,
heatmap_legend_param = list(title_gp = gpar(fontsize = 9), labels_gp = gpar(fontsize = 9))) +
Heatmap(prognosis_epith_pat[[5]],
col = colorRamp2(c(-0.2, 0.2, 1), c("blue","white", "red")),
cluster_rows = FALSE,
show_column_names = FALSE,
column_title = patients_now[5],
column_title_gp = gpar(fontsize = 12),
top_annotation = ha_lehman_epith_pat[[5]],
name = patients_now[5],
show_row_names = FALSE,
show_heatmap_legend = FALSE,
heatmap_legend_param = list(title_gp = gpar(fontsize = 9), labels_gp = gpar(fontsize = 9)))+
Heatmap(prognosis_epith_pat[[6]],
col = colorRamp2(c(-0.2, 0.2, 1), c("blue","white", "red")),
cluster_rows = FALSE,
row_names_side = "right",
column_title = patients_now[6],
column_title_gp = gpar(fontsize = 12),
top_annotation = ha_lehman_epith_pat[[6]],
name = patients_now[6],
show_column_names = FALSE,
heatmap_legend_param = list(title = "Expression",title_gp = gpar(fontsize = 9), labels_gp = gpar(fontsize = 9)))
#pdf("fig4a.pdf", onefile = FALSE, width = 20)
print(draw(ht_sep_prognosis, annotation_legend_side = "right"))
#dev.off()
图片展示
Fig4b
代码重复
1.操作准备流程
# 预后特征的小提琴排序图
clust_avg_prognosis <- matrix(NA, nrow = length(unique(HSMM_allepith_clustering$Cluster)), ncol = ncol(prognosis_sig))
rownames(clust_avg_prognosis) <- paste("clust", c(1:length(unique(HSMM_allepith_clustering$Cluster))), sep = "")
colnames(clust_avg_prognosis) <- colnames(prognosis_sig)
for (c in 1:length(unique(HSMM_allepith_clustering$Cluster))) {
clust_avg_prognosis[c,] <- apply(prognosis_sig[which(HSMM_allepith_clustering$Cluster == c),], 2, mean)}
prognosis_sig <- as.data.frame(prognosis_sig)
all.equal(rownames(prognosis_sig), colnames(HSMM_allepith_clustering))
prognosis_sig$Cluster <- as.numeric(HSMM_allepith_clustering$Cluster)
prognosis_melt <- melt(prognosis_sig, id.vars = c("Cluster"))
prognosis_melt$value <- as.numeric(prognosis_melt$value)
prognosis_melt <- prognosis_melt %>%
filter(Cluster %in% c(2,3,4))
prognosis_melt$Cluster<-as.factor(prognosis_melt$Cluster)
2.可视化操作
#pdf("fig4b.pdf", width = 9)
p <- ggplot(prognosis_melt, aes(Cluster, value, fill = Cluster)) +
scale_fill_manual(values = c("1" = "#ee204d", "2" = "#17806d", "3" = "#b2ec5d", "4" = "#cda4de", "5" = "#1974d2"))
p + geom_violin(adjust = .5) + facet_wrap(~variable) + stat_summary(fun.y = mean, geom = "point", shape = 22, size = 3)
#dev.off()
图片展示
Fig4d
代码重复
1.操作准备流程
# 通路热图
path1 <- c("ST3GAL4", "ST3GAL6", "ST8SIA1", "FUT1", "FUT2", "FUT3", "FUT4", "FUT6", "FUT9", "GLTP", "SPTLC1", "KDSR", "SPTLC2", "CERK", "ARSA")
idx_path1 <- match(path1, rownames(HSMM_allepith_clustering))
path2 <- c("CCL20", "CCL22", "CCL4", "CCR6", "IL11", "IL12RB1", "IL6R", "CSF2RA", "BMP7", "GLMN", "GPI", "INHBA", "TNF",
"TNFSF15", "GHR", "LEPR", "TLR1", "TLR2", "TLR5", "TLR7", "TLR10", "F11R")
idx_path2 <- match(path2, rownames(HSMM_allepith_clustering))
path3 <- c("ERBB2", "AKT1", "AKT3", "PIK3R3", "PIK3R4", "RPS6KB2", "TRIB3", "BTK", "GRB10", "GRB2", "ILK", "PAK1", "PRKCZ", "CSNK2A1",
"IRS1", "IRAK1", "MYD88", "MAP2K1", "MAPK8", "MAPK1", "PTPN11", "EIF4E", "EIF4EBP1", "EIF4G1", "FKBP1A", "PDK1", "RHEB", "RPS6KB1")
idx_path3 <- match(path3, rownames(HSMM_allepith_clustering))
all_idx_paths <- c(idx_path1, idx_path2)
names_path <- c(rep("glycosphigolipid metabolism", length(idx_path1)), rep("innate immunity", length(idx_path2)))
anno_cols_path <- c("glycosphigolipid metabolism" = "#ff9baa", "innate immunity" = "#17806d")
ha_path_rows <- HeatmapAnnotation(df = data.frame(pathway = names_path),
annotation_legend_param = list(pathway = list(ncol = 1, title = "pathway", title_position = "topcenter")),
which = "row", col = list("pathway" = anno_cols_path), annotation_width = unit(3, "mm"))
# 每个簇分开矩阵,只提取相关基因
mat_epith_allpats <- exprs(HSMM_allepith_clustering)
mats_epith_patient <- list()
pds_epith_patient <- list()
mats_epith_patient_clusters <- list()
for (i in 1:length(patients_now)) {
pds_epith_patient[[i]] <- pData(HSMM_allepith_clustering)[which(pData(HSMM_allepith_clustering)$patient == patients_now[i]), ]
mats_epith_patient[[i]] <- mat_epith_allpats[all_idx_paths, which(pData(HSMM_allepith_clustering)$patient == patients_now[i])]
mats_epith_patient_clusters[[i]] <- list()
for (c in 1:length(unique(HSMM_allepith_clustering$Cluster))) {
mats_epith_patient_clusters[[i]][[c]] <- mats_epith_patient[[i]][, which(pds_epith_patient[[i]]$Cluster == c)]
}
names(mats_epith_patient_clusters[[i]]) <- paste0("clust", c(1:5))
}
names(mats_epith_patient) <- patients_now
names(pds_epith_patient) <- patients_now
2.可视化
# 热图绘制
ht_paths <-
ha_path_rows +
Heatmap(mats_epith_patient_clusters[[1]][[2]],
col = colorRamp2(c(-0.5, 1, 3), c("blue", "white", "red")),
cluster_rows = TRUE, show_row_dend = FALSE,
name = "cluster2", column_title = "2",
show_row_names = FALSE,
show_column_dend = TRUE, show_column_names = FALSE,
row_names_gp = gpar(fontsize = 9, col = anno_cols_path),
split = names_path,
heatmap_legend_param = list(title = "Expression",title_gp = gpar(fontsize = 9), labels_gp = gpar(fontsize = 9)))+
Heatmap(mats_epith_patient_clusters[[1]][[3]],
col = colorRamp2(c(-0.5, 1, 3), c("blue", "white", "red")),
cluster_rows = TRUE, show_row_dend = FALSE,
name = "cluster3", column_title = "3",
show_row_names = FALSE,show_heatmap_legend = FALSE,
show_column_dend = TRUE, show_column_names = FALSE) +
Heatmap(mats_epith_patient_clusters[[1]][[4]],
col = colorRamp2(c(-0.5, 1, 3), c("blue", "white", "red")),
cluster_rows = TRUE, show_row_dend = FALSE,
name = "cluster4", column_title = "4",
show_row_names = FALSE,show_heatmap_legend = FALSE,
show_column_dend = TRUE, show_column_names = FALSE) +
Heatmap(mats_epith_patient_clusters[[2]][[2]],
col = colorRamp2(c(-0.5, 1, 3), c("blue", "white", "red")),
cluster_rows = TRUE, show_row_dend = FALSE,
name = "cluster2_2", column_title = "2",
show_row_names = FALSE,show_heatmap_legend = FALSE,
show_column_dend = TRUE, show_column_names = FALSE) +
Heatmap(mats_epith_patient_clusters[[2]][[3]],
col = colorRamp2(c(-0.5, 1, 3), c("blue", "white", "red")),
cluster_rows = TRUE, show_row_dend = FALSE,
name = "cluster3_2", column_title = "3",
show_row_names = FALSE,show_heatmap_legend = FALSE,
show_column_dend = TRUE, show_column_names = FALSE) +
Heatmap(mats_epith_patient_clusters[[2]][[4]],
col = colorRamp2(c(-0.5, 1, 3), c("blue", "white", "red")),
cluster_rows = TRUE, show_row_dend = FALSE,
name = "cluster4_2", column_title = "4",
show_row_names = FALSE,show_heatmap_legend = FALSE,
show_column_dend = TRUE, show_column_names = FALSE) +
Heatmap(mats_epith_patient_clusters[[3]][[2]],
col = colorRamp2(c(-0.5, 1, 3), c("blue", "white", "red")),
cluster_rows = TRUE, show_row_dend = FALSE,
name = "cluster2_3", column_title = "2",
show_row_names = FALSE,show_heatmap_legend = FALSE,
show_column_dend = TRUE, show_column_names = FALSE) +
Heatmap(mats_epith_patient_clusters[[3]][[4]],
col = colorRamp2(c(-0.5, 1, 3), c("blue", "white", "red")),
cluster_rows = TRUE, show_row_dend = FALSE,
name = "cluster4_3", column_title = "4",
show_row_names = FALSE,show_heatmap_legend = FALSE,
show_column_dend = TRUE, show_column_names = FALSE) +
Heatmap(mats_epith_patient_clusters[[4]][[2]],
col = colorRamp2(c(-0.5, 1, 3), c("blue", "white", "red")),
cluster_rows = TRUE, show_row_dend = FALSE,
name = "cluster2_4", column_title = "2",
show_row_names = FALSE,show_heatmap_legend = FALSE,
show_column_dend = TRUE, show_column_names = FALSE) +
Heatmap(mats_epith_patient_clusters[[4]][[3]],
col = colorRamp2(c(-0.5, 1, 3), c("blue", "white", "red")),
cluster_rows = TRUE, show_row_dend = FALSE,
name = "cluster3_4", column_title = "3",
show_row_names = FALSE,show_heatmap_legend = FALSE,
show_column_dend = TRUE, show_column_names = FALSE) +
Heatmap(mats_epith_patient_clusters[[4]][[4]],
col = colorRamp2(c(-0.5, 1, 3), c("blue", "white", "red")),
cluster_rows = TRUE, show_row_dend = FALSE,
name = "cluster4_4", column_title = "4",
show_row_names = FALSE,show_heatmap_legend = FALSE,
show_column_dend = TRUE, show_column_names = FALSE) +
Heatmap(mats_epith_patient_clusters[[4]][[5]],
col = colorRamp2(c(-0.5, 1, 3), c("blue", "white", "red")),
cluster_rows = TRUE, show_row_dend = FALSE,
name = "cluster5_4", column_title = "5",
show_row_names = FALSE,show_heatmap_legend = FALSE,
show_column_dend = TRUE, show_column_names = FALSE) +
Heatmap(mats_epith_patient_clusters[[5]][[1]],
col = colorRamp2(c(-0.5, 1, 3), c("blue", "white", "red")),
cluster_rows = TRUE, show_row_dend = FALSE,
name = "cluster1_5", column_title = "1",
show_row_names = FALSE,show_heatmap_legend = FALSE,
show_column_dend = TRUE, show_column_names = FALSE) +
Heatmap(mats_epith_patient_clusters[[5]][[2]],
col = colorRamp2(c(-0.5, 1, 3), c("blue", "white", "red")),
cluster_rows = TRUE, show_row_dend = FALSE,
name = "cluster2_5", column_title = "2",
show_row_names = FALSE,show_heatmap_legend = FALSE,
show_column_dend = TRUE, show_column_names = FALSE) +
Heatmap(mats_epith_patient_clusters[[5]][[3]],
col = colorRamp2(c(-0.5, 1, 3), c("blue", "white", "red")),
cluster_rows = TRUE, show_row_dend = FALSE,
name = "cluster3_5", column_title = "3",
show_row_names = FALSE,show_heatmap_legend = FALSE,
show_column_dend = TRUE, show_column_names = FALSE) +
Heatmap(mats_epith_patient_clusters[[5]][[5]],
col = colorRamp2(c(-0.5, 1, 3), c("blue", "white", "red")),
cluster_rows = TRUE, show_row_dend = FALSE,
name = "cluster5_5", column_title = "5",
show_row_names = FALSE,show_heatmap_legend = FALSE,
show_column_dend = TRUE, show_column_names = FALSE) +
Heatmap(mats_epith_patient_clusters[[6]][[2]],
col = colorRamp2(c(-0.5, 1, 3), c("blue", "white", "red")),
cluster_rows = TRUE, show_row_dend = FALSE,
name = "cluster2_6", column_title = "2",
show_row_names = FALSE,show_heatmap_legend = FALSE,
show_column_dend = TRUE, show_column_names = FALSE) +
Heatmap(mats_epith_patient_clusters[[6]][[3]],
col = colorRamp2(c(-0.5, 1, 3), c("blue", "white", "red")),
cluster_rows = TRUE, show_row_dend = FALSE,
name = "cluster3_6", column_title = "3",
show_row_names = FALSE,show_heatmap_legend = FALSE,
show_column_dend = TRUE, show_column_names = FALSE) +
Heatmap(mats_epith_patient_clusters[[6]][[4]],
col = colorRamp2(c(-0.5, 1, 3), c("blue", "white", "red")),
cluster_rows = TRUE, show_row_dend = FALSE,
name = "cluster4_6", column_title = "4",
show_row_names = TRUE,show_heatmap_legend = FALSE,
show_column_dend = TRUE, show_column_names = FALSE)
#pdf("fig4d.pdf", onefile = FALSE, width = 25)
draw(ht_paths)
#dev.off()
图片展示
(完)
全部的代码和数据,都可以在我们《生信技能树》公众号后台回复“tnbc”获取,还是很有学习的价值。其实我更希望看到学徒跟着我的《CNS图表复现专栏》来整理这些代码;
CNS图表复现01—读入csv文件的表达矩阵构建Seurat对象 CNS图表复现02—Seurat标准流程之聚类分群 CNS图表复现03—单细胞区分免疫细胞和肿瘤细胞 CNS图表复现04—单细胞聚类分群的resolution参数问题 CNS图表复现05—免疫细胞亚群再分类 CNS图表复现06—根据CellMarker网站进行人工校验免疫细胞亚群 CNS图表复现07—原来这篇文章有两个单细胞表达矩阵 CNS图表复现08—肿瘤单细胞数据第一次分群通用规则 CNS图表复现09—上皮细胞可以区分为恶性与否 CNS图表复现10—表达矩阵是如何得到的 CNS图表复现11—RNA-seq数据可不只是表达量矩阵结果 CNS图表复现12—检查原文的细胞亚群的标记基因 CNS图表复现13—使用inferCNV来区分肿瘤细胞的恶性与否 CNS图表复现14—检查文献的inferCNV流程 CNS图表复现15—inferCNV流程输入数据差异大揭秘 CNS图表复现16—inferCNV结果解读及利用 CNS图表复现17—inferCNV结果解读及利用之进阶