其他
单细胞转录组基础分析四:细胞类型鉴定
##以下方法三选一,建议第一种
#默认wilcox方法
diff.wilcox = FindAllMarkers(scRNA)
all.markers = diff.wilcox %>% select(gene, everything()) %>% subset(p_val<0.05)
top10 = all.markers %>% group_by(cluster) %>% top_n(n = 10, wt = avg_logFC)
write.csv(all.markers, "cell_identify/diff_genes_wilcox.csv", row.names = F)
write.csv(top10, "cell_identify/top10_diff_genes_wilcox.csv", row.names = F)
#专为单细胞设计的MAST
diff.mast = FindAllMarkers(scRNA, test.use = 'MAST')
all.markers = diff.mast %>% select(gene, everything()) %>% subset(p_val<0.05)
top10 = all.markers %>% group_by(cluster) %>% top_n(n = 10, wt = avg_logFC)
write.csv(all.markers, "cell_identify/diff_genes_mast.csv", row.names = F)
write.csv(top10, "cell_identify/top10_diff_genes_mast.csv", row.names = F)
#bulkRNA经典方法DESeq2
diff.deseq2 = FindAllMarkers(scRNA, test.use = 'DESeq2', slot = 'counts')
all.markers = diff.deseq2 %>% select(gene, everything()) %>% subset(p_val<0.05)
top10 = all.markers %>% group_by(cluster) %>% top_n(n = 10, wt = avg_logFC)
write.csv(all.markers, "cell_identify/diff_genes_deseq2.csv", row.names = F)
write.csv(top10, "cell_identify/top10_diff_genes_deseq2.csv", row.names = F)
##top10基因绘制热图
top10_genes <- read.csv("cell_identify/top10_diff_genes_wilcox.csv")
top10_genes = CaseMatch(search = as.vector(top10_genes$gene), match = rownames(scRNA))
plot1 = DoHeatmap(scRNA, features = top10_genes, group.by = "seurat_clusters", group.bar = T, size = 4)
ggsave("cell_identify/top10_markers.pdf", plot=plot1, width=8, height=6)
ggsave("cell_identify/top10_markers.png", plot=plot1, width=8, height=6)
CellMarker:http://biocc.hrbmu.edu.cn/CellMarker/index.jsp
PanglaoDB:https://panglaodb.se/index.html
#挑选部分基因
select_genes <- c('LYZ','CD79A','CD8A','CD8B','GZMB','FCGR3A')
#vlnplot展示
p1 <- VlnPlot(scRNA, features = select_genes, pt.size=0, group.by="celltype", ncol=2)
ggsave("cell_identify/selectgenes_VlnPlot.png", p1, width=6 ,height=8)
#featureplot展示
p2 <- FeaturePlot(scRNA, features = select_genes, reduction = "tsne", label=T, ncol=2)
ggsave("cell_identify/selectgenes_FeaturePlot.png", p2, width=8 ,height=12)
p3=p1|p2
ggsave("cell_identify/selectgenes.png", p3, width=10 ,height=8)
library(SingleR)
refdata <- HumanPrimaryCellAtlasData()
testdata <- GetAssayData(scRNA, slot="data")
clusters <- scRNA@meta.data$seurat_clusters
cellpred <- SingleR(test = testdata, ref = refdata, labels = refdata$label.main,
method = "cluster", clusters = clusters,
assay.type.test = "logcounts", assay.type.ref = "logcounts")
celltype = data.frame(ClusterID=rownames(cellpred), celltype=cellpred$labels, stringsAsFactors = F)
write.csv(celltype,"cell_identify/celltype_singleR.csv",row.names = F)
scRNA@meta.data$celltype = "NA"
for(i in 1:nrow(celltype)){
scRNA@meta.data[which(scRNA@meta.data$seurat_clusters == celltype$ClusterID[i]),'celltype'] <- celltype$celltype[i]}
p1 = DimPlot(scRNA, group.by="celltype", label=T, label.size=5, reduction='tsne')
p2 = DimPlot(scRNA, group.by="celltype", label=T, label.size=5, reduction='umap')
p3 = plotc <- p1+p2+ plot_layout(guides = 'collect')
ggsave("cell_identify/tSNE_celltype.pdf", p1, width=7 ,height=6)
ggsave("cell_identify/UMAP_celltype.pdf", p2, width=7 ,height=6)
ggsave("cell_identify/celltype.pdf", p3, width=10 ,height=5)
ggsave("cell_identify/celltype.png", p3, width=10 ,height=5)
获取帮助
本教程的目的在于把常用的单细胞分析流程串起来,适合有一定R语言基础的朋友参考。分析原理和代码我没有详细解释,官网有详细的教程和权威的解释,翻译好的中文教程也有多个版本,有兴趣的可以搜索一下。如果您学习本教程有一定困难,可以分享此篇文章到朋友圈,截图微信发给Kinesin(文末二维码),我会抽时间组群直播讲解单细胞数据分析的全过程。本专题所用的软件、数据、代码脚本和中间结果,我打包放在了百度云上,需要的朋友添加Kinesin微信索取。