祖传的单个10x样本的seurat标准代码（人和鼠需要区别对待）

Original 生信技能树生信技能树 2022-06-07

昨天发布了：祖传的单个10x样本的seurat标准代码，非常受大家欢迎，我看到有人马上用起来了我的代码，很棒。在10X官网演示数据：https://support.10xgenomics.com/single-cell-gene-expression/datasets 随便下载一个，就可以绘制出来一大堆图表。

我的代码里面只需要修改你下载的这3个文件的文件夹地址就可以运行下去啦。不过，值得强调的是，昨天发布的是基于小鼠的数据分析代码，主要是基因名字大小写问题，以及后续实验singleR进行细胞类型注释的时候，选择的源不一样。

如果你需要实战，也可以随便找一篇使用了10x单细胞转录组的文献数据，比如：https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE135927 需要注意的是，每个样本都是独立的3个文件，然后存放在一个文件夹里面，使用我的代码的时候，需要一个个指定好文件夹路径即可。

上面截图的就是小鼠数据，scRNASeq of splenic CD3+ cells from immunodeficient mice，使用我昨天发布了：祖传的单个10x样本的seurat标准代码即可。

不过，目前做人类数据分析的更多，所以我还是得公布一个基于人类的10x单细胞数据分析代码，而且这个时候加上一个merge吧，毕竟现在很少有人还搞单个样本的10x数据分析。

第一步是merge多个10x数据

rm(list=ls())options(stringsAsFactors = F)library(Seurat)

# 分别读取每个10x样本的结果文件夹if(F){ samples=list.files('outputs/') samples library(Seurat) sceList = lapply(samples,function(pro){ folder=file.path('outputs/',pro,'filtered_feature_bc_matrix') CreateSeuratObject(counts = Read10X(folder), project = pro ) }) sce.big <- merge(sceList[[1]], y = c(sceList[[2]],sceList[[3]],sceList[[4]],sceList[[5]], sceList[[6]],sceList[[7]],sceList[[8]], sceList[[9]],sceList[[10]],sceList[[11]],sceList[[12]]), add.cell.ids = samples, project = "ls_12") sce.big table(sce.big$orig.ident) save(sce.big,file = 'sce.big.merge.ls_12.Rdata') }

第二步是走简单的聚类分群流程

rm(list=ls())options(stringsAsFactors = F)library(Seurat)## 合并成为一个R对象文件

load(file = 'sce.big.merge.ls_12.Rdata')raw_sce=sce.big

raw_scerownames(raw_sce)[grepl('^mt-',rownames(raw_sce),ignore.case = T)]rownames(raw_sce)[grepl('^Rp[sl]',rownames(raw_sce),ignore.case = T)]

raw_sce[["percent.mt"]] <- PercentageFeatureSet(raw_sce, pattern = "^MT-")fivenum(raw_sce[["percent.mt"]][,1])rb.genes <- rownames(raw_sce)[grep("^RP[SL]",rownames(raw_sce))]C<-GetAssayData(object = raw_sce, slot = "counts")percent.ribo <- Matrix::colSums(C[rb.genes,])/Matrix::colSums(C)*100raw_sce <- AddMetaData(raw_sce, percent.ribo, col.name = "percent.ribo")

plot1 <- FeatureScatter(raw_sce, feature1 = "nCount_RNA", feature2 = "percent.mt")plot2 <- FeatureScatter(raw_sce, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")CombinePlots(plots = list(plot1, plot2))

VlnPlot(raw_sce, features = c("percent.ribo", "percent.mt"), ncol = 2)VlnPlot(raw_sce, features = c("nFeature_RNA", "nCount_RNA"), ncol = 2)VlnPlot(raw_sce, features = c("percent.ribo", "nCount_RNA"), ncol = 2)raw_sce

pro='merge'raw_sce1 <- subset(raw_sce, subset = nFeature_RNA > 200 & nCount_RNA > 1000 & percent.mt < 20)raw_sce1

sce=raw_sce1scesce <- NormalizeData(sce, normalization.method = "LogNormalize", scale.factor = 10000)GetAssay(sce,assay = "RNA")sce <- FindVariableFeatures(sce, selection.method = "vst", nfeatures = 2000) # 步骤 ScaleData 的耗时取决于电脑系统配置（保守估计大于一分钟）sce <- ScaleData(sce) sce <- RunPCA(object = sce, pc.genes = VariableFeatures(sce)) DimHeatmap(sce, dims = 1:12, cells = 100, balanced = TRUE)ElbowPlot(sce)sce <- FindNeighbors(sce, dims = 1:15)sce <- FindClusters(sce, resolution = 0.2)table(sce@meta.data$RNA_snn_res.0.2)

set.seed(123)sce <- RunTSNE(object = sce, dims = 1:15, do.fast = TRUE)DimPlot(sce,reduction = "tsne",label=T)table(sce@meta.data$seurat_clusters,sce@meta.data$orig.ident)

phe=data.frame(cell=rownames(sce@meta.data), cluster =sce@meta.data$seurat_clusters)head(phe)table(phe$cluster)tsne_pos=Embeddings(sce,'tsne')DimPlot(sce,reduction = "tsne",label=T,split.by ='orig.ident')

head(phe)table(phe$cluster)head(tsne_pos) dat=cbind(tsne_pos,phe)save(dat,file=paste0(pro,'_for_tSNE.pos.Rdata')) load(file=paste0(pro,'_for_tSNE.pos.Rdata')) library(ggplot2)p=ggplot(dat,aes(x=tSNE_1,y=tSNE_2,color=cluster))+geom_point(size=0.95)p=p+stat_ellipse(data=dat,aes(x=tSNE_1,y=tSNE_2,fill=cluster,color=cluster), geom = "polygon",alpha=0.2,level=0.9,type="t",linetype = 2,show.legend = F)+coord_fixed()print(p) theme= theme(panel.grid =element_blank()) + ## 删去网格 theme(panel.border = element_blank(),panel.background = element_blank()) + ## 删去外层边框 theme(axis.line = element_line(size=1, colour = "black")) p=p+theme+guides(colour = guide_legend(override.aes = list(size=5)))print(p)ggplot2::ggsave(filename = paste0(pro,'_tsne_res0.2.pdf'))

plot1 <- FeatureScatter(sce, feature1 = "nCount_RNA", feature2 = "percent.mt")plot2 <- FeatureScatter(sce, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")CombinePlots(plots = list(plot1, plot2))ggplot2::ggsave(filename = paste0(pro,'_CombinePlots.pdf'))

VlnPlot(sce, features = c("percent.ribo", "percent.mt"), ncol = 2)ggplot2::ggsave(filename = paste0(pro,'_mt-and-ribo.pdf'))VlnPlot(sce, features = c("nFeature_RNA", "nCount_RNA"), ncol = 2)ggplot2::ggsave(filename = paste0(pro,'_counts-and-feature.pdf'))VlnPlot(sce, features = c("percent.ribo", "nCount_RNA"), ncol = 2)

table(sce@meta.data$seurat_clusters)

for( i in unique(sce@meta.data$seurat_clusters) ){ markers_df <- FindMarkers(object = sce, ident.1 = i, min.pct = 0.25) print(x = head(markers_df)) markers_genes = rownames(head(x = markers_df, n = 5)) VlnPlot(object = sce, features =markers_genes,log =T ) ggsave(filename=paste0(pro,'_VlnPlot_subcluster_',i,'_sce.markers_heatmap.pdf')) FeaturePlot(object = sce, features=markers_genes ) ggsave(filename=paste0(pro,'_FeaturePlot_subcluster_',i,'_sce.markers_heatmap.pdf'))}

sce.markers <- FindAllMarkers(object = sce, only.pos = TRUE, min.pct = 0.25, thresh.use = 0.25)

DT::datatable(sce.markers)write.csv(sce.markers,file=paste0(pro,'_sce.markers.csv'))library(dplyr) top10 <- sce.markers %>% group_by(cluster) %>% top_n(10, avg_logFC)DoHeatmap(sce,top10$gene,size=3)ggsave(filename=paste0(pro,'_sce.markers_heatmap.pdf'))

save(sce,sce.markers,file = 'sce.output.merge.ls_12.Rdata')

上面代码就这里面包含了我一直强调的单细胞基础10讲相关内容：

最后是细胞亚群的自动化注释

因为SingleR里面关于人类的数据库文件要比小鼠多，我就随便给一个代码哈，大家需要自行去理解SingleR相关文档。

rm(list=ls())options(stringsAsFactors = F)library(Seurat)

library(SingleR)hpca.se <- HumanPrimaryCellAtlasData()hpca.se

load(file = 'sce.output.merge.ls_12.Rdata')phe=sce@meta.datasce_for_SingleR <- GetAssayData(sce, slot="data")sce_for_SingleRlibrary(SingleR)hpca.se <- HumanPrimaryCellAtlasData()hpca.sepred.hesc <- SingleR(test = sce_for_SingleR, ref = hpca.se, labels = hpca.se$label.main)table(pred.hesc$labels)table(pred.hesc$labels,phe$seurat_clusters)plotScoreHeatmap(pred.hesc)

#Plot the Datalibrary(ggplot2)library(reshape2)tb=table(pred.hesc$labels,phe$seurat_clusters)tp= apply(tp,2,function(x) x/sum(x))colSums(tp)df=as.data.frame(melt(tp))head(df)df$Var2=as.factor(df$Var2)g <- ggplot(df, aes(Var2, Var1)) + geom_point(aes(size = value), colour = "green") + theme_bw() + xlab("") + ylab("") gtable(phe$seurat_clusters)ggsave(filename = 'first_singleR_match_seurat0.2_cluster.pdf')

plotScoreHeatmap(pred.hesc)tmp=as.data.frame(table(pred.hesc$labels))write.table(tmp[order(tmp$Freq,decreasing = T),], file = 'main_subtype.txt',row.names = F,quote = F)

save(pred.hesc,file = 'first_singleR_subtype_results.Rdata')

基本上涵盖了我在《单细胞天地》这个公众号的两个视频课程全部内容了：

全网第一个单细胞课程（免费基础课程）

免费学习地址在B站：https://www.bilibili.com/video/av38741055 ，欢迎提问弹幕交流！
务必听课后完成结业考核20题：https://mp.weixin.qq.com/s/lpoHhZqi-_ASUaIfpnX96w
课程配套资料文档在：https://docs.qq.com/doc/DT2NwV0Fab3JBRUx0

技能树出品的第二个单细胞课程（进阶课程，仍然免费）

详情请自行阅读介绍 https://mp.weixin.qq.com/s/bLfO-8ri_SNUepGs4UwRQw
本课程长期答疑文档，https://docs.qq.com/doc/DT0FxbEpHYU5ZVlpu

因为课程涉及到知识点太多，所以我拆分成为了5个子课程，欢迎B站提问弹幕交流！全部链接是：

「生信技能树」单细胞进阶数据处理之文献导读，链接是：https://www.bilibili.com/video/BV17f4y1R7N8
「生信技能树」使用10X单细胞转录组数据探索免疫治疗，链接是：https://www.bilibili.com/video/BV1xD4y1S74P
「生信技能树」单细胞基因组数据拷贝数变异分析流程，链接是：https://www.bilibili.com/video/BV1Yf4y1R75R
「生信技能树」云服务器处理单细胞转录组数据，链接是：https://www.bilibili.com/video/BV154411Z7DU
「生信技能树」使用Smart-seq2单细胞转录组数据探索小鼠性腺发育，链接是：https://www.bilibili.com/video/BV1454y1q77Z

也希望可以帮助到你。

如果你需要的是CNS文章全部图表，发表级别的代码，那就必须看CNS文章自带的GitHub哈，比如：你要的rmarkdown文献图表复现全套代码来了（单细胞）

大摩宏观策略谈：2025中美变局展望

穿了跟没穿一样，胸型赞到爆！天然乳胶，性感到让男人腿软！

高三女生醉酒后被强奸致死？检方回应

高三女生醉酒后被强奸致死？检方回应

那些内心强大的孩子，童年被允许做过这1件事

祖传的单个10x样本的seurat标准代码（人和鼠需要区别对待）

第一步是merge多个10x数据

第二步是走简单的聚类分群流程

最后是细胞亚群的自动化注释

全网第一个单细胞课程（免费基础课程）

技能树出品的第二个单细胞课程（进阶课程，仍然免费）

您可能也对以下帖子感兴趣

大摩宏观策略谈：2025中美变局展望

穿了跟没穿一样，胸型赞到爆！天然乳胶，性感到让男人腿软！

高三女生醉酒后被强奸致死？检方回应

高三女生醉酒后被强奸致死？检方回应

那些内心强大的孩子，童年被允许做过这1件事

生成图片，分享到微信朋友圈

祖传的单个10x样本的seurat标准代码（人和鼠需要区别对待）

第一步是merge多个10x数据

第二步是走简单的聚类分群流程

最后是细胞亚群的自动化注释

全网第一个单细胞课程（免费基础课程）

技能树出品的第二个单细胞课程（进阶课程，仍然免费）

您可能也对以下帖子感兴趣