单细胞亚群 Marker 基因热图重绘及均值展示

Original JunJunLab 老俊俊的生信笔记 2022-08-27

收录于合集 #单细胞笔记 21个

今日书

1前言

注意了注意了,QQ 交流群人数已满,请不要申请了,微信交流群还有位置!

2引言

seurat 自带的 DoHeatmap 绘制热图函数虽然能在大部分情况下满足我们的需要,但是经常我们会在一些细节的处理上可能就不太方便了,这时候需要提取数据自己绘图了。

还有一种展示的方式就是将亚群所有细胞的 marker 基因取均值进行展示,这样可以减少热图的大小及冗余信息。

今天分享一下如何提取数据自定义绘制热图及均值热图的绘制。

3测试数据上游基本处理

我们取前 5 个基因进行展示:

library(dplyr)
library(Seurat)
library(ggplot2)
library(patchwork)
library(ggsci)
library(circlize)
library(RColorBrewer)
library(ComplexHeatmap)

# Load the PBMC dataset
pbmc.data <- Read10X(data.dir = "./hg19/")

# Initialize the Seurat object with the raw (non-normalized data).
pbmc <- CreateSeuratObject(counts = pbmc.data,
                           project = "pbmc3k",
                           min.cells = 3,
                           min.features = 200)
pbmc
# An object of class Seurat
# 13714 features across 2700 samples within 1 assay
# Active assay: RNA (13714 features, 0 variable features)

# calculate mitocondro genes percent
pbmc[["percent.mt"]] <- PercentageFeatureSet(pbmc, pattern = "^MT-")

# filter
pbmc <- subset(pbmc, subset = nFeature_RNA > 200 & nFeature_RNA < 2500 & percent.mt < 5)

# standard process
pbmc <- NormalizeData(pbmc) %>%
  FindVariableFeatures(selection.method = "vst", nfeatures = 2000)

pbmc <- pbmc %>%
  ScaleData(features = rownames(pbmc)) %>%
  RunPCA(features = VariableFeatures(object = pbmc)) %>%
  FindNeighbors(dims = 1:10) %>%
  FindClusters(resolution = 0.5)

# find markers for every cluster compared to all remaining cells
# report only the positive ones
pbmc.markers <- FindAllMarkers(pbmc,
                               only.pos = TRUE,
                               min.pct = 0.25,
                               logfc.threshold = 0.25)

# get top 10 genes
top5pbmc.markers <- pbmc.markers %>%
  group_by(cluster) %>%
  top_n(n = 5, wt = avg_log2FC)

DoHeatmap 展示:

# get color
col <- pal_npg()(9)

# plot
DoHeatmap(pbmc,features = top5pbmc.markers$gene,
          group.colors = col) +
  scale_colour_npg() +
  scale_fill_gradient2(low = '#0099CC',mid = 'white',high = '#CC0033',
                       name = 'Z-score')

稍微调整一下还是挺好看的。

4提取数据

FetchData 提取基因表达量:

###########################################

# get all cell gene expression
all_cell_exp <- FetchData(pbmc,vars = top5pbmc.markers$gene,
                          slot = 'data') %>%
  t()

# get meta data
metadata <- pbmc@meta.data %>% arrange(seurat_clusters)
metadata$cluster_name <- paste('clsuter ',metadata$seurat_clusters,sep = '')

# order cells
all_cell_exp <- all_cell_exp[,rownames(metadata)]

# Z-score
allht <- t(scale(t(all_cell_exp),scale = T,center = T))

# check
head(allht[1:3,1:3])

# AAACGCTGTAGCCA-1 AAACTTGATCCAGA-1 AAAGAGACGAGATA-1
# LDHB       -1.3141746        1.2251863        1.0439048
# CCR7       -0.4792064        2.9138081       -0.4792064
# LEF1       -0.4336201       -0.4336201       -0.4336201

绘图:

这里我们使用 complexheatmap 绘图:

数据准备

# color
col_fun = colorRamp2(c(-2, 0, 2), c("#0099CC", "white", "#CC0033"))

# top annotation
table(metadata$seurat_clusters)
#   0   1   2   3   4   5   6   7   8
# 711 480 472 344 279 162 144  32  14

# anno_col <- pal_npg()(9)
anno_col <- brewer.pal(9, "Paired")
names(anno_col) <- paste('clsuter ',0:8,sep = '')

# clsuter 0 clsuter 1 clsuter 2 clsuter 3 clsuter 4 clsuter 5 clsuter 6 clsuter 7
# "#A6CEE3" "#1F78B4" "#B2DF8A" "#33A02C" "#FB9A99" "#E31A1C" "#FDBF6F" "#FF7F00"
# clsuter 8
# "#CAB2D6"

# make annotation
column_ha = HeatmapAnnotation(cluster = metadata[,'cluster_name'],
                              col = list(cluster = anno_col),
                              border = T)

绘图

# plot
Heatmap(allht,
        name = "Z-score",
        cluster_columns = F,cluster_rows = F,
        row_title = "Cluster top 5 Marker genes",
        # column_title = "Clusters",
        row_names_gp = gpar(fontface = 'italic',fontsize = 10),
        border = T,
        show_column_names = F,
        top_annotation = column_ha,
        column_split = metadata[,'cluster_name'],
        column_title_rot = 45,
        col = col_fun)

5marker 均值绘图

使用 AverageExpression 提取亚群基因的均值表达量:

###########################################

# get cells mean gene expression
mean_gene_exp <- AverageExpression(pbmc,
                                   features = top5pbmc.markers$gene,
                                   group.by = 'seurat_clusters',
                                   slot = 'data') %>%
  data.frame() %>%
  as.matrix()

# add colnames
colnames(mean_gene_exp) <- paste('clsuter ',0:8,sep = '')

# Z-score
htdf <- t(scale(t(mean_gene_exp),scale = T,center = T))

# color
col_fun = colorRamp2(c(-2, 0, 2), c("#0099CC", "white", "#CC0033"))

# top annotation
column_ha = HeatmapAnnotation(cluster = colnames(htdf),
                              col = list(cluster = anno_col))

绘图:

# plot
Heatmap(htdf,
        name = "Z-score",
        cluster_columns = F,cluster_rows = F,
        row_title = "Cluster top 5 Marker genes",
        column_title = "Clusters",
        row_names_gp = gpar(fontface = 'italic',fontsize = 10),
        row_names_side = 'left',
        border = T,
        rect_gp = gpar(col = "white", lwd = 1),
        column_names_side = 'top',
        column_names_rot = 45,
        top_annotation = column_ha,
        # column_split = paste('clsuter ',0:8,sep = ''),
        col = col_fun)