数据呈现丨R画树状图:一种轻量级方法
R画树状图
树状图是用于表示层次关系的图,例如从分层聚类获得的层次关系。它们常用于生物学,尤其是遗传学,用于揭示一组基因或者分群的关系。
library(ggdendro)
library(ggplot2)
调整ggdendro
首先,我'扩展'了ggdendro :: dendro_data()。 dendro_data_k()函数接受一个k参数,一个整数,指定所需簇的数量。 此值仅用于base :: cutree()函数,并且对于每个集群,将根据其x,xend和yend坐标为这些段分配相应叶子的集群ID。 这可能不是最优雅的方式,但它非常简单。
dendro_data_k <- function(hc, k) {
hcdata <- ggdendro::dendro_data(hc, type = "rectangle")
seg <- hcdata$segments
labclust <- cutree(hc, k)[hc$order]
segclust <- rep(0L, nrow(seg))
heights <- sort(hc$height, decreasing = TRUE)
height <- mean(c(heights[k], heights[k - 1L]), na.rm = TRUE)
for (i in 1:k) {
xi <- hcdata$labels$x[labclust == i]
idx1 <- seg$x >= min(xi) & seg$x <= max(xi)
idx2 <- seg$xend >= min(xi) & seg$xend <= max(xi)
idx3 <- seg$yend < height
idx <- idx1 & idx2 & idx3
segclust[idx] <- i
}
idx <- which(segclust == 0L)
segclust[idx] <- segclust[idx + 1L]
hcdata$segments$clust <- segclust
hcdata$segments$line <- as.integer(segclust < 1L)
hcdata$labels$clust <- labclust
hcdata
}
绘图函数
通过从ggdendro和上面的函数获得的所需的数据结构,可以使用ggplot2构建树。 使用两个geom:分支的geom_segment()和标签的geom_text()。
如果我们想要自定义树的方向(例如从上到下或从左到右)或格式(圆形图),事情会变得有点复杂。 为了更容易地处理它,使用(内部)不同的函数来设置标签的参数(角度,偏移,……)。
set_labels_params <- function(nbLabels,
direction = c("tb", "bt", "lr", "rl"),
fan = FALSE) {
if (fan) {
angle <- 360 / nbLabels * 1:nbLabels + 90
idx <- angle >= 90 & angle <= 270
angle[idx] <- angle[idx] + 180
hjust <- rep(0, nbLabels)
hjust[idx] <- 1
} else {
angle <- rep(0, nbLabels)
hjust <- 0
if (direction %in% c("tb", "bt")) { angle <- angle + 45 }
if (direction %in% c("tb", "rl")) { hjust <- 1 }
}
list(angle = angle, hjust = hjust, vjust = 0.5)
}
左右滑动查看更多
plot_ggdendro <- function(hcdata,
direction = c("lr", "rl", "tb", "bt"),
fan = FALSE,
scale.color = NULL,
branch.size = 1,
label.size = 3,
nudge.label = 0.01,
expand.y = 0.1) {
direction <- match.arg(direction) # if fan = FALSE
ybreaks <- pretty(segment(hcdata)$y, n = 5)
ymax <- max(segment(hcdata)$y)
## branches
p <- ggplot() +
geom_segment(data = segment(hcdata),
aes(x = x,
y = y,
xend = xend,
yend = yend,
linetype = factor(line),
colour = factor(clust)),
lineend = "round",
show.legend = FALSE,
size = branch.size)
## orientation
if (fan) {
p <- p +
coord_polar(direction = -1) +
scale_x_continuous(breaks = NULL,
limits = c(0, nrow(label(hcdata)))) +
scale_y_reverse(breaks = ybreaks)
} else {
p <- p + scale_x_continuous(breaks = NULL)
if (direction %in% c("rl", "lr")) {
p <- p + coord_flip()
}
if (direction %in% c("bt", "lr")) {
p <- p + scale_y_reverse(breaks = ybreaks)
} else {
p <- p + scale_y_continuous(breaks = ybreaks)
nudge.label <- -(nudge.label)
}
}
# labels
labelParams <- set_labels_params(nrow(hcdata$labels), direction, fan)
hcdata$labels$angle <- labelParams$angle
p <- p +
geom_text(data = label(hcdata),
aes(x = x,
y = y,
label = label,
colour = factor(clust),
angle = angle),
vjust = labelParams$vjust,
hjust = labelParams$hjust,
nudge_y = ymax * nudge.label,
size = label.size,
show.legend = FALSE)
# colors and limits
if (!is.null(scale.color)) {
p <- p + scale_color_manual(values = scale.color)
}
ylim <- -round(ymax * expand.y, 1)
p <- p + expand_limits(y = ylim)
p
}
左右滑动查看更多
基本的树状图
我们现在准备建立一个树状图。 默认情况下,应用ggplot2的标准主题。
mtc <- scale(mtcars)
D <- dist(mtc)
hc <- hclust(D)
hcdata <- dendro_data_k(hc, 3)
p <- plot_ggdendro(hcdata,
direction = "lr",
expand.y = 0.2)
p
左右滑动查看更多
定制的树状图
我们可以通过调整Plot_ggdendro()参数或添加其他属性来进一步自定义树形图。 下面是ggplot2 :: theme_void()的示例。
cols <- c("#a9a9a9", "#1f77b4", "#ff7f0e", "#2ca02c")
p <- plot_ggdendro(hcdata,
direction = "tb",
scale.color = cols,
label.size = 2.5,
branch.size = 0.5,
expand.y = 0.2)
p <- p + theme_void() + expand_limits(x = c(-1, 32))
p
mytheme <- theme(axis.text = element_text(color = "#50505030"),
panel.grid.major.y = element_line(color = "#50505030",
size = 0.25))
p + mytheme
左右滑动查看更多
p <- plot_ggdendro(hcdata,
fan = TRUE,
scale.color = cols,
label.size = 4,
nudge.label = 0.02,
expand.y = 0.4)
mytheme <- theme(panel.background = element_rect(fill = "black"))
p + theme_void() + mytheme
左右滑动查看更多
进一步定制
除了图形属性之外,还可以添加其他geom元素,使可能性无限。
p <- plot_ggdendro(hcdata,
fan = TRUE,
scale.color = cols,
label.size = 4,
nudge.label = 0.15,
expand.y = 0.8)
mytheme <- theme(panel.background = element_rect(fill = "black"))
p <- p + theme_void() + mytheme
p + geom_point(data = mtcars,
aes(x = match(rownames(mtcars), hcdata$labels$label),
y = -0.7,
fill = as.factor(cyl)),
size = 5,
shape = 21,
show.legend = FALSE) +
scale_fill_manual(values = c("white", "yellow", "red")) +
geom_text(data = mtcars,
aes(x = match(rownames(mtcars), hcdata$labels$label),
y = -0.7,
label = cyl),
size = 3)
左右滑动查看更多
使用gridExtra组合多个图,很容易地得到联结图。
library(gridExtra)
mtc <- scale(mtcars)
D <- dist(mtc)
hc1 <- hclust(D, "average")
hc2 <- hclust(D, "ward.D2")
hcdata1 <- dendro_data_k(hc1, 5)
hcdata2 <- dendro_data_k(hc2, 5)
cols <- c("#a9a9a9", "#1f77b4", "#ff7f0e", "#2ca02c", "#d62728", "#9467bd")
p1 <- plot_ggdendro(hcdata1,
direction = "lr",
scale.color = cols,
expand.y = 0.2) +
theme_void()
p2 <- plot_ggdendro(hcdata2,
direction = "rl",
scale.color = cols,
expand.y = 0.2) +
theme_void()
idx <- data.frame(y1 = 1:nrow(hcdata1$labels),
y2 = match(hcdata1$labels$label, hcdata2$labels$label))
p3 <- ggplot() +
geom_segment(data = idx,
aes(x = 0,
y = y1,
xend = 1,
yend = y2),
color = "grey") +
theme_void()
grid.arrange(p1, p3, p2, ncol = 3, widths = c(2, 1, 2))
左右滑动查看更多
library(data.table)
mtc <- scale(mtcars)
D <- dist(mtc)
hc <- hclust(D)
hcdata <- dendro_data_k(hc, 3)
p1 <- plot_ggdendro(hcdata,
direction = "lr",
scale.color = cols,
expand.y = 0.15) +
theme(axis.text.x = element_text(color = "#ffffff"),
panel.background = element_rect(fill = "#ffffff"),
axis.ticks = element_blank()) +
scale_color_brewer(palette = "Set1") +
xlab(NULL) +
ylab(NULL)
# scale from 0 to 1 and reshape mtcars data
scaled <- setDT(lapply(mtcars, scales::rescale))
melted <- melt(scaled, measure.vars = colnames(mtcars))
melted[, variable := as.factor(variable)]
idx <- match(rownames(mtcars), hcdata$labels$label)
melted[, car := rep(idx, ncol(mtcars))]
# 'bubblemap'
p2 <- ggplot(melted) +
geom_point(aes(x = variable,
y = car,
size = value,
color = value),
show.legend = FALSE) +
scale_color_viridis_c(direction = -1) +
theme_minimal() +
theme(axis.text.y = element_blank()) +
xlab(NULL) +
ylab(NULL)
grid.arrange(p1, p2, ncol = 2, widths = 3:2)
左右滑动查看更多
总结
像ggtree或dendextend这样的R包对于开箱即用的树状图非常有用。使用大约120行代码和三个函数,本文描述的方法非常基本,但也很灵活。定制主题参数并将树状图与其他绘图元素结合起来,可以很容易地构建更复杂的可视化(可点击左右下角查看原文)。
►一周热文
数据呈现丨划重点 ! 经济学学Python 需要学哪些内容?——数据可视化呈现必知必会的知识点
数据Seminar
这里是大数据、分析技术与学术研究的三叉路口
作者:数据人网出处:R语言推荐:简华(何年华)编辑:青酱
欢迎扫描👇二维码添加关注