PCA、RDA等排序图的ggplot2二维可视化示例
备注:ggplot2本身的作图语法本篇不涉及,基础部分还请自行学习了。
示例文件、R脚本等的百度盘链接:
https://pan.baidu.com/s/1KUA5owf4y13y1RJbff-l7g
“pca_site.txt”是某PCA分析后,导出的样方(对象)排序坐标,“pca_var.txt”是变量的坐标。
“cap_site.txt”是某db-RDA(CAP)分析后,导出的样方(对象)排序坐标,“cap_sp.txt”是物种(响应变量)排序坐标,“cap_env.txt”是环境变量(解释变量)的排序坐标。
PCA等排序图的绘制
先以某PCA排序图为例吧。
library(ggplot2)
#数据
pca_site <- read.delim('pca_site.txt', sep = '\t', stringsAsFactors = FALSE)
pca_site$group_all <- paste(pca_site$group1, pca_site$group2, sep = '_')
pca_var <- read.delim('pca_var.txt', sep = '\t', stringsAsFactors = FALSE)
常规的只展示对象的排序图,PCA前两轴,颜色区分分组,这里假设原分析中前两轴的贡献率分别为30%和20%(贡献率由实际的计算获得)。
#常规仅样方的点图,单组样式p <- ggplot(data = pca_site, aes(x = PC1, y = PC2)) +
theme(panel.grid = element_blank(), panel.background = element_rect(color = 'black', fill = 'transparent'), legend.key = element_rect(fill = 'transparent')) +
labs(x = 'PCA1: 30%', y = 'PCA2: 20%')
p + geom_point(aes(color = group1)) +
scale_color_manual(values = c('red2', 'purple2', 'green3'))
如果存在多分组,可以分别以更多的颜色区分,或者点的形状等。
#多组可以颜色表示p + geom_point(aes(color = group_all)) +
scale_color_manual(values = c('red', 'purple', 'green', 'red4', 'purple4', 'green4'), limits = c('A_l', 'B_l', 'C_l', 'A_h', 'B_h', 'C_h'))
#或者使用点的颜色+形状区分
p + geom_point(aes(color = group1, shape = group2)) +
scale_color_manual(values = c('red2', 'purple2', 'green3'), limits = c('A', 'B', 'C')) +
scale_shape_manual(values = c(17, 15), limits = c('l', 'h'))
#渐变色表示连续数值
p + geom_point(aes(color = group3, shape = group1)) +
scale_shape_manual(values = c(19, 17, 15), limits = c('A', 'B', 'C')) +
scale_color_gradientn(colors = colorRampPalette(c('#C0D857', '#3EB897', '#663490'))(7))
一些背景网格线的常选样式。
#以上述某样式继续为例展示p <- p + geom_point(aes(color = group1, shape = group2)) +
scale_color_manual(values = c('red2', 'purple2', 'green3'), limits = c('A', 'B', 'C')) +
scale_shape_manual(values = c(17, 15), limits = c('l', 'h'))
#一些常见背景样式
p + theme(axis.line = element_line(colour = 'black'), panel.background = element_blank())
p + geom_vline(xintercept = 0, color = 'gray', size = 0.5) +
geom_hline(yintercept = 0, color = 'gray', size = 0.5)
p + theme(panel.grid.major = element_line(color = 'gray', size = 0.2))
有时能够看到直接在原图中标记对象名称的。
#添加标签,常规方法p + geom_text(aes(label = name, color = group1), size = 3, vjust = 2, show.legend = FALSE)
#防止重叠的添加标签方法
library(ggrepel)
p + geom_text_repel(aes(label = name, color = group1), size = 3, box.padding = unit(0.5, 'lines'), show.legend = FALSE)
p + geom_label_repel(aes(label = name, color = group1), size = 3, box.padding = unit(0.5, 'lines'), show.legend = FALSE)
这种在图中,根据样本添加分组椭圆或者多边形的样式,经常能看到。
#添加置信椭圆,注意它不是聚类p + stat_ellipse(aes(color = group1), level = 0.95, linetype = 2, show.legend = FALSE)
p + stat_ellipse(aes(fill = group1), geom = 'polygon', level = 0.95, alpha = 0.1, show.legend = FALSE) +
scale_fill_manual(values = c('red', 'purple', 'green'))
#“伪聚类”,我也不知道怎么称呼,既不是真正的聚类也不是置信区间,仅用于标记分组而已
#这种样式也经常看到吧
#参考自:https://stackoverflow.com/questions/42575769/how-to-modify-the-backgroup-color-of-label-in-the-multiple-ggproto-using-ggplot2
source('geom_enterotype.r') #见网盘附件
p + geom_enterotype(aes(fill = group1, color = group1, label = group1), show.legend = FALSE) +
scale_fill_manual(values = c('#ffa6a9', '#e8b4fd', '#c7ffc4'))
#还有分组多边形这种
library(plyr)
group1_border <- ddply(pca_site, 'group1', function(df) df[chull(df[[2]], df[[3]]), ])
p + geom_polygon(data = group1_border, aes(fill = group1, color = group1), alpha = 0.1, show.legend = FALSE) +
scale_fill_manual(values = c('red', 'purple', 'green'))
图中额外添加一些标识符、标签或者特殊形状之类的,一般推荐后续用AI、PS等P图上去更为方便;如果用代码添加的话,以一些文字为例。
#额外添加一些标签等,例如p + stat_ellipse(aes(fill = group1), geom = 'polygon', level = 0.95, alpha = 0.1, show.legend = FALSE) +
scale_fill_manual(values = c('red', 'purple', 'green')) +
annotate('text', label = 'A', x = -0.15, y = -0.10, size = 5, color = 'red2') +
annotate('text', label = 'B', x = 0.20, y = 0.09, size = 5, color = 'purple2') +
annotate('text', label = 'C', x = -0.20, y = -0.05, size = 5, color = 'green3') +
annotate('text', label = 'PERMANOVA: P<0.001', x = 0.2, y = 0.14, size = 3)
以上均展示了对象的位置,平常见到的大多数PCA排序图也只展示了对象;如果期望将变量也呈现出来。
#添加变量,PCA 这种线性模型中,变量常以向量形式展示p + geom_segment(data = pca_var, aes(x = 0, y = 0, xend = PC1, yend = PC2), arrow = arrow(length = unit(0.1, 'cm')), color = 'blue', size = 0.3) +
geom_text(data = pca_var, aes(x = PC1 * 1.1, y = PC2 * 1.1, label = name), color = 'blue', size = 3)
#CA 这种单峰模型中,变量则常直接展示为点,这里顺便以该示例数据作个展示,假设它是个 CA 的数据
p + geom_text(data = pca_var, aes(x = PC1, y = PC2, label = name), color = 'blue', size = 3) +
labs(x = 'CA1', y = 'CA2')
组合一下上述提到的方法,随便来一个。
#象征性来个组合样式的p <- ggplot(data = pca_site, aes(x = PC1, y = PC2)) +
geom_point(aes(color = group1, shape = group2)) +
geom_enterotype(aes(fill = group1, color = group1, label = group1), show.legend = FALSE) +
scale_color_manual(values = c('red2', 'purple2', 'green3'), limits = c('A', 'B', 'C')) +
scale_shape_manual(values = c(17, 15), limits = c('l', 'h')) +
scale_fill_manual(values = c('#ffa6a9', '#e8b4fd', '#c7ffc4')) +
guides(color = FALSE) +
theme(panel.grid.major = element_line(color = 'gray', size = 0.2), panel.background = element_rect(color = 'black', fill = 'transparent'),
legend.title = element_blank(), legend.key = element_rect(fill = 'transparent'), legend.position = c(0.92, 0.87), legend.background= element_blank()) +
labs(x = 'PCA1: 30%', y = 'PCA2: 20%') +
annotate('text', label = 'PERMANOVA: P<0.001', x = 0.2, y = 0.14, size = 3)
p
以及偶尔也能看到和其它图形组合在一起的,例如结合箱线图表示对象的分布更为直观。
#在图的两侧添加箱线图展示点的分布pc1_boxplot <- ggplot(data = pca_site, aes(x = group1, y = PC1)) +
geom_boxplot(aes(color = group1)) +
scale_color_manual(values = c('red2', 'purple2', 'green3'), limits = c('A', 'B', 'C')) +
theme(panel.grid = element_blank(), panel.background = element_rect(color = 'black', fill = 'transparent'),
legend.position= 'none', axis.title = element_text(color = 'transparent')) +
coord_flip()
pc2_boxplot <- ggplot(data = pca_site, aes(x = group1, y = PC2)) +
geom_boxplot(aes(color = group1)) +
scale_color_manual(values = c('red2', 'purple2', 'green3'), limits = c('A', 'B', 'C')) +
theme(panel.grid = element_blank(), panel.background = element_rect(color = 'black', fill = 'transparent'),
legend.position= 'none', axis.title = element_text(color = 'transparent'))
library(grid)
grid.newpage()
pushViewport(viewport(layout = grid.layout(3, 3)))
print(p, vp = viewport(layout.pos.row = 1:2, layout.pos.col = 1:2))
print(pc1_boxplot, vp = viewport(layout.pos.row = 3, layout.pos.col = 1:2))
print(pc2_boxplot, vp = viewport(layout.pos.row = 1:2, layout.pos.col = 3))
#这里用了 grid 包实现组合,还有很多 R 包可用于组合,还请自行了解了
#如果觉得对齐效果不太好,在 print() 函数里调试参数对齐
#或者后面用 AI、PS 调整下更方便
RDA等排序图的绘制
这里以某db-RDA(CAP)的计算坐标为例吧。
基本的作图方法上文已作了描述,这里直接先展示一例只包含样方和环境变量的双序图,假设原分析中前两个约束轴的解释率分别为25%和15%(约束轴的解释率由实际的计算获得)。
library(ggplot2)#数据
cap_site <- read.delim('cap_site.txt', sep = '\t', stringsAsFactors = FALSE)
cap_sp <- read.delim('cap_sp.txt', sep = '\t', stringsAsFactors = FALSE)
cap_env <- read.delim('cap_env.txt', sep = '\t', stringsAsFactors = FALSE)
#作图,双序图
p <- ggplot(data = cap_site, aes(x = CAP1, y = CAP2)) +
geom_point(aes(color = group)) +
stat_ellipse(aes(fill = group), geom = 'polygon', level = 0.95, alpha = 0.1, show.legend = FALSE) +
scale_color_manual(values = c('red2', 'purple2', 'green3'), limits = c('A', 'B', 'C')) +
scale_shape_manual(values = c(17, 15), limits = c('l', 'h')) +
scale_fill_manual(values = c('red', 'purple', 'green')) +
theme(panel.grid = element_blank(), panel.background = element_rect(color = 'black', fill = 'transparent'),
legend.title = element_blank(), legend.key = element_rect(fill = 'transparent')) +
geom_vline(xintercept = 0, color = 'gray', size = 0.5) +
geom_hline(yintercept = 0, color = 'gray', size = 0.5) +
geom_segment(data = cap_env, aes(x = 0, y = 0, xend = CAP1, yend = CAP2), arrow = arrow(length = unit(0.2, 'cm')), color = 'blue', size = 0.3) +
geom_text(data = cap_env, aes(x = CAP1 * 1.1, y = CAP2 * 1.1, label = name), color = 'blue', size = 3) +
labs(x = 'CAP1: 25%', y = 'CAP2: 15%')
p
带样方、物种和环境变量的三序图。
#考虑将物种变量加入,展示为三序图p + geom_segment(data = cap_sp, aes(x = 0, y = 0, xend = CAP1, yend = CAP2), arrow = arrow(length = unit(0.1, 'cm')), color = 'orange', size = 0.2) +
geom_text(data = cap_sp, aes(x = CAP1 * 1.1, y = CAP2 * 1.1, label = name), color = 'orange', size = 2)
#物种也用颜色区分下分类
#物种数量太多了,这里选部分物种演示下好了
cap_sp_select <- cap_sp[1:10, ]
p + geom_segment(data = cap_sp_select, aes(x = 0, y = 0, xend = CAP1, yend = CAP2, color = sp_class),
arrow = arrow(length = unit(0.1, 'cm')), size = 0.2, show.legend = FALSE) +
geom_text(data = cap_sp_select, aes(x = CAP1 * 1.1, y = CAP2 * 1.1, label = name, color = sp_class), size = 2) +
scale_color_manual(values = c('red2', 'purple2', 'green3', '#8DD3C7', '#BEBADA', '#FB8072', '#80B1D3'),
limits = c('A', 'B', 'C', 'pa', 'pb', 'pr', 'a'))
#物种直接以点展示,而非向量
p + geom_point(data = cap_sp_select, aes(x = CAP1, y = CAP2), color = 'orange', size = 1)
p + geom_text(data = cap_sp_select, aes(x = CAP1, y = CAP2, label = name), color = 'orange', size = 2)