其他
R 语言 SEM 笔记:使用 ggplot2 衍生包画 Nature 结构方程模型图
引言
Nature 论文 Decoupling of soil nutrient cycles as a function of aridity in global drylands 文中 Online method 把结构方程模型的构建步骤写得很清楚,且附件提供了复现 SEM 所需的数据。
工具包
library(tidyverse)
library(patchwork)
library(readxl)
library(lavaan)
library(ggraph)
原始数据
下载文中 Source data to Fig. 2
数据,然后读取,
# Data input
FIG2_dat <- read_excel("41586_2013_BFnature12670_MOESM308_ESM.xls")
SEM 数据
添加 PCA 轴 1,数据转换等,
# Reduced these two variables (SOC and TN) to a single variable using PCA
CNdt <- FIG2_dat[, 8:9]
# Do PCA
PCA <- prcomp(CNdt, scale. = TRUE, center = TRUE)
font <- "Open Sans"
# font <- "sans"
par(family = font, cex = 1.3, xpd = TRUE)
biplot(PCA, cex = 0.7)
# Extracting PCA first axis scores
PC_1 <- PCA$x[, 1]
# Add PC scores to the data
FIG2_dat$OMC <- PC_1
# So the SEM data
FIG2_dat$Ar2 <- FIG2_dat$Aridity ^ 2
SEMdt <- FIG2_dat[, -c(1:4, 8:9)]
SEMdt <- setNames(SEMdt, nm = c("Lon", "De", "Ar", "TP", "PCov", "Clay", "Penz", "OMC", "Ar2"))
SEMdt$TP <- log(SEMdt$TP)
SEMdt$Penz <- log(SEMdt$Penz)
SEMdt$PCov <- sqrt(SEMdt$PCov)
# Scale the data
SEMdt <- SEMdt |> mutate(across(where(is.numeric), scale))
拟合 SEM
# Model structure
model_structure <- "
TP ~ Lon + De + Ar + Ar2 + PCov + Clay + Penz
Penz ~ Lon + De + Ar + Ar2 + PCov + Clay + OMC
OMC ~ Lon + De + Ar2 + PCov + Clay
PCov ~ Lon + De + Ar
Clay ~ PCov + Lon + De + Ar
Ar ~ Lon + De
Ar2 ~ Lon + De
Ar ~~ Ar2 # 指定相关
"
# Fit the SEM
fitted_SEM <- sem(model_structure, data = SEMdt)
从结果里提取 SEM 数据
# Extract standardized parameters
SEM_para <- lavaan::standardizedSolution(fitted_SEM)
# Edge properties
all_edges <- SEM_para |>
filter(op %in% c("=~", "~", "~~", "<~"), lhs != rhs, !is.na(pvalue)) |>
transmute(
to = lhs,
from = rhs,
val = est.std,
pvalue = pvalue,
type = dplyr::case_when(
op == "=~" ~ "loading",
op == "<~" ~ "composite",
op == "~" ~ "regression",
op == "~~" ~ "correlation",
TRUE ~ NA_character_
)
)
# Keep only "regression" links
SEM_edges <- all_edges |> filter(type == "regression")
# Node properties
SEM_nodes <- SEM_para |>
filter(lhs == rhs) |> # make nodes name unique
transmute(metric = lhs, e = est.std)
# R2
SEM_nodes_RSquared <- data.frame(RSquared = inspect(fitted_SEM, "rsquare")) |>
rownames_to_column(var = "metric") |>
merge(SEM_nodes, all = TRUE)
# SEM Graph
SEM_graph <- tidygraph::tbl_graph(SEM_nodes_RSquared, SEM_edges)
绘制 SEM 示意图
# Plot SEM graph
theme_set(theme_graph(base_family = font))
gr1 <- ggraph(SEM_graph, layout = "linear") +
geom_edge_arc(
strength = 1,
# fold = TRUE,
aes(
filter = type == "regression",
color = ifelse(sign(val) > 0, "Positive", "Negative"),
edge_width = abs(val),
label = round(val, 2),
alpha = ifelse(pvalue <= 0.05, "Significant", "Not significant"),
end_cap = label_rect(node2.metric, padding = margin(10, 10, 10, 10, "pt"))
),
angle_calc = "along",
label_size = 3,
arrow = arrow(angle = 45, length = unit(10, "pt"), type = "open"),
vjust = -0.5,
family = font,
show.legend = FALSE
) +
geom_node_point(
shape = 1,
size = 7,
color = "grey"
) +
geom_node_text(
aes(label = metric),
size = 5,
family = font,
show.legend = FALSE
) +
geom_node_text(
aes(
label = case_when(
is.na(RSquared) ~ "",
TRUE ~ paste("italic(R)^2 * '=' *", round(RSquared, 2))
)
),
color = "black",
size = 4,
hjust = 0.5,
vjust = 2,
parse = TRUE,
family = font,
show.legend = FALSE
) +
scale_edge_color_brewer(palette = "Set2") +
scale_edge_width_continuous(range = c(0.5, 2)) +
scale_edge_alpha_manual(values = c("Significant" = 1, "Not significant" = 0.1)) +
coord_equal(
ratio = 1.3,
clip = "off"
)
gr1
## Warning: Using the `size` aesthetic in this geom was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` in the `default_aes` field and elsewhere instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
换个布局,
set.seed(1)
gr2 <- ggraph(SEM_graph, layout = "stress") + # layout = "circle"
geom_edge_diagonal(
strength = 0.7,
aes(
filter = type == "regression",
color = ifelse(sign(val) > 0, "Positive", "Negative"),
edge_width = abs(val),
label = round(val, 2),
alpha = ifelse(pvalue <= 0.05, "Significant", "Not significant"),
end_cap = label_rect(node2.metric, padding = margin(10, 10, 10, 10, "pt"))
),
angle_calc = "along",
label_size = 3,
arrow = arrow(angle = 45, length = unit(10, "pt"), type = "open"),
vjust = -0.5,
family = font,
show.legend = FALSE
) +
geom_node_point(
shape = 1,
size = 9,
color = "grey"
) +
geom_node_text(
aes(label = metric),
size = 9,
family = font,
show.legend = FALSE
) +
geom_node_text(
aes(
label = case_when(
is.na(RSquared) ~ "",
TRUE ~ paste("italic(R)^2==", round(RSquared, 2))
)
),
color = "black",
size = 4,
hjust = 0.5,
vjust = 2,
parse = TRUE,
family = font,
show.legend = FALSE
) +
scale_edge_color_brewer(palette = "Set2") +
scale_edge_width_continuous(range = c(0.5, 2)) +
scale_edge_alpha_manual(values = c("Significant" = 1, "Not significant" = 0.1)) +
coord_equal(
ratio = 1,
clip = "off"
)
gr2
结语
可能不能直接用于论文中,但可以作为手动画图的辅助。