查看原文
其他

R 语言 SEM 笔记:从 X 到 Y 有几条路径?直接或效应

ecologyR ecologyR 2024-03-25

引言

完成一个结构方程模型以后,会关心直接效应、间接效应是多少。这里探究一个的问题:从因素 X 到 Y 的直接、间接路径有哪些?我不清楚有没有现成的、通用性强的 R 包可以做到。

1、工具包

library(tidyverse)
library(igraph)
library(tidygraph)
library(ggraph)
library(piecewiseSEM)
library(lavaan)

2、一个 SEM 结构例子

它的表达式如下,

  • 注,R 语言中,赋值可以放在最后。这时候,使用 -> 符号

"B ~ A,
C ~ B + A,
D ~ C + B + A,
E ~ A + B + C + D"
-> formula.text

3、从 SEM 结构到网络图

第一步:把这个 SEM 的表达式,重新拆分成若干个回归表达式,

# Individual formula
all.f <- lapply(str_split_1(formula.text, ","), as.formula)
[[1]]
B ~ A
<environment: 0x7fb3bdb95db0>

[[2]]
C ~ B + A
<environment: 0x7fb3bdb95db0>

[[3]]
D ~ C + B + A
<environment: 0x7fb3bdb95db0>

[[4]]
E ~ A + B + C + D
<environment: 0x7fb3bdb95db0>

第二步:把这些表达式,整理成一个 from 节点和 to 节点的数据框,

# Formulas to data.frame
gr.dat <- map_dfr(
1:length(all.f),
function(i) {
data.frame(
from = lapply(all.f, function(f) all.vars(f)[-1])[[i]], # all the names in expression
to = lapply(all.f, function(f) all.vars(f)[1])[[i]] # all the names in expression
)
}
)
from to
1 A B
2 B C
3 A C
4 C D
5 B D
6 A D
7 A E
8 B E
9 C E
10 D E

第三步:有了 from 节点和 to 节点,构建一个网络图就很简单了,

  • 函数 as_tbl_graph 把 igraph 转换为 ggraph 网络图,

# Graph
gr.igr <- graph_from_data_frame(gr.dat, directed = TRUE) # igraph 图
gr.ggr <- as_tbl_graph(gr.igr) # ggraph 图

第四步:把图画出来看一下,

# 使用 igraph
# plot(gr.igr)

# 或者
gr.font <- "Open Sans"
theme_set(theme_graph(base_family = gr.font))
gr.ggr |>
ggraph(layout = "circle") +
geom_edge_link(
color = "grey",
arrow = arrow(45, unit(5, "pt"), type = "closed"),
start_cap = circle(10, "pt"),
end_cap = circle(10, "pt")
) +
# geom_node_point(
# shape = 1,
# size = 9,
# color = "grey"
# ) +
geom_node_text(
aes(label = name),
size = 5,
family = gr.font
) +
coord_equal(clip = "off")
## Warning: Using the `size` aesthetic in this geom was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` in the `default_aes` field and elsewhere instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

3、判断直接和间接路径

使用 igraph 包的 all_simple_paths 函数,可以发现从 A 到 D 有四条路径,

gr.ggr |> igraph::all_simple_paths(from = "A", to = "D")## [[1]]
## + 4/5 vertices, named, from 395084c:
## [1] A B C D
##
## [[2]]
## + 3/5 vertices, named, from 395084c:
## [1] A B D
##
## [[3]]
## + 3/5 vertices, named, from 395084c:
## [1] A C D
##
## [[4]]
## + 2/5 vertices, named, from 395084c:
## [1] A D
# gr.ggr |>
# igraph::all_simple_paths(from = "A", to = "D") |>
# map( ~ names(.x))

画出这些连接,

# 找到从 A 到 E 的所有路径
from.node <- "A"
to.node <- "D"
target.paths <- gr.igr |> igraph::all_simple_paths(from = from.node, to = to.node)

# 应该可以不用 1 个 1 个写代码,但是怎么批量操作?目前,还没研究
E(gr.igr, path = target.paths[[1]])$paths.1 <- paste(names(target.paths[[1]]), collapse = "→")
E(gr.igr, path = target.paths[[2]])$paths.2 <- paste(names(target.paths[[2]]), collapse = "→")
E(gr.igr, path = target.paths[[3]])$paths.3 <- paste(names(target.paths[[3]]), collapse = "→")
E(gr.igr, path = target.paths[[4]])$paths.4 <- paste(names(target.paths[[4]]), collapse = "→")

# 绘图
gr.igr |>
as_tbl_graph() |>
ggraph(layout = "circle") +
geom_edge_link(
color = "grey",
linetype = 5,
start_cap = circle(10, "pt"),
end_cap = circle(10, "pt"),
arrow = arrow(45, unit(5, "pt"), type = "closed")
) +
# 以下 4 段,可以不用 1 个 1 个写代码,但是怎么批量操作,还没研究
geom_edge_link(
aes(filter = !is.na(paths.1), color = paths.1),
start_cap = circle(10, "pt"),
end_cap = circle(10, "pt"),
arrow = arrow(45, unit(5, "pt"), type = "closed")
) +
# geom_edge_link(
# aes(filter = !is.na(paths.2), color = paths.2),
# start_cap = circle(10, "pt"),
# end_cap = circle(10, "pt"),
# arrow = arrow(45, unit(5, "pt"), type = "closed")
# ) +
# geom_edge_link(
# aes(filter = !is.na(paths.3), color = paths.3),
# start_cap = circle(10, "pt"),
# end_cap = circle(10, "pt"),
# arrow = arrow(45, unit(5, "pt"), type = "closed")
# ) +
geom_edge_link(
aes(filter = !is.na(paths.4), color = paths.4),
start_cap = circle(10, "pt"),
end_cap = circle(10, "pt"),
arrow = arrow(45, unit(5, "pt"), type = "closed")
) +
geom_node_text(aes(label = name), family = gr.font) +
scale_edge_color_brewer(name = NULL, palette = "Dark2") +
coord_equal(clip = "off")

4、数据案例

首先,把 SEM 从表达式变成 graph 这一功能编写成函数 f2gr

f2gr <- function(formula.text) {
# Individual formula
all.f <- lapply(str_split_1(formula.text, ","), as.formula)
# Formulas to data.frame
map_dfr(
1:length(all.f),
function(i) {
data.frame(
from = lapply(all.f, function(f) all.vars(f)[-1])[[i]], # all the names in expression
to = lapply(all.f, function(f) all.vars(f)[1])[[i]] # all the names in expression
)
}
)|>
# 变成一个 igraph 图
graph_from_data_frame(directed = TRUE)
}

案例来自一篇 Nature 文章,又见 R 语言 SEM 笔记:使用 ggplot2 衍生包画 Nature 结构方程模型图

# Decoupling of soil nutrient cycles as a function of aridity in global drylands
# https://www.nature.com/articles/nature12670

SEM 表达式,

TP ~ Lon + De + Ar + Ar2 + PCov + Clay + Penz
Penz ~ Lon + De + Ar + Ar2 + PCov + Clay + OMC
OMC ~ Lon + De + Ar2 + PCov + Clay
PCov ~ Lon + De + Ar
Clay ~ PCov + Lon + De + Ar
Ar ~ Lon + De
Ar2 ~ Lon + De

运用 f2gr 函数,

  • 间接路径很多

"TP ~ Lon + De + Ar + Ar2 + PCov + Clay + Penz,
Penz ~ Lon + De + Ar + Ar2 + PCov + Clay + OMC,
OMC ~ Lon + De + Ar2 + PCov + Clay,
PCov ~ Lon + De + Ar,
Clay ~ PCov + Lon + De + Ar,
Ar ~ Lon + De,
Ar2 ~ Lon + De"
-> sem.f

# 得到 SEM 的 igraph 图
sem.igr <- f2gr(sem.f)

# 得到 firesev 的 rich 所有路径
# all_simple_paths(sem.igr, from = "Lon", to = "TP") # 路径特别多
all_simple_paths(sem.igr, from = "PCov", to = "TP") # 路径不算太多
## [[1]]
## + 4/9 vertices, named, from 96e87e3:
## [1] PCov Clay Penz TP
##
## [[2]]
## + 5/9 vertices, named, from 96e87e3:
## [1] PCov Clay OMC Penz TP
##
## [[3]]
## + 3/9 vertices, named, from 96e87e3:
## [1] PCov Clay TP
##
## [[4]]
## + 3/9 vertices, named, from 96e87e3:
## [1] PCov Penz TP
##
## [[5]]
## + 4/9 vertices, named, from 96e87e3:
## [1] PCov OMC Penz TP
##
## [[6]]
## + 2/9 vertices, named, from 96e87e3:
## [1] PCov TP

给这些连接上色,

# 找到从 Lon 到 TP 的路径。此处只演示 2 个
# focus.paths <- all_simple_paths(sem.igr, from = "Lon", to = "TP")
focus.paths <- all_simple_paths(sem.igr, from = "PCov", to = "TP")
E(sem.igr, path = focus.paths[[1]])$paths.1 <- paste(names(focus.paths[[1]]), collapse = "→")
E(sem.igr, path = focus.paths[[2]])$paths.2 <- paste(names(focus.paths[[2]]), collapse = "→")

# 绘图
set.seed(1)
sem.igr |>
as_tbl_graph() |>
ggraph(layout = "stress") +
# geom_edge_diagonal(
# color = "grey",
# edge_width = 0.1,
# start_cap = circle(10, "pt"),
# end_cap = circle(10, "pt"),
# arrow = arrow(45, unit(5, "pt"), type = "closed")
# ) +
geom_edge_diagonal(
aes(filter = !is.na(paths.1), color = paths.1),
start_cap = circle(10, "pt"),
end_cap = circle(10, "pt"),
arrow = arrow(45, unit(5, "pt"), type = "closed")
) +
geom_edge_diagonal(
aes(filter = !is.na(paths.2), color = paths.2),
start_cap = circle(10, "pt"),
end_cap = circle(10, "pt"),
arrow = arrow(45, unit(5, "pt"), type = "closed"),
position = position_nudge(x = -0.05, y = -0.05)
) +
geom_node_point(shape = 1, size = 9, color = "grey") +
geom_node_text(aes(label = name), family = gr.font) +
scale_edge_color_brewer(name = NULL, palette = "Dark2") +
coord_equal(clip = "off")

结语

既然得到了直接和间接路径,那计算总效应和间接效应该也可以实现。初步探索,待续。

继续滑动看下一个
向上滑动看下一个

您可能也对以下帖子感兴趣

文章有问题?点此查看未经处理的缓存