其他
ggduo展示两组数据间的相关性
开始今天的学习吧~
来自非常棒的可视化R包:GGally
ggduo()
此函数可用于两组变量的可视化,可用于典型相关分析、时间序列、回归等。
示例使用GGally自带的psychademic数据集。该数据集包含600名学生的两组变量:心理测试变量(psychology)和学业能力测试变量(academic)。
# Attach GGally, the package this walkthrough is about.
library(GGally)
# Load the `psychademic` example data set into the workspace.
data(psychademic, package = "GGally")
# Print a compact overview of its columns and attributes.
utils::str(psychademic)
## 'data.frame': 600 obs. of 8 variables:
## $ locus_of_control: num -0.84 -0.38 0.89 0.71 -0.64 1.11 0.06 -0.91 0.45 0 ...
## $ self_concept : num -0.24 -0.47 0.59 0.28 0.03 0.9 0.03 -0.59 0.03 0.03 ...
## $ motivation : chr "4" "3" "3" "3" ...
## $ read : num 54.8 62.7 60.6 62.7 41.6 62.7 41.6 44.2 62.7 62.7 ...
## $ write : num 64.5 43.7 56.7 56.7 46.3 64.5 39.1 39.1 51.5 64.5 ...
## $ math : num 44.5 44.7 70.5 54.7 38.4 61.4 56.3 46.3 54.4 38.3 ...
## $ science : num 52.6 52.6 58 58 36.3 58 45 36.3 49.8 55.8 ...
## $ sex : chr "female" "female" "male" "male" ...
## - attr(*, "academic")= chr [1:5] "read" "write" "math" "science" ...
## - attr(*, "psychology")= chr [1:3] "locus_of_control" "self_concept" "motivation"
提取两组数据:
# Column names of the psychology group, stored as an attribute of the data.
psych <- attributes(psychademic)[["psychology"]]
print(psych)
## [1] "locus_of_control" "self_concept" "motivation"
# Column names of the academic group, stored the same way.
academic <- attributes(psychademic)[["academic"]]
print(academic)
## [1] "read" "write" "math" "science" "sex"
先使用ggpairs()展示每组内部各变量之间的相关性:
# Pairwise plot matrix restricted to the psychology columns.
ggpairs(
  data = psychademic,
  columns = psych,
  title = "Psych 组内变量"
)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Pairwise plot matrix restricted to the academic columns.
ggpairs(
  data = psychademic,
  columns = academic,
  title = "Academic 组内变量"
)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
使用ggduo()可以展示两组变量之间的关系:
library(ggplot2)
# Plot the psychology columns (x) against the academic columns (y);
# continuous pairs use the "smooth_lm" panel type.
ggduo(
  data = psychademic,
  columnsX = psych,
  columnsY = academic,
  types = list(continuous = "smooth_lm"),
  title = "Psych 和 Academic 组间变量关系",
  xlab = "Psych",
  ylab = "Academic"
)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
时间变量
library(ggplot2)
data(pigs)
# Drop the 2nd and 3rd columns of `pigs`.
pigs_dt <- pigs[-(2:3)]
# 1 when a row's profit is above the overall mean profit, 0 otherwise.
pigs_dt$profit_group <- as.numeric(pigs_dt$profit > mean(pigs_dt$profit))
# qplot() is deprecated since ggplot2 3.4.0, so the same plot is built with
# ggplot(): a smoother layer, then points, one facet row per melted variable.
ggplot(
  data = reshape::melt.data.frame(pigs_dt, "time"),
  mapping = aes(x = time, y = value)
) +
  geom_smooth() +
  geom_point() +
  facet_grid(variable ~ ., scales = "free_y")
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
回归分析
swiss <- datasets::swiss
# Placeholder column of row indices named "Residual"; it is selected as a
# y column in the ggduo() call further down.
swiss$Residual <- seq_len(nrow(swiss))
# For each of columns 2-6, regress Fertility on that single column and keep
# the model residuals (one named vector per predictor).
residuals <- lapply(swiss[2:6], function(predictor) {
  model <- stats::lm(Fertility ~ predictor, data = swiss)
  stats::resid(model)
})
# Smallest and largest residual across all five models.
y_range <- range(unlist(residuals, use.names = FALSE))
# 自定义函数
#' Panel function for ggduo(): regression panel or residual panel.
#'
#' Panels whose y variable is not "Residual" are drawn by
#' ggally_smooth_lm(). For the "Residual" row, the precomputed residuals for
#' the panel's x variable are plotted as points around a horizontal zero line.
#'
#' Relies on two objects defined by the surrounding script: `residuals`
#' (a named list of residual vectors keyed by predictor column name) and
#' `y_range` (y-axis limits shared by all residual panels).
#'
#' @param data Data frame for the panel.
#' @param mapping Aesthetic mapping for the panel; its `x` and `y` entries
#'   are read to identify which panel is being drawn.
#' @param ... Forwarded to ggally_smooth_lm() or geom_point().
#' @param line_color Colour of the horizontal zero line.
#' @param line_size Thickness of the horizontal zero line.
#' @return A ggplot object.
lm_or_resid <- function(data, mapping, ..., line_color = "red", line_size = 1) {
  # ggplot2 (>= 3.0) stores aesthetics as quosures. as.character() on a
  # quosure gives more than one string, so it cannot be used as an `if`
  # condition or as a `[[` name. as_label() returns the variable name as a
  # single string (rlang is a dependency of ggplot2).
  y_name <- rlang::as_label(mapping$y)
  if (y_name != "Residual") {
    return(ggally_smooth_lm(data, mapping, ...))
  }
  # Residual row: pair the panel's x values with that predictor's residuals.
  x_name <- rlang::as_label(mapping$x)
  resid_data <- data.frame(
    x = data[[x_name]],
    y = residuals[[x_name]]
  )
  # NOTE(review): ggplot2 >= 3.4.0 prefers `linewidth` for line thickness;
  # `size` is kept here so older ggplot2 versions still honour `line_size`.
  ggplot(data = data, mapping = mapping) +
    geom_hline(yintercept = 0, color = line_color, size = line_size) +
    ylim(y_range) +
    geom_point(data = resid_data, mapping = aes(x = x, y = y), ...)
}
# 画图
# Columns 2-6 on the x side; column 1 and the "Residual" placeholder
# (column 7) on the y side. Continuous panels are drawn by lm_or_resid().
ggduo(
  data = swiss,
  columnsX = 2:6,
  columnsY = c(1, 7),
  types = list(continuous = lm_or_resid)
)
以上就是今天的内容,希望对你有帮助哦!欢迎点赞、在看、关注、转发!
欢迎在评论区留言或直接添加我的微信!
欢迎关注我的公众号:医学和生信笔记
“医学和生信笔记”公众号主要分享:1.医学小知识、肛肠科小知识;2.R语言和Python相关的数据分析、可视化、机器学习等;3.生物信息学学习资料和自己的学习笔记!
往期精彩内容:
R语言tidy风格医学统计学02
R语言tidy风格医学统计学
R语言多个变量同时进行t检验、方差分析等