其他
画一个好看的桑基图?
今天学习下桑基图的画法,对于多个有上下游关系或者从属关系的数据来说,桑基图是很好的表现形式。在我的日常工作中,我会把中药以及它们的四气五味等性质用这种图形表现出来,一些生信数据也会用到这种表现形式。
加载R包和数据
library(tidyverse)
## -- Attaching packages ----------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5 v purrr 0.3.4
## v tibble 3.1.6 v dplyr 1.0.8
## v tidyr 1.2.0 v stringr 1.4.0
## v readr 2.0.1 v forcats 0.5.1
## -- Conflicts -------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(ggalluvial)
“这是一份有关巧克力进口国、出口国信息的数据。数据来自于Flavors of Cacao by way of Georgios and Kelsey。一共10列,2530行。
chocolate <- readRDS("chocolate.rds")
glimpse(chocolate)
## Rows: 2,530
## Columns: 10
## $ ref <dbl> 2454, 2458, 2454, 2542, 2546, 2546, 2~
## $ company_manufacturer <chr> "5150", "5150", "5150", "5150", "5150~
## $ company_location <chr> "U.S.A.", "U.S.A.", "U.S.A.", "U.S.A.~
## $ review_date <dbl> 2019, 2019, 2019, 2021, 2021, 2021, 2~
## $ country_of_bean_origin <chr> "Tanzania", "Dominican Republic", "Ma~
## $ specific_bean_origin_or_bar_name <chr> "Kokoa Kamili, batch 1", "Zorzal, bat~
## $ cocoa_percent <chr> "76%", "76%", "76%", "68%", "72%", "8~
## $ ingredients <chr> "3- B,S,C", "3- B,S,C", "3- B,S,C", "~
## $ most_memorable_characteristics <chr> "rich cocoa, fatty, bready", "cocoa, ~
## $ rating <dbl> 3.25, 3.50, 3.75, 3.00, 3.00, 3.25, 3~
数据处理
数就是图,图就是数,想要好看的图,就要先准备好看的数据。数据真的非常重要。
# 计算出口国的数量
ranking_exports <- chocolate %>%
count(country_of_bean_origin) %>%
filter(country_of_bean_origin!="Blend") %>%
arrange(-n) %>%
top_n(5)
## Selecting by n
top_exports <- ranking_exports$country_of_bean_origin
# 计算进口国的数量
ranking_imports <- chocolate %>%
count(company_location) %>%
arrange(-n) %>%
top_n(5)
## Selecting by n
top_imports <- ranking_imports$company_location
# 计算生产商的数量
ranking_manu <- chocolate %>%
count(company_manufacturer) %>%
arrange(-n) %>%
top_n(10)
## Selecting by n
top_manu <- ranking_manu$company_manufacturer
# 筛选合适的数据
top_choco <- chocolate %>%
filter(company_location %in% top_imports) %>%
filter(country_of_bean_origin%in%top_exports) %>%
filter(company_manufacturer%in%top_manu) %>%
count(company_manufacturer,country_of_bean_origin, company_location) %>%
mutate(origin = "origin") %>%
rename(total = "n")
top_choco
## # A tibble: 44 x 5
## company_manufacturer country_of_bean_origin company_location total origin
## <chr> <chr> <chr> <int> <chr>
## 1 A. Morin Ecuador France 1 origin
## 2 A. Morin Madagascar France 1 origin
## 3 A. Morin Peru France 7 origin
## 4 A. Morin Venezuela France 5 origin
## 5 Arete Dominican Republic U.S.A. 2 origin
## 6 Arete Ecuador U.S.A. 4 origin
## 7 Arete Madagascar U.S.A. 1 origin
## 8 Arete Peru U.S.A. 3 origin
## 9 Bonnat Dominican Republic France 1 origin
## 10 Bonnat Ecuador France 1 origin
## # ... with 34 more rows
画图
# 选择合适的颜色
color_palette <- c("#8B9A46",
"#8B9A46",
"#8B9A46",
"#8B9A46",
"#541212",
"#8B9A46",
"#8B9A46",
"#8B9A46")
下面就是画图。
plot <- ggplot() +
geom_alluvium(data = top_choco,
aes(axis1 = country_of_bean_origin,
axis2 = company_location,
axis3 = company_manufacturer,
y = total,
fill = country_of_bean_origin),
curve_type = "quintic",
width = 1/12,
knot.pos = 0.4,
alpha= 0.7)+
geom_stratum(data = top_choco,
aes(axis1 = country_of_bean_origin,
axis2 = company_location,
axis3 = company_manufacturer,
y = total,
fill = country_of_bean_origin),
width = 1/6, color = "black", alpha=1, fill ="white",
linetype="dotted") +
scale_color_manual(values = color_palette) +
scale_fill_manual(values = color_palette) +
scale_x_continuous(breaks = 1:3,
labels = c("Origin", "Destination country", "Company"),
position = "bottom") +
geom_text(stat = "stratum",
data = top_choco,
aes(axis1 = country_of_bean_origin,
axis2 = company_location,
axis3 = company_manufacturer,
y = total,
label = after_stat(stratum)),
color = "black",
size = 3) +
theme_minimal() +
labs(y = "",
x="",
title = "可可到巧克力:从产地到店铺",
subtitle = "巧克力的流通路径:从产地到巧克力店铺以及生产商。\n从图中可以看出,委内瑞拉是最大的原产地,主要消费国家则是美国和法国。",
caption = "Data Source: Flavors of Cacao by way of Georgios and Kelsey."
)+
theme(
legend.position = "none",
plot.title = element_text(size = 20,
face = "bold"),
plot.subtitle = element_text(size=12),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.text.y = element_blank(),
axis.text.x = element_text(size = 10, face="bold")
)
以上就是今天的内容,希望对你有帮助哦!欢迎点赞、在看、关注、转发!
欢迎在评论区留言或直接添加我的微信!
欢迎关注公众号:医学和生信笔记
“医学和生信笔记 公众号主要分享:1.医学小知识、肛肠科小知识;2.R语言和Python相关的数据分析、可视化、机器学习等;3.生物信息学学习资料和自己的学习笔记!
往期回顾
2022-01-11
2022-01-12
2022-01-13
2022-01-14
2022-01-15