画一幅更好看的杠铃图!
Try to learn everything about something!
今天学习一幅比较复杂的条形图(或者叫 杠铃图/哑铃图/dumbbell chart)的画法。
这种图形之前的推文中也模仿过,点击链接查看:
R语言画dumbbell chart
不过之前的比较简单,而且是用bbplot
中的geom_dumbbell
函数直接完成的,今天学习一个复杂的,直接用ggplot2
手撸!
使用的数据也是前几天的erasmus
数据,想要数据的小伙伴直接公众号后台回复20220317即可获得。
这个erasmus
是欧盟的一个交换生项目,有兴趣的小伙伴可以自己了解下。
“为了促进欧洲高等教育改革,欧盟委员会于2003年提出一个称为Erasmus Mundus的高等教育项目计划。该项目计划于2003年经欧洲议会和欧洲理事会批准通过。Erasmus Mundus 项目计划定位在“硕士”层次的高等教育交流,通过建立100个跨大学的“欧洲硕士专业”点和提供近上万个奖学金和访问学者名额的方法,吸引更多外国教师和学生在欧洲的大学学习,加强欧盟成员国大学之间的学术联系,提高欧洲高等教育的质量和竞争力,扩大欧洲高等教育在世界上的影响。该项目既面向欧洲学生,也面向第三国(欧洲以外)的留学生和访问学者。
这个数据主要是记录了最近几年这个项目的一些数据,比如哪些国家、每个国家每年派出去多少学生、每年接收多少学生等。
前几天使用这个数据画了一幅网络图,今天用另一种方式展示这个数据。
我们想看一下从2014-2015年间,到2019-2020年间,每个国家接收/派出的学生数量是增加还是减少?具体数量是多少?
加载数据和R包
library(tidyverse)
## -- Attaching packages ----------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5 v purrr 0.3.4
## v tibble 3.1.6 v dplyr 1.0.8
## v tidyr 1.2.0 v stringr 1.4.0
## v readr 2.0.1 v forcats 0.5.1
## -- Conflicts -------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(ggplot2)
library(countrycode)
load(file = "erasmus.Rdata")
# readRDS("erasmus.rds")
简单查看下数据情况
dim(erasmus) # 164635行,24列,数据很大,转为csv会乱码
## [1] 164635 24
glimpse(erasmus)
## Rows: 164,635
## Columns: 24
## $ project_reference <chr> "2014-1-AT02-KA347-000139", "2014-~
## $ academic_year <chr> "2014-2015", "2014-2015", "2014-20~
## $ mobility_start_month <chr> "2014-11", "2014-11", "2014-11", "~
## $ mobility_end_month <chr> "2014-11", "2014-11", "2014-11", "~
## $ mobility_duration <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1~
## $ activity_mob <chr> "National youth meetings", "Nation~
## $ field_of_education <chr> "? Unknown ?", "? Unknown ?", "? U~
## $ participant_nationality <chr> "AT", "AT", "AT", "AT", "AT", "AT"~
## $ education_level <chr> "??? - ? Unknown ?", "??? - ? Unkn~
## $ participant_gender <chr> "Female", "Female", "Female", "Mal~
## $ participant_profile <chr> "Learner", "Learner", "Learner", "~
## $ special_needs <chr> "No", "No", "No", "No", "No", "No"~
## $ fewer_opportunities <chr> "Yes", "Yes", "Yes", "Yes", "Yes",~
## $ group_leader <chr> "No", "No", "No", "No", "No", "No"~
## $ participant_age <dbl> 13, 14, 15, 14, 15, 15, 16, 17, 18~
## $ sending_country_code <chr> "AT", "AT", "AT", "AT", "AT", "AT"~
## $ sending_city <chr> "Dornbirn", "Dornbirn", "Dornbirn"~
## $ sending_organization <chr> "bOJA - Bundesweites Netzwerk Offe~
## $ sending_organisation_erasmus_code <chr> "-", "-", "-", "-", "-", "-", "-",~
## $ receiving_country_code <chr> "AT", "AT", "AT", "AT", "AT", "AT"~
## $ receiving_city <chr> "Dornbirn", "Dornbirn", "Dornbirn"~
## $ receiving_organization <chr> "bOJA - Bundesweites Netzwerk Offe~
## $ receiving_organisation_erasmus_code <chr> "-", "-", "-", "-", "-", "-", "-",~
## $ participants <dbl> 2, 3, 3, 4, 2, 2, 1, 3, 1, 2, 1, 2~
数据处理
我们需要分别计算2014-2015年、2019年-2020年,每个国家派出了多少学生,接受了多少学生。
# 先计算2019-2020年间的
# 计算派出的数量
sending_2020 <- erasmus %>%
filter(academic_year == "2019-2020") %>%
select(sending_country_code, academic_year) %>%
group_by(sending_country_code, academic_year) %>%
rename(country = sending_country_code) %>%
summarise(sending=n()) %>%
ungroup()
## `summarise()` has grouped output by 'country'. You can override using
## the `.groups` argument.
# 计算接收的数量
receiving_2020 <- erasmus %>%
filter(academic_year == "2019-2020") %>%
select(receiving_country_code, academic_year) %>%
group_by(receiving_country_code, academic_year) %>%
rename(country = receiving_country_code) %>%
summarise(receiving=n()) %>%
ungroup()
## `summarise()` has grouped output by 'country'. You can override using
## the `.groups` argument.
# 合并数据
data_2020 <- inner_join(sending_2020, receiving_2020) %>%
mutate(total_2020=sending+receiving) %>%
select(country, total_2020)
## Joining, by = c("country", "academic_year")
# 再计算2014-2015年间的
# 计算派出的数量
sending_2014 <- erasmus %>%
filter(academic_year == "2014-2015") %>%
select(sending_country_code, academic_year) %>%
group_by(sending_country_code, academic_year) %>%
rename(country = sending_country_code) %>%
summarise(sending=n()) %>%
ungroup()
## `summarise()` has grouped output by 'country'. You can override using
## the `.groups` argument.
# 计算接收的数量
receiving_2014 <- erasmus %>%
filter(academic_year == "2014-2015") %>%
select(receiving_country_code, academic_year) %>%
group_by(receiving_country_code, academic_year) %>%
rename(country = receiving_country_code) %>%
summarise(receiving=n()) %>%
ungroup()
## `summarise()` has grouped output by 'country'. You can override using
## the `.groups` argument.
# 合并数据
data_2014 <- inner_join(sending_2014, receiving_2014) %>%
mutate(total_2014=sending+receiving) %>%
select(country, total_2014)
## Joining, by = c("country", "academic_year")
# 把2个年间的数据合并
data_final<-inner_join(data_2014, data_2020, by='country') %>%
drop_na()
# 把国家代码变成国家名字,使用countrycode这个神奇R包
data_final <- data_final %>%
mutate(countryName = countrycode(data_final$country, origin='iso2c', destination = 'country.name')) %>%
# EL和UK手动转换一下
mutate(countryName = replace(countryName, country=='EL', 'Greece')) %>%
mutate(countryName = replace(countryName, country=='UK', 'United Kingdom'))
这样我们需要的数据基本就搞定了,接下来再计算一下总数、差数以及增加一个颜色映射列。
data_final <- data_final %>%
mutate(diff = total_2020 - total_2014) %>%
mutate(balance_category =
case_when(diff >= 0 ~ "Positive",
diff < 0 ~ "Negative")) %>%
mutate(sum_total = total_2014 + total_2020)
data_final
## # A tibble: 27 x 7
## country total_2014 total_2020 countryName diff balance_category sum_total
## <chr> <int> <int> <chr> <int> <chr> <int>
## 1 AT 175 1579 Austria 1404 Positive 1754
## 2 BG 287 1287 Bulgaria 1000 Positive 1574
## 3 CY 44 710 Cyprus 666 Positive 754
## 4 CZ 234 1105 Czechia 871 Positive 1339
## 5 DE 1227 6170 Germany 4943 Positive 7397
## 6 DK 36 278 Denmark 242 Positive 314
## 7 EE 631 1356 Estonia 725 Positive 1987
## 8 EL 139 1219 Greece 1080 Positive 1358
## 9 ES 613 4977 Spain 4364 Positive 5590
## 10 FR 1377 4720 France 3343 Positive 6097
## # ... with 17 more rows
画图
有了数据,画图不过就是使用正确的元素进行叠加图层,再调整一下细节。
不过这幅图确实细节很多,值的好好学习!
plot <- ggplot(data_final,
aes(y = reorder(countryName, sum_total), color=balance_category))+
# 画线,加粗,变颜色,这部分是灰色背景线
geom_segment(aes(x = -800, xend = total_2020, yend = countryName),
size = 5,
color = "grey70",
alpha = 0.5)+
# 淡蓝色线条
geom_segment(aes(x = total_2020, xend = total_2014, yend = countryName),
size = 5,
alpha =0.6)+
# 两边加点
geom_point(aes(x = total_2020), size = 4.5, alpha = 1)+
geom_point(aes(x = total_2014), size = 4.5, alpha = 1)+
xlim(-800, 7000)+
# 打标签,这一步非常重要,后面再去掉纵坐标!
geom_text(aes(label = countryName), x = -800, hjust = 0, size = 3.5, fontface = "bold")+
scale_color_manual(values = c("firebrick2", "steelblue4"), guide = "none")+
# 加点注释
labs(title = "ERASMUS项目参与人数明显上升",
subtitle = "2020年与2014年相比,大多数欧洲国家都派出/接收了更多的学生",
x = "派出或接收的学生数量",
caption = "数据来源: Data.Europe\n这幅图展示了2014-2015学年到2019-2020学年ERASMUS项目中每个国家派出/接收的学生数量",
y=NULL)+
# 自定义图例
geom_rect(xmax = 5800, ymax = 8, xmin = 3600, ymin = 4, alpha = 0.1, fill = "aliceblue", color = "steelblue4")+
geom_point(x = 4130, y = 7, size = 4.5, color = "steelblue4")+
geom_point(x = 4130, y = 6, size = 4.5, color = "firebrick2")+
geom_text(x = 4200, y = 7, label = "Increase", color = "steelblue4", hjust = 0, size = 3.5, fontface = "bold")+
geom_text(x = 4200, y = 6, label = "Decreaset", color = "firebrick2", hjust = 0, size = 3.5, fontface = "bold")+
geom_point(x = 3930, y = 5, size = 4.5, color = "grey30")+
geom_point(x = 4120, y = 5, size = 4.5, color = "grey30")+
geom_segment(x = 4120, xend = 3930, y = 5, yend = 5, color = "grey30", size = 5)+
geom_text(label = "Change from 2014 to 2020 ", y = 5, x = 4200, hjust = 0, color = "grey30")+
# 细节调整
theme(
panel.background = element_rect(fill = "aliceblue"),
plot.background = element_rect(fill = "aliceblue"),
panel.grid = element_blank(),
panel.grid.major.x = element_line(color = "steelblue2", linetype = "dashed"),
plot.title = element_text(size = 20, hjust = 0, color = "steelblue4", face = "bold"),
plot.subtitle = element_text(size = 14, color = "firebrick4", hjust = 0),
plot.caption = element_text(color = "firebrick4", hjust=0),
axis.line.x = element_line(color = "steelblue2", size = 0.5),
axis.text = element_text(size = 10, color = "steelblue4", face = "bold"),
axis.title = element_text(color = "steelblue4", face = "bold"),
axis.line.y = element_blank(),
axis.text.y = element_blank(),
axis.ticks.y= element_blank(),
axis.ticks.x = element_blank())
ggsave("1.jpg", plot = plot, heigh=8, width = 12, unit='in')
大功告成,出图如下:
这样一幅图是不是感觉非常高大上呢!
以上就是今天的内容,希望对你有帮助哦!欢迎点赞、在看、关注、转发!
欢迎在评论区留言或直接添加我的微信!
欢迎关注公众号:医学和生信笔记
“医学和生信笔记 公众号主要分享:1.医学小知识、肛肠科小知识;2.R语言和Python相关的数据分析、可视化、机器学习等;3.生物信息学学习资料和自己的学习笔记!
往期回顾
2022-03-17
2022-03-19
2022-03-18
2022-03-11
2021-12-31