其他
箱线图进行方差分析并添加显著性标记
欢迎关注R语言数据分析指南
❝本节来介绍如何「在计算多样性指数的基础上来进行显著性标记」;数据及代码已经上传小编的VIP群,
❞有需要的观众老爷欢迎加入小编的VIP群,目前已经上传「公众号文档数据+代码170余篇」,扫描文末二维码加小编微信「付费99元」后邀请进群,「由于群名额有限,人满之后将不再添加新成员」,有需要的请尽早加入,早进早享受;如果对加群没兴趣,观众老爷可在文末找到获取数据的方式
加载R包
library(tidyverse)
library(vegan)
library(magrittr)
library(multcompView)
导入数据
# Read the OTU table (first column becomes the row names), transpose it and
# convert the transposed matrix back into a data frame.
# NOTE(review): read.delim() defaults to check.names = TRUE, so non-syntactic
# column names are rewritten on import -- confirm the resulting sample IDs
# still match the "sample" column of group.xls.
alpha <- as.data.frame(
  t(read.delim("otu_taxa_table-2.xls", sep = "\t", row.names = 1))
)

# Read the grouping table and force its column names to sample / group.
group <- set_colnames(read_tsv("group.xls"), c("sample", "group"))
定义函数计算多样性指数
# Compute alpha-diversity indices for every row of `x` and attach metadata.
#
# x: community table handed to vegan's diversity(), specnumber() and
#    estimateR() (presumably one sample per row -- confirm against caller).
# y: data frame with a "sample" column; it is left-joined onto the result
#    by "sample".
#
# Returns a data frame with the columns sample, Shannon, Simpson,
# observed_species, Chao1, ACE and pielou, followed by the other columns
# of `y`.
alpha_diversity <- function(x, y) {
  Shannon <- diversity(x, index = "shannon")
  Simpson <- diversity(x, index = "simpson")
  observed_species <- specnumber(x)

  # Run the richness estimators once and pick the rows by name instead of
  # calling estimateR() twice and indexing rows 2 and 4 by position.
  richness <- estimateR(x)
  Chao1 <- richness["S.chao1", ]
  ACE <- richness["S.ACE", ]

  # Pielou evenness: Shannon index divided by the natural log of the
  # species count; both inputs are reused rather than recomputed.
  pielou <- Shannon / log(observed_species)

  data.frame(Shannon, Simpson, observed_species, Chao1, ACE, pielou) %>%
    rownames_to_column("sample") %>%
    left_join(y, by = "sample")
}
数据整理
# Compute the indices, drop the sample ID plus the two indices that are not
# plotted, and reshape to long format: one row per (group, name, value).
df <- alpha_diversity(alpha, group) %>%
  select(-c(sample, observed_species, Simpson)) %>%
  pivot_longer(cols = -group, names_to = "name", values_to = "value")
定义颜色
# Fill palette handed to the plotting function (ten hex colours).
col <- c(
  "#1F78B4", "#33A02C",
  "#FB9A99", "#E31A1C",
  "#FDBF6F", "#B2DF8A",
  "#A6CEE3", "#BA7A70",
  "#9D4E3F", "#829BAB"
)
❝上面这些基本是上一篇文档的内容,为了文档结构的完整,将其放置于此;那么接下来就是本文的重点内容:多组之间进行方差分析并添加显著性标记
❞
方差分析
# One-way ANOVA followed by Tukey HSD for each diversity index.
# For every index the per-group mean and standard deviation are stored
# together with the compact letter display; all indices are stacked into
# `aov_data` (columns: group, name, value_mean, sd, Tukey).
p <- split(df, list(df$name))
str(p)

# Collect the per-index tables in a list and bind them once at the end
# instead of growing a data frame with rbind() inside the loop.
aov_results <- vector("list", length(p))

# seq_along(p) follows however many indices are present in `df` rather than
# assuming exactly four.
for (i in seq_along(p)) {
  # p[[i]] extracts the data frame itself, so the columns keep their
  # original names (group, name, value).
  index_data <- as.data.frame(p[[i]])

  # Model `group` as an explicit factor. NOTE(review): TukeyHSD() is
  # documented to work on factor terms, so the conversion is done here
  # instead of relying on implicit coercion of a character column.
  model_data <- index_data
  model_data$group <- factor(model_data$group)

  fit <- aov(value ~ group, data = model_data)
  tukey <- TukeyHSD(fit)
  # Named character vector: names are group levels, values are the letters.
  letters_by_group <- multcompLetters4(fit, tukey)$group$Letters

  dt <- index_data %>%
    group_by(group, name) %>%
    summarise(value_mean = mean(value), sd = sd(value)) %>%
    ungroup() %>%
    arrange(desc(value_mean)) %>%
    as.data.frame()

  # Look the letters up by group name. The original assigned them by
  # position, which is only correct while both tables happen to share the
  # same ordering.
  dt$Tukey <- unname(letters_by_group[as.character(dt$group)])

  aov_results[[i]] <- dt
}

aov_data <- bind_rows(aov_results)
构建显著性标记数据集
# Attach the per-group ANOVA summary (mean, sd, Tukey letter) to every
# observation.
df2 <- df %>%
  arrange(name) %>%
  left_join(aov_data, by = c("group", "name"))

# Label table for the plot: one row per (group, name) holding the Tukey
# letter and a y position, which is the group maximum plus a fixed
# per-index offset.
text <- df2 %>%
  group_by(group, name) %>%
  summarise(value = max(value)) %>%
  arrange(name) %>%
  ungroup() %>%
  left_join(
    df2 %>% select(group, name, Tukey),
    by = c("group", "name")
  ) %>%
  distinct() %>%
  mutate(
    value = case_when(
      name %in% c("ACE", "Chao1") ~ value + 90,
      name == "pielou" ~ value + 0.008,
      name == "Shannon" ~ value + 0.065
    )
  )
❝由于循环构建的为条形图的数据,但显著性标记是不区分图形的,因此在此通过上面的代码构建箱线图的数据;由于还存在离群值,因此做了较多的处理,各位观众老爷细细品味
❞
定义绘图函数
# Draw a faceted box plot with error-bar whiskers and no legend.
#
# data: data frame holding the columns given as `x` and `y` plus a `name`
#       column, which is used for faceting.
# x:    unquoted column for the x axis; also used as the fill aesthetic.
# y:    unquoted column for the y axis.
# z:    vector of fill colours passed to scale_fill_manual().
#
# Returns the ggplot object so further layers can be added with `+`.
make_plot <- function(data, x, y, z) {
  ggplot(data, aes(x = {{ x }}, y = {{ y }}, fill = {{ x }})) +
    stat_boxplot(
      geom = "errorbar",
      position = position_dodge(width = 0.2),
      width = 0.2
    ) +
    # Outlier points are not drawn (outlier.shape = NA).
    geom_boxplot(
      position = position_dodge(width = 0.2),
      width = 0.5,
      outlier.shape = NA
    ) +
    # `z` is an ordinary value, not a data-masked column, so it is passed
    # as-is rather than wrapped in {{ }}.
    scale_fill_manual(values = z) +
    facet_wrap(. ~ name, scales = "free") +
    theme_bw() +
    theme(
      panel.spacing.x = unit(0.2, "cm"),
      panel.spacing.y = unit(0.1, "cm"),
      axis.title = element_blank(),
      strip.text.x = element_text(size = 12, color = "black"),
      axis.text = element_text(color = "black"),
      axis.text.x = element_blank(),
      axis.ticks.x = element_blank(),
      # Fixed typo: the valid keyword is "none" (the original said "non").
      legend.position = "none",
      # Fixed malformed call: the original `units=,"cm"` left `units` empty
      # and passed "cm" as a separate positional argument.
      plot.margin = unit(c(0.3, 0.3, 0.3, 0.3), units = "cm")
    )
}
数据可视化
# Box plots of every index by group, with the Tukey letters from `text`
# drawn at the precomputed y positions.
make_plot(data = df, x = group, y = value, z = col) +
  geom_text(
    mapping = aes(label = Tukey, y = value),
    data = text
  )
数据获取
❝可以看到过程还是比较繁琐的,没有直接调用R包来得方便,但是通过此文你一定有所收获。「有需要获取本篇数据的欢迎转发此文档到朋友圈,30分钟后公众号后台截图给小编」,添加小编微信时「请备注一下个人信息及来意以便高效处理」,「当然更加推荐加入小编的VIP交流群」,绝对让你体会到物超所值
❞
小编微信
关注下方公众号下回更新不迷路
往期推荐