其他
R语言绘制带有显著性字母标记的柱状图
新
年
快
乐
Tao Wen
2019年1月6日
想想看,人生不觉得过了好多,事情还是需要简单的做。
library(tidyverse)
library(agricolae)
library(car)
library(reshape2)
需求
在很多时候,我们的需求其实很简单,做一个数据描述,方差分析和基本图形的展示; 在做方差分析之前,我们需要对数据进行正太检验,方差齐性检验。方差分析之后,我们做多重比较。
我们做正态检测
####数据导入
setwd("F:/#黄老师组会/第四次周日交流R语言/R语言入门")
data_wt = read.table("R语言入门.txt", header = T,
row.names= 1, sep="\t");head(data_wt,n = 10L)
## group Tn
## CK1 CK 1.10
## CK2 CK 1.11
## CK3 CK 1.20
## CK4 CK 1.02
## CK5 CK NA
## BOF1 BOF 2.00
## BOF2 BOF 2.10
## BOF3 BOF 2.01
## BOF4 BOF 2.03
## BOF5 BOF 3.03
##数据整理
colnames(data_wt) = c("group","TN")
head(data_wt)
## group TN
## CK1 CK 1.10
## CK2 CK 1.11
## CK3 CK 1.20
## CK4 CK 1.02
## CK5 CK NA
## BOF1 BOF 2.00
#data_wt$TN = as.numeric(data_wt$TN)#####数据数据分析与转换+可视化########是否符合正态分布#######
qqPlot(lm(data_wt$TN ~ data_wt$group, data=data_wt),
simulate=TRUE, main="Q-Q Plot", lables=FALSE)
## BOF1 BOF5
## 6 10
##正态检验
shapiro.test(data_wt$TN)
##
## Shapiro-Wilk normality test
##
## data: data_wt$TN
## W = 0.85895, p-value = 0.09338
#无论Bartlett检验还是Levene检验,两者的P值都大于0.05,#因此接受原假设:样本之间的方差是相同的。因此可以接着做方差分析了。# Bartlett检验
bartlett.test(data_wt$TN ~ data_wt$group, data=data_wt)
##
## Bartlett test of homogeneity of variances
##
## data: data_wt$TN by data_wt$group
## Bartlett's K-squared = 6.1377, df = 1, p-value = 0.01323
# Levene检验,对原始数据的正态性不敏感
leveneTest(data_wt$TN ~ data_wt$group, data=data_wt)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 1 0.6354 0.4516
## 7
#######方差检验model<-aov(TN ~ group, data=data_wt)
#进行多重比较,不矫正P值
out <- LSD.test(model,"group", p.adj="none")#结果显示:标记字母法out$group
## TN groups
## BOF 2.2340 a
## CK 1.1075 b
grou<- group_by(data_wt,group)
bar_data <- summarise(grou,sd(TN,na.rm = T))
bar_data2 = merge(bar_data ,out$group,by.x="group",by.y = "row.names",all = F)#colnames(bar_data2) = c("group", "SD","TN","label");bar_data2
## group SD TN label
## 1 BOF 0.4466878 2.2340 a
## 2 CK 0.0736546 1.1075 b
######画一张图看看
library("ggplot2")
a = max(bar_data2$TN)*1.5mi=c("#1B9E77" ,"#D95F02", "#7570B3","#E7298A")
p = ggplot(bar_data2 , aes(x = group, y = TN)) +
geom_bar(stat = "identity", width = 0.4,position = "dodge",colour="black",fill="#E6AB02") +
geom_errorbar(aes(ymin=TN - SD,
ymax=TN + SD),
colour="black",width=0.1,size=1)+
geom_text(aes(label = label ,y= TN + SD, x = group),vjust = -0.3,size = 6)+
scale_y_continuous(expand = c(0,0),limits = c(0,a))+
labs(x="TN of all group",
y="group",
title = "")
p
p=p+theme_bw()+
geom_hline(aes(yintercept=mean(TN + SD)), colour="black", linetype=2) +
geom_vline(aes(xintercept=0), colour="black", linetype="dashed") +
scale_fill_manual(values = mi, guide = guide_legend(title = NULL))+
theme(
panel.grid.major=element_blank(),
panel.grid.minor=element_blank(),
plot.title = element_text(vjust = -8.5,hjust = 0.1),
axis.title.y =element_text(size = 20,face = "bold",colour = "black"),
axis.title.x =element_text(size = 24,face = "bold",colour = "black"),
axis.text = element_text(size = 20,face = "bold"),
axis.text.x = element_text(colour = "black",size = 14),
axis.text.y = element_text(colour = "black",size = 14),
legend.text = element_text(size = 15,face = "bold") #legend.position = "none"#是否删除图例
)
p
整个过程流程化,我们编写一个脚本
我们编号之后,可以在几秒钟得到结果。
setwd("F:/#文涛分析技术/a2R语言技术/R语言脚本学习合集/理化数据得到及其基本统计分析")
# 读入实验设计
data_wt = read.table("理化数据示范.txt", header=T, row.names= 1, sep="\t");head(data_wt)
## group TN TP TK SOC TOC
## CK1 CK 1.10 11.0 5.50 12 120
## CK2 CK 1.11 11.1 5.55 13 130
## CK3 CK 1.20 12.0 6.00 11 110
## CK4 CK 1.02 10.2 5.10 14 NA
## CK5 CK NA 20.2 10.10 15 NA
## BOF1 BOF 2.00 20.0 10.00 5 50
head(data_wt)
## group TN TP TK SOC TOC
## CK1 CK 1.10 11.0 5.50 12 120
## CK2 CK 1.11 11.1 5.55 13 130
## CK3 CK 1.20 12.0 6.00 11 110
## CK4 CK 1.02 10.2 5.10 14 NA
## CK5 CK NA 20.2 10.10 15 NA
## BOF1 BOF 2.00 20.0 10.00 5 50
FileName <- paste("all_Q_Q_正态分布图", ".pdf", sep = "")
pdf(file=FileName)for (i in 2:ncol(data_wt)) {
opar = par(no.readonly = T)
par(mfrow = c(3,2)) for (i in 2:ncol(data_wt)) {
data_i = data_wt[i]
ee <-as.matrix(data_i)
dd <- as.vector(ee)
aoc = bartlett.test(dd ~ group, data=data_wt)
b = round(aoc[[3]],3)
name_i = colnames(data_wt[i])
p = paste("p: ",b,name_i, sep = "");p
qqPlot(lm(dd ~ group, data = data_wt),
simulate=TRUE, main="Q-Q Plot", lables=F,ylab= p)
}
par(opar)
}
dev.off()
## png
## 2
for (i in 2:ncol(data_wt)) {
data_i = data_wt[i]
ee <-as.matrix(data_i)
dd <- as.vector(ee)
name_i = colnames(data_wt[i])
model<-aov(dd ~ group, data=data_wt)#方差分析
out <- LSD.test(model,"group", p.adj="none")#进行多重比较,不矫正P值
aa = out$group#结果显示:标记字母法
aa$group = row.names(aa)
a = max(aa$dd)*1.2
aa
wen1 = as.data.frame(tapply(dd,data_wt$group,mean,na.rm=TRUE))
wen2 = as.data.frame(tapply(dd,data_wt$group,sd,na.rm=TRUE))
went = cbind(wen1,wen2)
wentao = merge(aa,went, by="row.names",all=F)
colnames(wentao) = c(colnames(wentao[1:4]),"mean" ,"SD")
aa = mutate(wentao, ymin = mean - SD, ymax = mean + SD)
mi=c("#1B9E77" ,"#D95F02", "#7570B3","#E7298A")
p = ggplot(aa , aes(x = group, y = dd)) +
geom_bar(stat = "identity", width = 0.4,position = "dodge",colour="black",fill="#E6AB02") +
geom_text(aes(label = groups,y=ymax, x = group,vjust = -0.3,size = 6))+
geom_errorbar(aes(ymin=ymin,
ymax=ymax),
colour="black",width=0.1,size = 1)+
scale_y_continuous(expand = c(0,0),limits = c(0,a))+
labs(x=paste(name_i,"of all group", sep = "_"),
y="group",
title = "")
p=p+theme_bw()+
geom_hline(aes(yintercept=mean(dd)), colour="black", linetype=2) +
geom_vline(aes(xintercept=0), colour="black", linetype="dashed") +
scale_fill_manual(values = mi, guide = guide_legend(title = NULL))+
theme(
panel.grid.major=element_blank(),
panel.grid.minor=element_blank(),
plot.title = element_text(vjust = -8.5,hjust = 0.1),
axis.title.y =element_text(size = 20,face = "bold",colour = "black"),
axis.title.x =element_text(size = 24,face = "bold",colour = "black"),
axis.text = element_text(size = 20,face = "bold"),
axis.text.x = element_text(colour = "black",size = 14),
axis.text.y = element_text(colour = "black",size = 14),
legend.text = element_text(size = 15,face = "bold"),
legend.position = "none"#是否删除图例
)
p
FileName <- paste(name_i," 字母带bar值柱状图", ".pdf", sep = "_")
ggsave(FileName, p, width = 8, height = 8)
}
dev.off()
## null device
## 1
p
##其他方法# Levene检验,对原始数据的正态性不敏感
leveneTest(TN ~ group, data=data_wt)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 3 0.5092 0.6823
## 14
#无论Bartlett检验还是Levene检验,两者的P值都大于0.05,因此接受原假设:样本之间的方差是相同的。因此可以接着做方差分析了。#方差分析,这里缺失值自动忽略,不需处理
model<-aov(TN ~ group, data=data_wt)#进行多重比较,不矫正P值out <- LSD.test(model,"group", p.adj="none")#结果显示:标记字母法out$group
## TN groups
## OF 3.3400 a
## BOF 2.2340 b
## CF 1.9750 b
## CK 1.1075 c