查看原文
其他

R语言绘制带有显著性字母标记的柱状图

文涛聊科研 微生信生物 2022-05-08

Tao Wen

2019年1月6日

引子

想想看,人生不觉得过了好多,事情还是需要简单的做。

library(tidyverse)
library(agricolae)
library(car)
library(reshape2)

需求

在很多时候,我们的需求其实很简单,做一个数据描述,方差分析和基本图形的展示; 在做方差分析之前,我们需要对数据进行正太检验,方差齐性检验。方差分析之后,我们做多重比较。

我们做正态检测

####数据导入

setwd("F:/#黄老师组会/第四次周日交流R语言/R语言入门")
data_wt = read.table("R语言入门.txt", header = T,                      row.names= 1, sep="\t");head(data_wt,n = 10L)
##      group   Tn ## CK1     CK 1.10 ## CK2     CK 1.11 ## CK3     CK 1.20 ## CK4     CK 1.02 ## CK5     CK   NA ## BOF1   BOF 2.00 ## BOF2   BOF 2.10 ## BOF3   BOF 2.01 ## BOF4   BOF 2.03 ## BOF5   BOF 3.03##数据整理
colnames(data_wt) = c("group","TN") head(data_wt)
##      group   TN ## CK1     CK 1.10 ## CK2     CK 1.11 ## CK3     CK 1.20 ## CK4     CK 1.02 ## CK5     CK   NA ## BOF1   BOF 2.00#data_wt$TN = as.numeric(data_wt$TN)#####数据数据分析与转换+可视化########是否符合正态分布#######

qqPlot(lm(data_wt$TN ~ data_wt$group, data=data_wt),       simulate=TRUE, main="Q-Q Plot", lables=FALSE)

## BOF1 BOF5 ##    6   10##正态检验

shapiro.test(data_wt$TN)
## ##  Shapiro-Wilk normality test ## ## data:  data_wt$TN ## W = 0.85895, p-value = 0.09338#无论Bartlett检验还是Levene检验,两者的P值都大于0.05,#因此接受原假设:样本之间的方差是相同的。因此可以接着做方差分析了。# Bartlett检验

bartlett.test(data_wt$TN ~ data_wt$group, data=data_wt)
## ##  Bartlett test of homogeneity of variances ## ## data:  data_wt$TN by data_wt$group ## Bartlett's K-squared = 6.1377, df = 1, p-value = 0.01323# Levene检验,对原始数据的正态性不敏感

leveneTest(data_wt$TN ~ data_wt$group, data=data_wt)
## Levene's Test for Homogeneity of Variance (center = median) ##       Df F value Pr(>F) ## group  1  0.6354 0.4516 ##        7#######方差检验model<-aov(TN ~ group, data=data_wt)
#进行多重比较,不矫正P值
out <- LSD.test(model,"group", p.adj="none")#结果显示:标记字母法out$group
##         TN groups ## BOF 2.2340      a ## CK  1.1075      bgrou<- group_by(data_wt,group) bar_data <- summarise(grou,sd(TN,na.rm = T)) bar_data2 = merge(bar_data ,out$group,by.x="group",by.y = "row.names",all = F)#colnames(bar_data2) = c("group", "SD","TN","label");bar_data2##   group        SD     TN label ## 1   BOF 0.4466878 2.2340     a ## 2    CK 0.0736546 1.1075     b######画一张图看看

library("ggplot2") a = max(bar_data2$TN)*1.5mi=c("#1B9E77" ,"#D95F02", "#7570B3","#E7298A") p = ggplot(bar_data2 , aes(x = group, y = TN)) +  geom_bar(stat = "identity", width = 0.4,position = "dodge",colour="black",fill="#E6AB02") +  geom_errorbar(aes(ymin=TN - SD,                    ymax=TN + SD),                colour="black",width=0.1,size=1)+  geom_text(aes(label = label ,y= TN + SD, x = group),vjust = -0.3,size = 6)+  scale_y_continuous(expand = c(0,0),limits = c(0,a))+  labs(x="TN of all group",       y="group",       title = "") p

p=p+theme_bw()+  geom_hline(aes(yintercept=mean(TN + SD)), colour="black", linetype=2) +  geom_vline(aes(xintercept=0), colour="black", linetype="dashed") +  scale_fill_manual(values = mi, guide = guide_legend(title = NULL))+  theme(        panel.grid.major=element_blank(),    panel.grid.minor=element_blank(),        plot.title = element_text(vjust = -8.5,hjust = 0.1),    axis.title.y =element_text(size = 20,face = "bold",colour = "black"),    axis.title.x =element_text(size = 24,face = "bold",colour = "black"),    axis.text = element_text(size = 20,face = "bold"),    axis.text.x = element_text(colour = "black",size = 14),    axis.text.y = element_text(colour = "black",size = 14),    legend.text = element_text(size = 15,face = "bold")    #legend.position = "none"#是否删除图例      ) p

整个过程流程化,我们编写一个脚本



我们编号之后,可以在几秒钟得到结果。


setwd("F:/#文涛分析技术/a2R语言技术/R语言脚本学习合集/理化数据得到及其基本统计分析")
# 读入实验设计
data_wt = read.table("理化数据示范.txt", header=T, row.names= 1, sep="\t");head(data_wt)
##      group   TN   TP    TK SOC TOC ## CK1     CK 1.10 11.0  5.50  12 120 ## CK2     CK 1.11 11.1  5.55  13 130 ## CK3     CK 1.20 12.0  6.00  11 110 ## CK4     CK 1.02 10.2  5.10  14  NA ## CK5     CK   NA 20.2 10.10  15  NA ## BOF1   BOF 2.00 20.0 10.00   5  50head(data_wt)##      group   TN   TP    TK SOC TOC ## CK1     CK 1.10 11.0  5.50  12 120 ## CK2     CK 1.11 11.1  5.55  13 130 ## CK3     CK 1.20 12.0  6.00  11 110 ## CK4     CK 1.02 10.2  5.10  14  NA ## CK5     CK   NA 20.2 10.10  15  NA ## BOF1   BOF 2.00 20.0 10.00   5  50FileName <- paste("all_Q_Q_正态分布图", ".pdf", sep = "") pdf(file=FileName)for (i in 2:ncol(data_wt)) {  opar = par(no.readonly = T)  par(mfrow = c(3,2))  for (i in 2:ncol(data_wt)) {    data_i = data_wt[i]    ee  <-as.matrix(data_i)    dd <- as.vector(ee)    aoc = bartlett.test(dd ~ group, data=data_wt)  b = round(aoc[[3]],3)    name_i = colnames(data_wt[i])    p = paste("p: ",b,name_i, sep = "");p    qqPlot(lm(dd ~ group, data = data_wt),           simulate=TRUE, main="Q-Q Plot", lables=F,ylab= p)  }  par(opar) } dev.off() ## png ##   2for (i in 2:ncol(data_wt)) {    data_i = data_wt[i]  ee    <-as.matrix(data_i)  dd <- as.vector(ee)  name_i = colnames(data_wt[i])  model<-aov(dd ~ group, data=data_wt)#方差分析  out <- LSD.test(model,"group", p.adj="none")#进行多重比较,不矫正P值  aa = out$group#结果显示:标记字母法  aa$group = row.names(aa)  a = max(aa$dd)*1.2  aa  wen1 = as.data.frame(tapply(dd,data_wt$group,mean,na.rm=TRUE))  wen2 = as.data.frame(tapply(dd,data_wt$group,sd,na.rm=TRUE))  went = cbind(wen1,wen2)  wentao = merge(aa,went, by="row.names",all=F)  colnames(wentao) = c(colnames(wentao[1:4]),"mean" ,"SD")  aa = mutate(wentao, ymin = mean - SD, ymax =  mean + SD)      mi=c("#1B9E77" ,"#D95F02", "#7570B3","#E7298A")  p = ggplot(aa , aes(x = group, y = dd)) +    geom_bar(stat = "identity", width = 0.4,position = "dodge",colour="black",fill="#E6AB02") +    geom_text(aes(label = groups,y=ymax, x = group,vjust = -0.3,size = 6))+    geom_errorbar(aes(ymin=ymin,                      ymax=ymax),                  colour="black",width=0.1,size = 1)+    scale_y_continuous(expand = c(0,0),limits = c(0,a))+    labs(x=paste(name_i,"of all group", sep = "_"),         y="group",         title = "")    p=p+theme_bw()+    geom_hline(aes(yintercept=mean(dd)), colour="black", linetype=2) +    geom_vline(aes(xintercept=0), colour="black", linetype="dashed") +    scale_fill_manual(values = mi, guide = guide_legend(title = NULL))+    theme(            panel.grid.major=element_blank(),      panel.grid.minor=element_blank(),            plot.title = element_text(vjust = -8.5,hjust = 0.1),      axis.title.y =element_text(size = 20,face = "bold",colour = "black"),      axis.title.x =element_text(size = 24,face = "bold",colour = "black"),      axis.text = element_text(size = 20,face = "bold"),      axis.text.x = element_text(colour = "black",size = 14),      axis.text.y = element_text(colour = "black",size = 14),      legend.text = element_text(size = 15,face = "bold"),      legend.position = "none"#是否删除图例          )  p  FileName <- paste(name_i," 字母带bar值柱状图", ".pdf", sep = "_")  ggsave(FileName, p, width = 8, height = 8)   } dev.off() ## null device ##           1p

##其他方法# Levene检验,对原始数据的正态性不敏感
leveneTest(TN ~ group, data=data_wt)
## Levene's Test for Homogeneity of Variance (center = median) ##       Df F value Pr(>F) ## group  3  0.5092 0.6823 ##       14#无论Bartlett检验还是Levene检验,两者的P值都大于0.05,因此接受原假设:样本之间的方差是相同的。因此可以接着做方差分析了。#方差分析,这里缺失值自动忽略,不需处理
model<-aov(TN ~ group, data=data_wt)#进行多重比较,不矫正P值out <- LSD.test(model,"group", p.adj="none")#结果显示:标记字母法out$group
##         TN groups ## OF  3.3400      a ## BOF 2.2340      b ## CF  1.9750      b ## CK  1.1075      c

您可能也对以下帖子感兴趣

文章有问题?点此查看未经处理的缓存