查看原文
其他

使用R实现批量方差分析(aov)和多重比较(LSD)

文涛聊科研 微生信生物 2022-05-08
点击蓝字关注我们


Tao Wen

2019年1月5日





引子

由我的工作来看,coding存在的意义就是无序重复机械的分析过程,让自己的时间集中在科学问题上,然而实现这个过程却花费了更多的?时间。 我们可以轻松的使用各种软件实现方差检验和多重比较。但是在同时对几百甚至几千组数据做方差分析和多重比较的时候,coding的优势就提体现出来了。


全部代码
##清空内存rm(list=ls()) #载入包
library("vegan")
library(dplyr)
library(tidyverse)
library(agricolae)
library(car)
#修改工作目录
setwd("F:/")
##############全部峰值数据来做统计#########去除磷酸之后进行分析
RET_all = read.delim("./Data_processing/a0_RET_缺失值处理clean_root_exudates.txt", header=T, row.names= 1,check.names=F) head(RET_all)
##            CK2       CK5       CK3       CK1      CK4       RS1       RS2 ## RE1  26632.985 25224.034  30596.48  29812.76 25972.92 225065.33  75659.57 ## RE2 118030.479 60185.801 212033.07 105679.97 94223.07      0.00      0.00 ## RE3  92672.041 65451.785  96917.73 100051.53 77912.07 689247.38 232806.81 ## RE4  22620.908  4666.435  18808.13   9232.97 22456.49  11528.64  11517.85 ## RE5  22688.856 14983.546  17679.77  16747.14 16299.52  18650.75  19104.47 ## RE6   9271.438  8784.055  12884.40  10313.30 10313.30      0.00      0.00 ##            RS3        RS5      RS4        B1        B2        B3       B4 ## RE1  31002.659  31484.553 26169.62  30109.59 31586.178  28028.22 30081.14 ## RE2      0.000      0.000     0.00      0.00     0.000      0.00     0.00 ## RE3 107402.115 101363.214 83245.85 151057.74 99776.930 101816.86 90471.41 ## RE4   4962.333   3686.371 14990.28   5080.92  8451.872  27181.51 23715.11 ## RE5  21119.811  14020.793 20534.05  18634.03 20593.487  23398.10 13077.19 ## RE6      0.000      0.000     0.00      0.00     0.000      0.00     0.00 ##            B5                                      compound ## RE1 28979.445                            (-)-Dihydrocarveol ## RE2     0.000 (2R,3S)-2-hydroxy-3-isopropylbutanedioic acid ## RE3 74380.312                        1,4-Cyclohexanedione 2 ## RE4  6579.112             11-beta-prostaglandin-F-2-alpha 1 ## RE5 17367.560                                 1-Hexadecanol ## RE6     0.000                     2,3-Dimethylsuccinic acidRET = RET_all[1:dim(RET_all)[2]-1] head(RET)


#导入需要的作图文件design = read.table("./Data_processing/mapping_RET_all.txt", header=T, row.names= 1, sep="\t") head(design)##     group ## CK5    CK ## CK4    CK ## CK3    CK ## CK2    CK ## CK1    CK ## B5      Bdesign$ID = row.names(design)# 过滤数据并排序idx = rownames(design) %in% colnames(RET) sub_design = design[idx,] count = RET[ ,rownames(sub_design)] head(count)


comp_all = dim(count) head(count)# 转换原始数据为百分比,norm = t(t(count)/colSums(count,na=T)) * 100 # normalization to total 100head(norm)##             CK5         CK4         CK3         CK2         CK1 ## RE1 0.005549065 0.013885418 0.011357306 0.009472478 0.011338921 ## RE2 0.013240346 0.050372725 0.078705924 0.041979565 0.040194100 ## RE3 0.014398817 0.041652675 0.035975518 0.032960401 0.038053388 ## RE4 0.001026575 0.012005496 0.006981510 0.008045514 0.003511648 ## RE5 0.003296248 0.008713910 0.006562666 0.008069681 0.006369571 ## RE6 0.001932415 0.005513607 0.004782645 0.003297546 0.003922539 ##              B5          B4          B3          B2          B1 ## RE1 0.006645724 0.011490223 0.008876583 0.007449352 0.011995785 ## RE2 0.000000000 0.000000000 0.000000000 0.000000000 0.000000000 ## RE3 0.017057299 0.034557751 0.032245560 0.023531604 0.060182030 ## RE4 0.001508758 0.009058563 0.008608428 0.001993307 0.002024260 ## RE5 0.003982824 0.004995150 0.007410215 0.004856812 0.007423875 ## RE6 0.000000000 0.000000000 0.000000000 0.000000000 0.000000000 ##             RS5         RS4         RS3         RS2         RS1 ## RE1 0.009075086 0.005696559 0.012207578 0.022859290 0.066628198 ## RE2 0.000000000 0.000000000 0.000000000 0.000000000 0.000000000 ## RE3 0.029216864 0.018120817 0.042290558 0.070338732 0.204044354 ## RE4 0.001062557 0.003263060 0.001953964 0.003479927 0.003412932 ## RE5 0.004041344 0.004469817 0.008316117 0.005772101 0.005521356 ## RE6 0.000000000 0.000000000 0.000000000 0.000000000 0.000000000norm=as.data.frame(norm) a=norm head(a)




Pvalue<-c(rep(0,nrow(a))) pB<-c(rep(0,nrow(a))) pCK<-c(rep(0,nrow(a))) pRS<-c(rep(0,nrow(a))) str(pCK)##  num [1:604] 0 0 0 0 0 0 0 0 0 0 ...



# 2~4列是处理组1,5~7列是处理组2;#将使用循环对每一行进行方差检验#两组表达量都是0的基因,不检验;#每一行计算得到p value和log2FC将被加入原文件的后两列;#计算log2FC时,每组均值加0.001,是为了防止分母为0导致bug;# i= 2# ab = as.numeric(a[i,1:15])# b = sub_design$group# aa = data.frame(ab,b)# y=aov(ab~b,data = aa)# Pvalue[i]<-summary(y)[[1]][,5][1]# out <- LSD.test(y,"b", p.adj="none")# bb = as.data.frame(out$group)# bb$ID = row.names(bb)# bb<- arrange(bb, row.names(bb))# pB = bb[1,2]# pCK = bb[2,2]# pRS = bb[3,2]for(i in 1:nrow(a)){  if(sum(a[i,1:5])==0&&sum(a[i,6:10])==0&&sum(a[i,11:15])==0){    Pvalue[i] <- "NA"  }else{    ab = as.numeric(a[i,1:15])    b = sub_design$group    aa = data.frame(ab,b)    y=aov(ab~b,data = aa)    Pvalue[i]<-summary(y)[[1]][,5][1]    out <- LSD.test(y,"b", p.adj="none")    bb = as.data.frame(out$group)    bb$ID = row.names(bb)    bb<- arrange(bb, row.names(bb))    pCK[i] = bb$groups[2]    pB[i] = as.character(bb[1,2])    pCK[i] = as.character(bb[2,2])    pRS[i] = as.character(bb[3,2])    pRS[i];pCK[i];pB[i]    # Dunnett<-glht(y, linfct = mcp(type = "Dunnett"),alternative = "two.side")    # out <- LSD.test(y,sub_design$group, p.adj="none")#进行多重比较,不矫正P值    # tuk=TukeyHSD(y)    # out <-scheffe.test (y,"group")      } }# Pvalueout<-cbind(a,Pvalue, pB,pCK,pRS) head(out)##             CK5         CK4         CK3         CK2         CK1 ## RE1 0.005549065 0.013885418 0.011357306 0.009472478 0.011338921 ## RE2 0.013240346 0.050372725 0.078705924 0.041979565 0.040194100 ## RE3 0.014398817 0.041652675 0.035975518 0.032960401 0.038053388 ## RE4 0.001026575 0.012005496 0.006981510 0.008045514 0.003511648 ## RE5 0.003296248 0.008713910 0.006562666 0.008069681 0.006369571 ## RE6 0.001932415 0.005513607 0.004782645 0.003297546 0.003922539 ##              B5          B4          B3          B2          B1 ## RE1 0.006645724 0.011490223 0.008876583 0.007449352 0.011995785 ## RE2 0.000000000 0.000000000 0.000000000 0.000000000 0.000000000 ## RE3 0.017057299 0.034557751 0.032245560 0.023531604 0.060182030 ## RE4 0.001508758 0.009058563 0.008608428 0.001993307 0.002024260 ## RE5 0.003982824 0.004995150 0.007410215 0.004856812 0.007423875 ## RE6 0.000000000 0.000000000 0.000000000 0.000000000 0.000000000 ##             RS5         RS4         RS3         RS2         RS1 ## RE1 0.009075086 0.005696559 0.012207578 0.022859290 0.066628198 ## RE2 0.000000000 0.000000000 0.000000000 0.000000000 0.000000000 ## RE3 0.029216864 0.018120817 0.042290558 0.070338732 0.204044354 ## RE4 0.001062557 0.003263060 0.001953964 0.003479927 0.003412932 ## RE5 0.004041344 0.004469817 0.008316117 0.005772101 0.005521356 ## RE6 0.000000000 0.000000000 0.000000000 0.000000000 0.000000000 ##           Pvalue pB pCK pRS ## RE1 2.796373e-01  a   a   a ## RE2 2.278410e-04  b   a   b ## RE3 3.121200e-01  a   a   a ## RE4 2.608948e-01  a   a   a ## RE5 6.511946e-01  a   a   a ## RE6 5.103709e-06  b   a   bRET_name = RET_all[dim(RET_all)[2]] head(RET_name)##                                          compound ## RE1                            (-)-Dihydrocarveol ## RE2 (2R,3S)-2-hydroxy-3-isopropylbutanedioic acid ## RE3                        1,4-Cyclohexanedione 2 ## RE4             11-beta-prostaglandin-F-2-alpha 1 ## RE5                                 1-Hexadecanol ## RE6                     2,3-Dimethylsuccinic acidrusult_out = merge(out ,RET_name, by="row.names",all=F) head(rusult_out)##   Row.names         CK5         CK4         CK3         CK2         CK1 ## 1       RE1 0.005549065 0.013885418 0.011357306 0.009472478 0.011338921 ## 2      RE10 0.000000000 0.000000000 0.000000000 0.000000000 0.000000000 ## 3     RE100 0.001650192 0.003067643 0.002522073 0.001038059 0.002182409 ## 4     RE101 0.006185346 0.017028862 0.029767109 0.012691608 0.072403533 ## 5     RE102 0.002229133 0.008797501 0.021920508 0.004811970 0.026383348 ## 6     RE103 0.001405335 0.002621616 0.005901619 0.002184498 0.011798804 ##            B5          B4          B3          B2           B1         RS5 ## 1 0.006645724 0.011490223 0.008876583 0.007449352 0.0119957851 0.009075086 ## 2 0.001245465 0.001468414 0.001217488 0.001201888 0.0004006671 0.001499867 ## 3 0.000000000 0.000000000 0.000000000 0.000000000 0.0000000000 0.002260963 ## 4 0.026783700 0.070971462 0.011183080 0.019668387 0.0648061792 0.015967222 ## 5 0.021370834 0.030246691 0.009708425 0.010392458 0.0244686738 0.008926288 ## 6 0.009331215 0.009423304 0.002036022 0.002954869 0.0090469287 0.004238941 ##           RS4         RS3         RS2          RS1       Pvalue pB pCK pRS ## 1 0.005696559 0.012207578 0.022859290 0.0666281983 2.796373e-01  a   a   a ## 2 0.001142296 0.001528522 0.001253334 0.0006690667 6.950709e-05  a   b   a ## 3 0.001242421 0.003830482 0.002939149 0.0046277758 5.597853e-04  b   a   a ## 4 0.007431094 0.023015901 0.019496753 0.0210254041 3.504130e-01  a   a   a ## 5 0.005726721 0.007780721 0.012159299 0.0048272448 1.341319e-01  a  ab   b ## 6 0.001370010 0.003022282 0.002371406 0.0034698880 2.604930e-01  a   a   a ##                                compound ## 1                    (-)-Dihydrocarveol ## 2 2-Amino-2-norbornanecarboxylic acid 3 ## 3                           Analyte 172 ## 4                           Analyte 177 ## 5                           Analyte 178 ## 6                           Analyte 179dim(rusult_out)## [1] 604  21write.table(rusult_out,"./sub_result_and_script_allgroup//a3_RET_compounds_t检验all结果.txt",quote = FALSE,row.names = T,            col.names = NA,sep = "\t")


学习永无止境,分享永不停歇




您可能也对以下帖子感兴趣

文章有问题?点此查看未经处理的缓存