查看原文
其他

分类堆叠柱状图顺序排列及其添加合适条块标签

宏基因组 2023-08-18

The following article is from 微生信生物 Author 文涛聊科研

堆叠柱状图顺序排列及其添加合适条块标签

这篇文章详细解释了堆叠柱状图各种需求是如何得到的,冲击图是如何对应的,这是一条大家学习代码,而不是仅仅出图的推文。

写在前面:人生嘛,不就是这样,总会有高兴和不高兴,积极和消沉嘛!即便晚上过成了白天,白天过成了晚上。但事情总会过去,有缺憾才完美。

基于phylosep对扩增子下游分析

R语言绘制门类水平堆叠柱状图

有些日子没有更新了,心里面惦记着啥时候推送两篇,今天终于逃课,坐在这里给大家送点干活; 

我也就不做过多的解释了,大家都明白

library("ggalluvial")
library("alluvial")
library("phyloseq")
library("ggplot2")
library("dplyr")
library("biomformat")
library("reshape2")
library("plyr")

基于phylosep和dplyr对数据进行输入、预处理和整理

数据形式


# knitr::kable(#   Taxonomies1[1:10,],#   caption = "A knitr kable"# )

构造排序因子

#按照分组求和

by_cyl <- group_by(Taxonomies1, SampleType,Genus)  
zhnagxu2 = dplyr :: summarise(by_cyl, sum(Abundance))
colnames(zhnagxu2) = c("group", "Genus","Abundance")
head(zhnagxu2)
## # A tibble: 6 x 3
##   group Genus            Abundance
##   <fct> <fct>                <dbl>
## 1 CF    Adhaeribacter        1.55
## 2 CF    Alicyclobacillus     0.590
## 3 CF    Aquicella            0.256
## 4 CF    Azoarcus             0.546
## 5 CF    Bacillus             6.82
## 6 CF    Balneimonas          1.81
##确定因子,这里通过求和按照从小到大的顺序得到因子##长变宽
Taxonomies2 = dcast(Taxonomies1,Genus ~ Sample,value.var = "Abundance")
head(Taxonomies2)
##              Genus CF1.fastq CF4.fastq CF5.fastq CF6.fastq CK1.fastq
## 1    Adhaeribacter 0.4766949        NA 0.6363286 0.4375653 0.3774376
## 2 Alicyclobacillus 0.3354520        NA        NA 0.2547022        NA
## 3        Aquicella 0.2560028        NA        NA        NA        NA
## 4       Arenimonas        NA        NA        NA        NA        NA
## 5         Azoarcus        NA 0.2843602        NA 0.2612330        NA
## 6         Bacillus 1.6596045 1.0663507 1.3883533 2.7037618 2.2698679
Taxonomies2[is.na(Taxonomies2)] <- 0aa = Taxonomies2# head(aa)n = ncol(aa)#增加一行,为整列的均值,计算每一列的均值,2就是表示列aa[n+1]=apply(aa[,c(2:ncol(aa))],1,sum)
bb<- arrange(aa, V14)
head(bb)
##               Genus CF1.fastq CF4.fastq CF5.fastq CF6.fastq CK1.fastq
## 1         Aquicella 0.2560028         0         0         0 0.0000000
## 2   Pseudidiomarina 0.0000000         0         0         0 0.2673516
## 3 Janthinobacterium 0.0000000         0         0         0 0.2725938
## 4        Variovorax 0.2913136         0         0         0 0.0000000
## 5     Pedomicrobium 0.0000000         0         0         0 0.0000000
## 6        Luteimonas 0.0000000         0         0         0 0.3355001
bb = bb[c(1,ncol(bb))]
cc<- arrange(bb, desc(V14))# head(cc)

 因子排序变量查看

cc$Genus = as.character(cc$Genus)
cc$Genus = as.factor(cc$Genus)
cc$Genus

开始对作图分类水平进行按照平均丰度的排序

zhnagxu2$Genus = factor(zhnagxu2$Genus,order = T,levels = cc$Genus)
zhnagxu3 = plyr::arrange(zhnagxu2,desc(Genus))
head(zhnagxu3)
## # A tibble: 6 x 3
##   group Genus             Abundance
##   <fct> <ord>                 <dbl>
## 1 CF    Aquicella             0.256
## 2 CK    Pseudidiomarina       0.267
## 3 CK    Janthinobacterium     0.273
## 4 CF    Variovorax            0.291
## 5 OF    Pedomicrobium         0.301
## 6 CK    Luteimonas            0.336
# ##制作标签坐标,标签位于顶端# Taxonomies_x = ddply(zhnagxu3,"group", transform, label_y = cumsum(Abundance))# head(Taxonomies_x )#标签位于中部
Taxonomies_x = ddply(zhnagxu3,"group", transform, label_y = cumsum(Abundance) - 0.5*Abundance)
head(Taxonomies_x,20 )
##    group                 Genus Abundance    label_y
## 1     CF             Aquicella 0.2560028  0.1280014
## 2     CF            Variovorax 0.2913136  0.4016596
## 3     CF              Azoarcus 0.5455932  0.8201130
## 4     CF      Alicyclobacillus 0.5901542  1.3879867
## 5     CF              Opitutus 0.6431613  2.0046444
## 6     CF     Pseudoxanthomonas 0.3707627  2.5116064
## 7     CF           Pontibacter 0.5339744  2.9639750
## 8     CF            Rubrivivax 0.3317536  3.3968389
## 9     CF          Fimbriimonas 0.7129691  3.9192003
## 10    CF           Rhodobacter 0.5965841  4.5739768
## 11    CF        Hydrogenophaga 0.7701422  5.2573400
## 12    CF Candidatus Solibacter 0.2603162  5.7725692
## 13    CF               Thauera 0.2606635  6.0330590
## 14    CF           Skermanella 0.6073668  6.4670742
## 15    CF       Flavisolibacter 1.4495684  7.4955417
## 16    CF               Devosia 1.5499034  8.9952776
## 17    CF             Geobacter 0.8809867 10.2107227
## 18    CF         Adhaeribacter 1.5505888 11.4265104
## 19    CF           Balneimonas 1.8123620 13.1079858
## 20    CF           Sphingobium 0.9007005 14.4645170

添加标签 按照堆叠柱子的宽度进行选择是否需要在柱子上添加标签

Taxonomies_x$label = Taxonomies_x$Genus#使用循环将堆叠柱状图柱子比较窄的别写标签,仅仅宽柱子写上标签
for(i in 1:nrow(Taxonomies_x)){  if(Taxonomies_x[i,3] > 5){
   Taxonomies_x[i,5] = Taxonomies_x[i,5]
 }else{
   Taxonomies_x[i,5] = NA
 }
}

定义需要所需要的颜色

出图

##普通柱状图
p = china_barplots <- ggplot(Taxonomies_x , aes(x =  group, y = Abundance, fill = Genus, order = Genus)) +
 geom_bar(stat = "identity",width = 0.5) +
 scale_fill_manual(values = Phylum_colors) +
 theme(axis.title.x = element_blank()) +
 theme(legend.text=element_text(size=6)) +
 scale_y_continuous(name = "Abundance (%)")+
 geom_text(aes(y = label_y, label = label ),size = 4)
print(china_barplots)
## Warning: Removed 81 rows containing missing values (geom_text).

p =p+theme_bw()+
 scale_y_continuous(expand = c(0,0))+  #geom_hline(aes(yintercept=0), colour="black", linetype=2) +
 #geom_vline(aes(xintercept=0), colour="black", linetype="dashed") +
 #scale_fill_manual(values = mi, guide = guide_legend(title = NULL))+
 theme(
   
   panel.grid.major=element_blank(),
   panel.grid.minor=element_blank(),
   
   plot.title = element_text(vjust = -8.5,hjust = 0.1),
   axis.title.y =element_text(size = 20,face = "bold",colour = "black"),
   axis.title.x =element_text(size = 24,face = "bold",colour = "black"),
   axis.text = element_text(size = 20,face = "bold"),
   axis.text.x = element_text(colour = "black",size = 14),
   axis.text.y = element_text(colour = "black",size = 14),
   legend.text = element_text(size = 10,face = "bold")    #legend.position = "none"#是否删除图例
   
 )
## Scale for 'y' is already present. Adding another scale for 'y', which
## will replace the existing scale.
p## Warning: Removed 81 rows containing missing values (geom_text).

#ggsave("./result_and_script/a4_分门别类冲击图/属水平水平柱状图.pdf", p, width = 10, height =8 )

下面开始加上分组之间的连线,方便我们进行横向比较

参考的是冲击图,这里我遇到的困难是如何设置连线流动的映射, 我是人工构建的一套流动映射,可能不是最好的解决方案,好的 一点无序改动,跑就行了。

##柱状图冲击图#stratum定义堆叠柱状图柱子内容,以weight定义柱子长度,alluvium定义连head(Taxonomies_x )##   group             Genus Abundance   label_y label
## 1    CF         Aquicella 0.2560028 0.1280014  <NA>
## 2    CF        Variovorax 0.2913136 0.4016596  <NA>
## 3    CF          Azoarcus 0.5455932 0.8201130  <NA>
## 4    CF  Alicyclobacillus 0.5901542 1.3879867  <NA>
## 5    CF          Opitutus 0.6431613 2.0046444  <NA>
## 6    CF Pseudoxanthomonas 0.3707627 2.5116064  <NA>
cs = Taxonomies_x $Genus
cs1 = cs#提取真正的因子的数量
lengthfactor = length(levels(cs1))#提取每个因子对应的数量cs3 = summary (as.factor(cs1))
cs4 = as.data.frame(cs3)
cs4$id = row.names(cs4)#对因子进行排序
df_arrange<- arrange(cs4, id)#对Taxonomies_x 对应的列进行排序
Taxonomies_x1<- arrange(Taxonomies_x , Genus)
head(Taxonomies_x1)
##   group        Genus Abundance  label_y        label
## 1    CF Kaistobacter 16.512434 66.34180 Kaistobacter
## 2    CK Kaistobacter 14.791761 66.46900 Kaistobacter
## 3    OF Kaistobacter 14.228975 67.11143 Kaistobacter
## 4    CF   Nitrospira 11.613095 52.27904   Nitrospira
## 5    CK   Nitrospira  9.342698 54.40177   Nitrospira
## 6    OF   Nitrospira 10.635413 54.67924   Nitrospira
#构建flow的映射列Taxonomies_x
Taxonomies_x1$ID = factor(rep(c(1:lengthfactor), cs4$cs3))#colour = "black",size = 2,,aes(color = "black",size = 0.8)p2 = ggplot(Taxonomies_x1,
      aes(x = group, stratum = Genus, alluvium = ID,
          weight = Abundance,
          fill = Genus, label = Genus)) +
 geom_flow(stat = "alluvium", lode.guidance = "rightleft",
           color = "black",size = 0.2,width = 0.45,alpha = .2) +
 geom_bar(width = 0.45)+
 geom_stratum(width = 0.45,size = 0.1) +  #geom_text(stat = "stratum", size = 3) +
 #theme(legend.position = "none") +
 scale_fill_manual(values = Phylum_colors)+  #ggtitle("fow_plot")+
 #scale_x_discrete(limits = c("CK1","CK3","CK5","CK7","CK9","CK11","CK13","CK15","CK17","CK19"))+
 geom_text(aes(y = label_y, label = label ),size = 4)+
 labs(x="group",
      y="Relative abundancce (%)",
      title="")
p2 =p2+theme_bw()+
 scale_y_continuous(expand = c(0,0))+  #geom_hline(aes(yintercept=0), colour="black", linetype=2) +
 #geom_vline(aes(xintercept=0), colour="black", linetype="dashed") +
 #scale_fill_manual(values = mi, guide = guide_legend(title = NULL))+
 theme(
   
   panel.grid.major=element_blank(),
   panel.grid.minor=element_blank(),
   
   plot.title = element_text(vjust = -8.5,hjust = 0.1),
   axis.title.y =element_text(size = 20,face = "bold",colour = "black"),
   axis.title.x =element_text(size = 24,face = "bold",colour = "black"),
   axis.text = element_text(size = 20,face = "bold"),
   axis.text.x = element_text(colour = "black",size = 14),
   axis.text.y = element_text(colour = "black",size = 14),
   legend.text = element_text(size = 10,face = "bold")    #legend.position = "none"#是否删除图例
   
 )
p2


#ggsave("./result_and_script/a4_/flow_plot_bar.pdf", p2, width = 10, height =8 )


您可能也对以下帖子感兴趣

文章有问题?点此查看未经处理的缓存