查看原文
其他

分类堆叠柱状图顺序排列及其添加合适条块标签

文涛聊科研 微生信生物 2022-05-08

堆叠柱状图顺序排列及其添加合适条块标签

Tao Wen

2019年1月11日

写在前面:人生嘛,不就是这样,总会有高兴和不高兴,积极和消沉嘛!即便晚上过成了白天,白天过成了晚上。但事情总会过去,有缺憾才完美。

基于phylosep对扩增子下游分析

R语言绘制门类水平堆叠柱状图

有些日子没有更新了,心里面惦记着啥时候推送两篇,今天终于逃课,坐在这里给大家送点干活; 

我也就不做过多的解释了,大家都明白

library("ggalluvial")
library("alluvial")
library("phyloseq")
library("ggplot2")
library("dplyr")
library("biomformat")
library("reshape2")
library("plyr")

基于phylosep和dplyr对数据进行输入、预处理和整理

数据形式


# knitr::kable(#   Taxonomies1[1:10,],#   caption = "A knitr kable"# )

构造排序因子

#按照分组求和

by_cyl <- group_by(Taxonomies1, SampleType,Genus)   zhnagxu2 = dplyr :: summarise(by_cyl, sum(Abundance)) colnames(zhnagxu2) = c("group", "Genus","Abundance") head(zhnagxu2)
## # A tibble: 6 x 3 ##   group Genus            Abundance ##   <fct> <fct>                <dbl> ## 1 CF    Adhaeribacter        1.55 ## 2 CF    Alicyclobacillus     0.590 ## 3 CF    Aquicella            0.256 ## 4 CF    Azoarcus             0.546 ## 5 CF    Bacillus             6.82 ## 6 CF    Balneimonas          1.81##确定因子,这里通过求和按照从小到大的顺序得到因子##长变宽
Taxonomies2 = dcast(Taxonomies1,Genus ~ Sample,value.var = "Abundance") head(Taxonomies2)
##              Genus CF1.fastq CF4.fastq CF5.fastq CF6.fastq CK1.fastq ## 1    Adhaeribacter 0.4766949        NA 0.6363286 0.4375653 0.3774376 ## 2 Alicyclobacillus 0.3354520        NA        NA 0.2547022        NA ## 3        Aquicella 0.2560028        NA        NA        NA        NA ## 4       Arenimonas        NA        NA        NA        NA        NA ## 5         Azoarcus        NA 0.2843602        NA 0.2612330        NA ## 6         Bacillus 1.6596045 1.0663507 1.3883533 2.7037618 2.2698679
Taxonomies2[is.na(Taxonomies2)] <- 0aa = Taxonomies2# head(aa)n = ncol(aa)#增加一行,为整列的均值,计算每一列的均值,2就是表示列aa[n+1]=apply(aa[,c(2:ncol(aa))],1,sum) bb<- arrange(aa, V14) head(bb)##               Genus CF1.fastq CF4.fastq CF5.fastq CF6.fastq CK1.fastq ## 1         Aquicella 0.2560028         0         0         0 0.0000000 ## 2   Pseudidiomarina 0.0000000         0         0         0 0.2673516 ## 3 Janthinobacterium 0.0000000         0         0         0 0.2725938 ## 4        Variovorax 0.2913136         0         0         0 0.0000000 ## 5     Pedomicrobium 0.0000000         0         0         0 0.0000000 ## 6        Luteimonas 0.0000000         0         0         0 0.3355001
bb = bb[c(1,ncol(bb))] cc<- arrange(bb, desc(V14))# head(cc)

 因子排序变量查看

cc$Genus = as.character(cc$Genus) cc$Genus = as.factor(cc$Genus) cc$Genus

开始对作图分类水平进行按照平均丰度的排序

zhnagxu2$Genus = factor(zhnagxu2$Genus,order = T,levels = cc$Genus) zhnagxu3 = plyr::arrange(zhnagxu2,desc(Genus)) head(zhnagxu3)## # A tibble: 6 x 3 ##   group Genus             Abundance ##   <fct> <ord>                 <dbl> ## 1 CF    Aquicella             0.256 ## 2 CK    Pseudidiomarina       0.267 ## 3 CK    Janthinobacterium     0.273 ## 4 CF    Variovorax            0.291 ## 5 OF    Pedomicrobium         0.301 ## 6 CK    Luteimonas            0.336# ##制作标签坐标,标签位于顶端# Taxonomies_x = ddply(zhnagxu3,"group", transform, label_y = cumsum(Abundance))# head(Taxonomies_x )#标签位于中部
Taxonomies_x = ddply(zhnagxu3,"group", transform, label_y = cumsum(Abundance) - 0.5*Abundance) head(Taxonomies_x,20 )
##    group                 Genus Abundance    label_y ## 1     CF             Aquicella 0.2560028  0.1280014 ## 2     CF            Variovorax 0.2913136  0.4016596 ## 3     CF              Azoarcus 0.5455932  0.8201130 ## 4     CF      Alicyclobacillus 0.5901542  1.3879867 ## 5     CF              Opitutus 0.6431613  2.0046444 ## 6     CF     Pseudoxanthomonas 0.3707627  2.5116064 ## 7     CF           Pontibacter 0.5339744  2.9639750 ## 8     CF            Rubrivivax 0.3317536  3.3968389 ## 9     CF          Fimbriimonas 0.7129691  3.9192003 ## 10    CF           Rhodobacter 0.5965841  4.5739768 ## 11    CF        Hydrogenophaga 0.7701422  5.2573400 ## 12    CF Candidatus Solibacter 0.2603162  5.7725692 ## 13    CF               Thauera 0.2606635  6.0330590 ## 14    CF           Skermanella 0.6073668  6.4670742 ## 15    CF       Flavisolibacter 1.4495684  7.4955417 ## 16    CF               Devosia 1.5499034  8.9952776 ## 17    CF             Geobacter 0.8809867 10.2107227 ## 18    CF         Adhaeribacter 1.5505888 11.4265104 ## 19    CF           Balneimonas 1.8123620 13.1079858 ## 20    CF           Sphingobium 0.9007005 14.4645170

添加标签 按照堆叠柱子的宽度进行选择是否需要在柱子上添加标签

Taxonomies_x$label = Taxonomies_x$Genus#使用循环将堆叠柱状图柱子比较窄的别写标签,仅仅宽柱子写上标签
for(i in 1:nrow(Taxonomies_x)){  if(Taxonomies_x[i,3] > 5){    Taxonomies_x[i,5] = Taxonomies_x[i,5]  }else{    Taxonomies_x[i,5] = NA  } }

定义需要所需要的颜色

出图

##普通柱状图
p = china_barplots <- ggplot(Taxonomies_x , aes(x =  group, y = Abundance, fill = Genus, order = Genus)) +  geom_bar(stat = "identity",width = 0.5) +  scale_fill_manual(values = Phylum_colors) +  theme(axis.title.x = element_blank()) +  theme(legend.text=element_text(size=6)) +  scale_y_continuous(name = "Abundance (%)")+  geom_text(aes(y = label_y, label = label ),size = 4) print(china_barplots)
## Warning: Removed 81 rows containing missing values (geom_text).

p =p+theme_bw()+  scale_y_continuous(expand = c(0,0))+  #geom_hline(aes(yintercept=0), colour="black", linetype=2) +  #geom_vline(aes(xintercept=0), colour="black", linetype="dashed") +  #scale_fill_manual(values = mi, guide = guide_legend(title = NULL))+  theme(        panel.grid.major=element_blank(),    panel.grid.minor=element_blank(),        plot.title = element_text(vjust = -8.5,hjust = 0.1),    axis.title.y =element_text(size = 20,face = "bold",colour = "black"),    axis.title.x =element_text(size = 24,face = "bold",colour = "black"),    axis.text = element_text(size = 20,face = "bold"),    axis.text.x = element_text(colour = "black",size = 14),    axis.text.y = element_text(colour = "black",size = 14),    legend.text = element_text(size = 10,face = "bold")    #legend.position = "none"#是否删除图例      ) ## Scale for 'y' is already present. Adding another scale for 'y', which ## will replace the existing scale.p## Warning: Removed 81 rows containing missing values (geom_text).

#ggsave("./result_and_script/a4_分门别类冲击图/属水平水平柱状图.pdf", p, width = 10, height =8 )

下面开始加上分组之间的连线,方便我们进行横向比较

参考的是冲击图,这里我遇到的困难是如何设置连线流动的映射, 我是人工构建的一套流动映射,可能不是最好的解决方案,好的 一点无序改动,跑就行了。

##柱状图冲击图#stratum定义堆叠柱状图柱子内容,以weight定义柱子长度,alluvium定义连head(Taxonomies_x )##   group             Genus Abundance   label_y label ## 1    CF         Aquicella 0.2560028 0.1280014  <NA> ## 2    CF        Variovorax 0.2913136 0.4016596  <NA> ## 3    CF          Azoarcus 0.5455932 0.8201130  <NA> ## 4    CF  Alicyclobacillus 0.5901542 1.3879867  <NA> ## 5    CF          Opitutus 0.6431613 2.0046444  <NA> ## 6    CF Pseudoxanthomonas 0.3707627 2.5116064  <NA>cs = Taxonomies_x $Genus cs1 = cs#提取真正的因子的数量
lengthfactor = length(levels(cs1))#提取每个因子对应的数量cs3 = summary (as.factor(cs1)) cs4 = as.data.frame(cs3) cs4$id = row.names(cs4)#对因子进行排序
df_arrange<- arrange(cs4, id)#对Taxonomies_x 对应的列进行排序
Taxonomies_x1<- arrange(Taxonomies_x , Genus) head(Taxonomies_x1)
##   group        Genus Abundance  label_y        label ## 1    CF Kaistobacter 16.512434 66.34180 Kaistobacter ## 2    CK Kaistobacter 14.791761 66.46900 Kaistobacter ## 3    OF Kaistobacter 14.228975 67.11143 Kaistobacter ## 4    CF   Nitrospira 11.613095 52.27904   Nitrospira ## 5    CK   Nitrospira  9.342698 54.40177   Nitrospira ## 6    OF   Nitrospira 10.635413 54.67924   Nitrospira#构建flow的映射列Taxonomies_x
Taxonomies_x1$ID = factor(rep(c(1:lengthfactor), cs4$cs3))#colour = "black",size = 2,,aes(color = "black",size = 0.8)p2 = ggplot(Taxonomies_x1,       aes(x = group, stratum = Genus, alluvium = ID,           weight = Abundance,           fill = Genus, label = Genus)) +  geom_flow(stat = "alluvium", lode.guidance = "rightleft",            color = "black",size = 0.2,width = 0.45,alpha = .2) +  geom_bar(width = 0.45)+  geom_stratum(width = 0.45,size = 0.1) +  #geom_text(stat = "stratum", size = 3) +  #theme(legend.position = "none") +  scale_fill_manual(values = Phylum_colors)+  #ggtitle("fow_plot")+  #scale_x_discrete(limits = c("CK1","CK3","CK5","CK7","CK9","CK11","CK13","CK15","CK17","CK19"))+  geom_text(aes(y = label_y, label = label ),size = 4)+  labs(x="group",       y="Relative abundancce (%)",       title="") p2 =p2+theme_bw()+  scale_y_continuous(expand = c(0,0))+  #geom_hline(aes(yintercept=0), colour="black", linetype=2) +  #geom_vline(aes(xintercept=0), colour="black", linetype="dashed") +  #scale_fill_manual(values = mi, guide = guide_legend(title = NULL))+  theme(        panel.grid.major=element_blank(),    panel.grid.minor=element_blank(),        plot.title = element_text(vjust = -8.5,hjust = 0.1),    axis.title.y =element_text(size = 20,face = "bold",colour = "black"),    axis.title.x =element_text(size = 24,face = "bold",colour = "black"),    axis.text = element_text(size = 20,face = "bold"),    axis.text.x = element_text(colour = "black",size = 14),    axis.text.y = element_text(colour = "black",size = 14),    legend.text = element_text(size = 10,face = "bold")    #legend.position = "none"#是否删除图例      ) p2


#ggsave("./result_and_script/a4_/flow_plot_bar.pdf", p2, width = 10, height =8 )


您可能也对以下帖子感兴趣

文章有问题?点此查看未经处理的缓存