其他
分类堆叠柱状图顺序排列及其添加合适条块标签
堆叠柱状图顺序排列及其添加合适条块标签
Tao Wen
2019年1月11日
写在前面:人生嘛,不就是这样,总会有高兴和不高兴,积极和消沉嘛!即便晚上过成了白天,白天过成了晚上。但事情总会过去,有缺憾才完美。
基于phylosep对扩增子下游分析
R语言绘制门类水平堆叠柱状图
有些日子没有更新了,心里面惦记着啥时候推送两篇,今天终于逃课,坐在这里给大家送点干活;
我也就不做过多的解释了,大家都明白
library("ggalluvial")
library("alluvial")
library("phyloseq")
library("ggplot2")
library("dplyr")
library("biomformat")
library("reshape2")
library("plyr")
基于phylosep和dplyr对数据进行输入、预处理和整理
数据形式
# knitr::kable(# Taxonomies1[1:10,],# caption = "A knitr kable"# )
构造排序因子
#按照分组求和
by_cyl <- group_by(Taxonomies1, SampleType,Genus)
zhnagxu2 = dplyr :: summarise(by_cyl, sum(Abundance))
colnames(zhnagxu2) = c("group", "Genus","Abundance")
head(zhnagxu2)
## # A tibble: 6 x 3
## group Genus Abundance
## <fct> <fct> <dbl>
## 1 CF Adhaeribacter 1.55
## 2 CF Alicyclobacillus 0.590
## 3 CF Aquicella 0.256
## 4 CF Azoarcus 0.546
## 5 CF Bacillus 6.82
## 6 CF Balneimonas 1.81
##确定因子,这里通过求和按照从小到大的顺序得到因子##长变宽
Taxonomies2 = dcast(Taxonomies1,Genus ~ Sample,value.var = "Abundance")
head(Taxonomies2)
## Genus CF1.fastq CF4.fastq CF5.fastq CF6.fastq CK1.fastq
## 1 Adhaeribacter 0.4766949 NA 0.6363286 0.4375653 0.3774376
## 2 Alicyclobacillus 0.3354520 NA NA 0.2547022 NA
## 3 Aquicella 0.2560028 NA NA NA NA
## 4 Arenimonas NA NA NA NA NA
## 5 Azoarcus NA 0.2843602 NA 0.2612330 NA
## 6 Bacillus 1.6596045 1.0663507 1.3883533 2.7037618 2.2698679
Taxonomies2[is.na(Taxonomies2)] <- 0aa = Taxonomies2# head(aa)n = ncol(aa)#增加一行,为整列的均值,计算每一列的均值,2就是表示列aa[n+1]=apply(aa[,c(2:ncol(aa))],1,sum)
bb<- arrange(aa, V14)
head(bb)
## Genus CF1.fastq CF4.fastq CF5.fastq CF6.fastq CK1.fastq
## 1 Aquicella 0.2560028 0 0 0 0.0000000
## 2 Pseudidiomarina 0.0000000 0 0 0 0.2673516
## 3 Janthinobacterium 0.0000000 0 0 0 0.2725938
## 4 Variovorax 0.2913136 0 0 0 0.0000000
## 5 Pedomicrobium 0.0000000 0 0 0 0.0000000
## 6 Luteimonas 0.0000000 0 0 0 0.3355001
bb = bb[c(1,ncol(bb))]
cc<- arrange(bb, desc(V14))# head(cc)
因子排序变量查看
cc$Genus = as.character(cc$Genus)
cc$Genus = as.factor(cc$Genus)
cc$Genus
开始对作图分类水平进行按照平均丰度的排序
zhnagxu2$Genus = factor(zhnagxu2$Genus,order = T,levels = cc$Genus)
zhnagxu3 = plyr::arrange(zhnagxu2,desc(Genus))
head(zhnagxu3)
## # A tibble: 6 x 3
## group Genus Abundance
## <fct> <ord> <dbl>
## 1 CF Aquicella 0.256
## 2 CK Pseudidiomarina 0.267
## 3 CK Janthinobacterium 0.273
## 4 CF Variovorax 0.291
## 5 OF Pedomicrobium 0.301
## 6 CK Luteimonas 0.336
# ##制作标签坐标,标签位于顶端# Taxonomies_x = ddply(zhnagxu3,"group", transform, label_y = cumsum(Abundance))# head(Taxonomies_x )#标签位于中部
Taxonomies_x = ddply(zhnagxu3,"group", transform, label_y = cumsum(Abundance) - 0.5*Abundance)
head(Taxonomies_x,20 )
## group Genus Abundance label_y
## 1 CF Aquicella 0.2560028 0.1280014
## 2 CF Variovorax 0.2913136 0.4016596
## 3 CF Azoarcus 0.5455932 0.8201130
## 4 CF Alicyclobacillus 0.5901542 1.3879867
## 5 CF Opitutus 0.6431613 2.0046444
## 6 CF Pseudoxanthomonas 0.3707627 2.5116064
## 7 CF Pontibacter 0.5339744 2.9639750
## 8 CF Rubrivivax 0.3317536 3.3968389
## 9 CF Fimbriimonas 0.7129691 3.9192003
## 10 CF Rhodobacter 0.5965841 4.5739768
## 11 CF Hydrogenophaga 0.7701422 5.2573400
## 12 CF Candidatus Solibacter 0.2603162 5.7725692
## 13 CF Thauera 0.2606635 6.0330590
## 14 CF Skermanella 0.6073668 6.4670742
## 15 CF Flavisolibacter 1.4495684 7.4955417
## 16 CF Devosia 1.5499034 8.9952776
## 17 CF Geobacter 0.8809867 10.2107227
## 18 CF Adhaeribacter 1.5505888 11.4265104
## 19 CF Balneimonas 1.8123620 13.1079858
## 20 CF Sphingobium 0.9007005 14.4645170
添加标签 按照堆叠柱子的宽度进行选择是否需要在柱子上添加标签
Taxonomies_x$label = Taxonomies_x$Genus#使用循环将堆叠柱状图柱子比较窄的别写标签,仅仅宽柱子写上标签
for(i in 1:nrow(Taxonomies_x)){ if(Taxonomies_x[i,3] > 5){
Taxonomies_x[i,5] = Taxonomies_x[i,5]
}else{
Taxonomies_x[i,5] = NA
}
}
定义需要所需要的颜色
出图
##普通柱状图
p = china_barplots <- ggplot(Taxonomies_x , aes(x = group, y = Abundance, fill = Genus, order = Genus)) +
geom_bar(stat = "identity",width = 0.5) +
scale_fill_manual(values = Phylum_colors) +
theme(axis.title.x = element_blank()) +
theme(legend.text=element_text(size=6)) +
scale_y_continuous(name = "Abundance (%)")+
geom_text(aes(y = label_y, label = label ),size = 4)
print(china_barplots)
## Warning: Removed 81 rows containing missing values (geom_text).
p =p+theme_bw()+
scale_y_continuous(expand = c(0,0))+ #geom_hline(aes(yintercept=0), colour="black", linetype=2) +
#geom_vline(aes(xintercept=0), colour="black", linetype="dashed") +
#scale_fill_manual(values = mi, guide = guide_legend(title = NULL))+
theme(
panel.grid.major=element_blank(),
panel.grid.minor=element_blank(),
plot.title = element_text(vjust = -8.5,hjust = 0.1),
axis.title.y =element_text(size = 20,face = "bold",colour = "black"),
axis.title.x =element_text(size = 24,face = "bold",colour = "black"),
axis.text = element_text(size = 20,face = "bold"),
axis.text.x = element_text(colour = "black",size = 14),
axis.text.y = element_text(colour = "black",size = 14),
legend.text = element_text(size = 10,face = "bold") #legend.position = "none"#是否删除图例
)
## Scale for 'y' is already present. Adding another scale for 'y', which
## will replace the existing scale.
p
## Warning: Removed 81 rows containing missing values (geom_text).
#ggsave("./result_and_script/a4_分门别类冲击图/属水平水平柱状图.pdf", p, width = 10, height =8 )
下面开始加上分组之间的连线,方便我们进行横向比较
参考的是冲击图,这里我遇到的困难是如何设置连线流动的映射, 我是人工构建的一套流动映射,可能不是最好的解决方案,好的 一点无序改动,跑就行了。
##柱状图冲击图#stratum定义堆叠柱状图柱子内容,以weight定义柱子长度,alluvium定义连head(Taxonomies_x )
## group Genus Abundance label_y label
## 1 CF Aquicella 0.2560028 0.1280014 <NA>
## 2 CF Variovorax 0.2913136 0.4016596 <NA>
## 3 CF Azoarcus 0.5455932 0.8201130 <NA>
## 4 CF Alicyclobacillus 0.5901542 1.3879867 <NA>
## 5 CF Opitutus 0.6431613 2.0046444 <NA>
## 6 CF Pseudoxanthomonas 0.3707627 2.5116064 <NA>
cs = Taxonomies_x $Genus
cs1 = cs#提取真正的因子的数量
lengthfactor = length(levels(cs1))#提取每个因子对应的数量cs3 = summary (as.factor(cs1))
cs4 = as.data.frame(cs3)
cs4$id = row.names(cs4)#对因子进行排序
df_arrange<- arrange(cs4, id)#对Taxonomies_x 对应的列进行排序
Taxonomies_x1<- arrange(Taxonomies_x , Genus)
head(Taxonomies_x1)
## group Genus Abundance label_y label
## 1 CF Kaistobacter 16.512434 66.34180 Kaistobacter
## 2 CK Kaistobacter 14.791761 66.46900 Kaistobacter
## 3 OF Kaistobacter 14.228975 67.11143 Kaistobacter
## 4 CF Nitrospira 11.613095 52.27904 Nitrospira
## 5 CK Nitrospira 9.342698 54.40177 Nitrospira
## 6 OF Nitrospira 10.635413 54.67924 Nitrospira
#构建flow的映射列Taxonomies_x
Taxonomies_x1$ID = factor(rep(c(1:lengthfactor), cs4$cs3))#colour = "black",size = 2,,aes(color = "black",size = 0.8)p2 = ggplot(Taxonomies_x1,
aes(x = group, stratum = Genus, alluvium = ID,
weight = Abundance,
fill = Genus, label = Genus)) +
geom_flow(stat = "alluvium", lode.guidance = "rightleft",
color = "black",size = 0.2,width = 0.45,alpha = .2) +
geom_bar(width = 0.45)+
geom_stratum(width = 0.45,size = 0.1) + #geom_text(stat = "stratum", size = 3) +
#theme(legend.position = "none") +
scale_fill_manual(values = Phylum_colors)+ #ggtitle("fow_plot")+
#scale_x_discrete(limits = c("CK1","CK3","CK5","CK7","CK9","CK11","CK13","CK15","CK17","CK19"))+
geom_text(aes(y = label_y, label = label ),size = 4)+
labs(x="group",
y="Relative abundancce (%)",
title="")
p2 =p2+theme_bw()+
scale_y_continuous(expand = c(0,0))+ #geom_hline(aes(yintercept=0), colour="black", linetype=2) +
#geom_vline(aes(xintercept=0), colour="black", linetype="dashed") +
#scale_fill_manual(values = mi, guide = guide_legend(title = NULL))+
theme(
panel.grid.major=element_blank(),
panel.grid.minor=element_blank(),
plot.title = element_text(vjust = -8.5,hjust = 0.1),
axis.title.y =element_text(size = 20,face = "bold",colour = "black"),
axis.title.x =element_text(size = 24,face = "bold",colour = "black"),
axis.text = element_text(size = 20,face = "bold"),
axis.text.x = element_text(colour = "black",size = 14),
axis.text.y = element_text(colour = "black",size = 14),
legend.text = element_text(size = 10,face = "bold") #legend.position = "none"#是否删除图例
)
p2
#ggsave("./result_and_script/a4_/flow_plot_bar.pdf", p2, width = 10, height =8 )