微生物群落来自哪里，我们说了算-FEAST or SourceTracker

宏基因组 2022-05-08

The following article is from 微生信生物 Author 文涛聊科研

写在前面

最近由于老板有分析项目，我实在是进展缓慢，一直苦恼并艰难地探索和推进，所以很长时间没有和大家见面了。今天我为大家带来source tracker分析，使用的是前一段时间刚出来的工具FEAST。

刘老师对这篇文章进行了详细的解读：Nature Methods：快速准确的微生物来源追溯工具FEAST。跟着刘老师的步伐，今天我对这个工具进行一个尝试。为什么作者不将这个工具封装成R包呢？这样不就更容易了吗？可能好多小伙伴都没有从github上克隆过项目。

SourceTracker的流程及其说明在宏基因组公众号上有很详细的介绍，点我跳转，这里就略过了。

本次重点FEAST

准备

不仅仅是这一次，我在之后全部的分析中都会将整个群落数据封装到phyloseq，只是为了更好、更灵活地对微生物群落数据进行分析。当然，大家如果是初次接触，可能需要安装依赖极多的phyloseq包，并需要熟悉phyloseq封装的结构和调用方法。

为了让大家更容易操作，我把数据保存为csv，方便尚未接触phyloseq的小伙伴进行无压力测试。

结合作者的分析内核，我构建了基于otu表格和分组文件的流畅pipeline，并且添加了可视化模块和保存结果模块，希望可以方便使用。

微生物来源分析

FEAST提供两种方式来做微生物来源分析。

1.基于单个目标的来源：针对单个样品进行来源分析。2.基于多个目标和多个来源：针对多个样品进行来源分析。

首先我们来演示基于单个目标样品和来源样品的来源分析

# rm(list = ls())
# gc()

# Output directory used for the result table and figures of this walkthrough.
path <- "./phyloseq_7_source_FEAST"
# Create it only when missing so that re-running the script does not warn.
if (!dir.exists(path)) {
  dir.create(path, recursive = TRUE)
}

# sample_data(), otu_table() and taxa_are_rows() used below are phyloseq
# accessors; attach the package explicitly instead of relying on the session.
library(phyloseq)

## Import the main FEAST functions.
source("./FEAST-master/FEAST_src//src.R")


# Read the saved object that holds the OTU table and the sample data.
ps <- readRDS("./a3_DADA2_table/ps_OTU_.ps")
# Extract the sample grouping table and preview its first rows.
metadata <- as.data.frame(sample_data(ps))
head(metadata)

# Export the grouping table as CSV (written to the working directory).
write.csv(metadata, "metadata.csv", quote = FALSE)
# Load OTU table
# Return the OTU table of `physeq` as a plain matrix with samples in rows:
# a taxa-by-sample table is transposed, a sample-by-taxa table is passed
# through unchanged.
vegan_otu <- function(physeq) {
  abundance <- otu_table(physeq)
  if (!taxa_are_rows(abundance)) {
    return(as(abundance, "matrix"))
  }
  as(t(abundance), "matrix")
}
# Transposed copy of the OTU table as a data frame, used for the CSV export.
otus <- as.data.frame(t(vegan_otu(ps)))
# Save the table so the analysis can be repeated from plain CSV files.
write.csv(otus, "otus.csv", quote = FALSE)
# Flip back to a numeric matrix; the code below indexes samples by row.
otus <- t(as.matrix(otus))



### Split the samples into the sink (target) sample and its source samples.

# Sort the samples by SampleType and apply the same permutation to the OTU
# matrix: train.ix / test.ix are computed on `metadata` but are later used to
# index rows of `otus`, so both tables must stay in the same sample order.
# NOTE(review): assumes rows of `otus` and `metadata` start out in the same
# sample order (both are extracted from `ps`) -- confirm.
stopifnot(nrow(otus) == nrow(metadata))
sample.order <- order(metadata$SampleType)
metadata <- metadata[sample.order, , drop = FALSE]
otus <- otus[sample.order, , drop = FALSE]

# Take the environment labels after sorting so that envs[train.ix] and
# envs[test.ix] describe the samples those indices actually select.
envs <- metadata$SampleType

# Replicate number within each SampleType (1, 2, ... per group). For 4 groups
# of 6 samples this equals the former hard-coded rep(1:6, 4).
metadata$id <- ave(
  seq_len(nrow(metadata)),
  metadata$SampleType,
  FUN = seq_along
)
Ids <- na.omit(unique(metadata$id))
it <- 1 # replicate id analysed in this single-sink example

# Sources: the B, C and D samples of replicate `it`; sink: the matching A sample.
train.ix <- which(metadata$SampleType %in% c("B", "C", "D") &
                    metadata$id == Ids[it])
test.ix <- which(metadata$SampleType == "A" & metadata$id == Ids[it])


# Extract the source environments and source/sink indices

num_sources <- length(train.ix) # number of source samples (length of train.ix)
# Common depth: the smallest row sum of `otus` over the selected source and
# sink rows.
COVERAGE =  min(rowSums(otus[c(train.ix, test.ix),]))  #Can be adjusted by the user


# Rarefy both groups of samples (sources first, then the sink) to COVERAGE.
sources <- as.matrix(rarefy(otus[train.ix,], COVERAGE))
# otus[test.ix,] loses its matrix shape when test.ix selects a single row;
# t(as.matrix(...)) turns that vector back into a one-row matrix.
sinks <- as.matrix(rarefy(t(as.matrix(otus[test.ix,])), COVERAGE))

# Report the sink dimensions, its number of non-zero entries, the depth used
# and the environment label of the sink.
dim(sinks)
print(paste("Number of OTUs in the sink sample = ",length(which(sinks > 0))))
print(paste("Seq depth in the sources and sink samples = ",COVERAGE))
print(paste("The sink is:", envs[test.ix]))





# Estimate the source mixing proportions for the sink.
# Number of EM iterations handed to FEAST (the default value).
EM_iterations <- 1000

FEAST_output <- FEAST(
  source = sources,
  sinks = t(sinks),
  env = envs[train.ix],
  em_itr = EM_iterations,
  COVERAGE = COVERAGE
)

# First column of data_prop, labelled with the source environment names
# followed by "unknown".
Proportions_est <- setNames(
  FEAST_output$data_prop[, 1],
  c(as.character(envs[train.ix]), "unknown")
)

print("Source mixing proportions")
Proportions_est
round(Proportions_est, 3)

就正常样品而言，我们都会测定重复，这里进行基于多个样品的source tracker分析

基于多个目标和来源的微生物来源分析: different_sources_flag设置目标样品和来源样品的对应关系，即不同目标对应不同来源样品，还是不同目标对应相同来源样品




# sample_data(), otu_table() and taxa_are_rows() used below are phyloseq
# accessors; attach the package explicitly instead of relying on the session.
library(phyloseq)

## Import the main FEAST functions.
source("./FEAST-master/FEAST_src//src.R")


# Read the saved object that holds the OTU table and the sample data.
ps <- readRDS("./a3_DADA2_table/ps_OTU_.ps")
# Extract the sample grouping table and preview its first rows.
metadata <- as.data.frame(sample_data(ps))
head(metadata)
# Load OTU table
# Convert the OTU table of `physeq` to a base matrix whose rows are samples;
# the table is transposed first when its taxa are stored as rows.
vegan_otu <- function(physeq) {
  counts <- otu_table(physeq)
  oriented <- if (taxa_are_rows(counts)) t(counts) else counts
  as(oriented, "matrix")
}
# OTU counts as a numeric matrix with samples in rows (round trip through a
# data frame kept from the original walkthrough).
otus <- as.data.frame(t(vegan_otu(ps)))
otus <- t(as.matrix(otus))


head(metadata)

# Sort the samples by SampleType and apply the same permutation to the OTU
# matrix: the indices computed on `metadata` in the loop below are used to
# index rows of `otus`, so both tables must stay in the same sample order.
# NOTE(review): assumes rows of `otus` and `metadata` start out in the same
# sample order (both are extracted from `ps`) -- confirm.
stopifnot(nrow(otus) == nrow(metadata))
sample.order <- order(metadata$SampleType)
metadata <- metadata[sample.order, , drop = FALSE]
otus <- otus[sample.order, , drop = FALSE]

# Replicate number within each SampleType (1, 2, ... per group). For 4 groups
# of 6 samples this equals the former hard-coded rep(1:6, 4).
metadata$id <- ave(
  seq_len(nrow(metadata)),
  metadata$SampleType,
  FUN = seq_along
)
EM_iterations <- 1000 # default value
# 1: each sink uses only the sources sharing its replicate id;
# any other value: every sink uses all source samples.
different_sources_flag <- 1


envs <- metadata$SampleType
Ids <- na.omit(unique(metadata$id))
# One slot per replicate id, filled by the loop that follows.
Proportions_est <- vector("list", length(Ids))
it <- 1

# Run FEAST once per replicate id and store each result in Proportions_est.
# seq_along() keeps the loop from running when Ids is empty.
for (it in seq_along(Ids)) {
  # Extract the source environments and source/sink indices
  if (different_sources_flag == 1) {
    # Each sink gets its own sources: B/C/D samples sharing its replicate id.
    train.ix <- which(metadata$SampleType %in% c("B", "C", "D") &
                        metadata$id == Ids[it])
    test.ix <- which(metadata$SampleType == "A" & metadata$id == Ids[it])
  } else {
    # All sinks share the same sources: every B/C/D sample.
    train.ix <- which(metadata$SampleType %in% c("B", "C", "D"))
    test.ix <- which(metadata$SampleType == "A" & metadata$id == Ids[it])
  }

  num_sources <- length(train.ix)
  # Smallest row sum among the selected rows; can be adjusted by the user.
  COVERAGE <- min(rowSums(otus[c(train.ix, test.ix), ]))

  # Define sources and sinks
  sources <- as.matrix(rarefy(otus[train.ix, ], COVERAGE))
  sinks <- as.matrix(rarefy(t(as.matrix(otus[test.ix, ])), COVERAGE))

  print(paste("Number of OTUs in the sink sample = ", length(which(sinks > 0))))
  print(paste("Seq depth in the sources and sink samples = ", COVERAGE))
  print(paste("The sink is:", envs[test.ix]))

  # Estimate source proportions for each sink
  FEAST_output <- FEAST(
    source = sources,
    sinks = t(sinks),
    env = envs[train.ix],
    em_itr = EM_iterations,
    COVERAGE = COVERAGE
  )
  est <- FEAST_output$data_prop[, 1]

  # Pad a too-short result BEFORE naming it: the last value moves to the final
  # slot and the slot before it becomes NA. Previously this ran after the
  # names() assignment, which errors on a short vector, so it was never reached.
  if (length(est) < num_sources + 1) {
    tmp <- est
    est[num_sources] <- NA
    est[num_sources + 1] <- tmp[num_sources]
  }

  names(est) <- c(as.character(envs[train.ix]), "unknown")
  Proportions_est[[it]] <- est

  print("Source mixing proportions")
  print(Proportions_est[[it]])
}

print(Proportions_est)


# Bind the per-replicate proportion vectors into one data frame, one column
# per element of Proportions_est.
went <- as.data.frame(Proportions_est)
colnames(went) <- paste0("repeat_", unique(metadata$id))
head(went)

# Write the combined table into the output directory.
filename <- paste0(path, "/FEAST.csv")
write.csv(went, filename, quote = FALSE)

出图，简单出一张饼图供大家参考


library(RColorBrewer)
library(dplyr)
library(graphics)


head(went)

# Draw one pie chart per column of `went` into a single PDF.
plotname <- paste0(path, "/FEAST.pdf")
pdf(file = plotname, width = 12, height = 12)
# Two pies per row. NOTE(review): the number of rows is derived from the number
# of SampleType values rather than from ncol(went); kept as is -- confirm the
# intended layout.
par(mfrow = c((length(unique(metadata$SampleType)) %/% 2 + 2), 2),
    mar = c(1, 1, 1, 1))
# layouts = as.character(unique(metadata$SampleType))

# One colour per row of `went`, computed once instead of on every iteration.
slice.cols <- brewer.pal(nrow(went), "Reds")

for (i in seq_len(ncol(went))) {
  # Label each slice with its row name and its percentage of the column total.
  labs <- paste0(row.names(went), " \n(",
                 round(went[, i] / sum(went[, i]) * 100, 2), "%)")

  pie(went[, i], labels = labs, init.angle = 90, col = slice.cols,
      border = "black", main = colnames(went)[i])
}

dev.off()

基于多个重复，我们合并饼图展示

我们做生物实验可能会测定9个以上的重复，如果展示九个饼图，那就显得太夸张了，所以这里求均值，展示均值饼图

head(went)


# Average each row of `went` across its columns, held as a one-column matrix.
asx <- as.matrix(as.data.frame(rowMeans(went)))
# Divide the column by its sum (the "* 100" scaling stays disabled).
asx_norm <- t(t(asx) / colSums(asx)) #* 100 # normalization to total 100
head(asx_norm)

# Draw the averaged proportions as a single pie chart in its own PDF.
plotname <- paste0(path, "/FEAST_mean.pdf")
pdf(file = plotname, width = 6, height = 6)
labs <- paste0(
  row.names(asx_norm), " \n(",
  round(asx_norm[, 1] / sum(asx_norm[, 1]) * 100, 2), "%)"
)

pie(
  asx_norm[, 1],
  labels = labs,
  init.angle = 90,
  col = brewer.pal(nrow(went), "Reds"),
  border = "black",
  main = "mean of source tracker"
)
dev.off()

历史目录

R语言分析技术

扩增子专题

基于phyloseq的微生物群落分析

代谢组专题

编程模板: Shell R Perl

生物科普: 肠道细菌人体上的生命生命大跃进细胞暗战人体奥秘

写在后面

为鼓励读者交流、快速解决科研困难，我们建立了“宏基因组”专业讨论群，目前已有国内外5000+ 一线科研人员加入。参与讨论，获得专业解答，欢迎分享此文至朋友圈，并扫码加主编好友带你入群，务必备注“姓名-单位-研究方向-职称/年级”。PI请明示身份，另有海内外微生物相关PI群供大佬合作交流。技术问题寻求帮助，首先阅读《如何优雅的提问》学习解决问题思路，仍未解决群内讨论，问题不私聊，帮助同行。

学习16S扩增子、宏基因组科研思路和分析实战，关注“宏基因组”

观察｜官方通报陕西蒲城一职校学生坠亡：事发前与舍友发生口角和肢体冲突认定该生系高空坠落死亡

桐城一派｜倒在“跨年夜”的龚书记，13个字换来免职调查冤不冤？

比佟丽娅还恋爱脑，怀孕7次流产4次，目睹丈夫背叛却选择原谅

市管干部“龚书记”免职迷局

讣告！又一知名女星在家中去世，终年54岁，曾是无数人白月光…

微生物群落来自哪里，我们说了算-FEAST or SourceTracker

准备

微生物来源分析

就正常样品而言，我们都会测定重复，这里进行基于多个样品的source tracker分析

出图，简单出一张饼图供大家参考

基于多个重复，我们合并饼图展示

历史目录

R语言分析技术

扩增子专题

基于phyloseq的微生物群落分析

代谢组专题

当科研遇见python

科学知识图谱

杂谈

猜你喜欢

写在后面

您可能也对以下帖子感兴趣

观察｜官方通报陕西蒲城一职校学生坠亡：事发前与舍友发生口角和肢体冲突 认定该生系高空坠落死亡

桐城一派｜倒在“跨年夜”的龚书记，13个字换来免职调查冤不冤？

比佟丽娅还恋爱脑，怀孕7次流产4次，目睹丈夫背叛却选择原谅

市管干部“龚书记”免职迷局

讣告！又一知名女星在家中去世，终年54岁，曾是无数人白月光…

生成图片，分享到微信朋友圈

微生物群落来自哪里，我们说了算-FEAST or SourceTracker

准备

微生物来源分析

就正常样品而言，我们都会测定重复，这里进行基于多个样品的source tracker分析

出图，简单出一张饼图供大家参考

基于多个重复，我们合并饼图展示

历史目录

R语言分析技术

扩增子专题

基于phyloseq的微生物群落分析

代谢组专题

当科研遇见python

科学知识图谱

杂谈

猜你喜欢

写在后面

您可能也对以下帖子感兴趣

观察｜官方通报陕西蒲城一职校学生坠亡：事发前与舍友发生口角和肢体冲突认定该生系高空坠落死亡