其他
shiny CountToTPM/FPKM
定量
转录组数据分析上游分析的最后步骤就是对比对的bam文件进行定量,用的比较多的是HTseq和Subread软件里面的featureCounts函数也可以定量。
下面是它们文章的引用量:
featureCounts使用方法可参考:转录组定量-featureCounts[1]
基本用法
featureCounts -T 15 \ # 使用线程数
-t exon \ # 对什么feature进行定量(gene、transcript、exon、cds等)
-p \ # 测序是双端就加上,单端不加,paired的意思
-g gene_id \ # 根据gtf文件提取meta信息
--extraAttributes gene_name \ # 把基因名也一起输出,推荐
-a gencode.gtf \ # 注释文件
-o test.count.txt \ #输出文件
A.bam B.bam C.bam # bam文件
输出文件内容
counts to TPM/FPKM
定量count结果得到后一边直接走差异分析,一边将count值转为TPM或者FPKM值进行下游可视化分析,比如像热图heatmap,每次想得到tpm/fpkm值都得写代码,还得把gene_id转为基因名,于是为了解放一下双手,就干脆写个shiny小程序就行了。
基本思路:
1.直接输入featureCounts输出的原始count文件 2.点击submit一键获得tpm/fpkm标准化值 3.点击download一键下载
ui部分:
library(shiny)
library(DT)
library(ggsci)
library(colourpicker)
library(stringr)
library(data.table)
library(shinythemes)
library(shinycssloaders)
# 设置上传数据大小,最大100M
options(shiny.maxRequestSize=100*1024^2)
ui <- navbarPage(
theme = shinytheme("simplex"),
title = tags$strong('ZhouLab Normalization'),
tabPanel(
'Normalization',icon = icon('balance-scale'),
tabsetPanel(
tabPanel(
'upload',
fileInput('data',
h4('Upload Count data'),
accept = c("text/csv",
"text/comma-separated-values,text/plain",
".csv"))
),
tabPanel('example data',
hr(),
# 这里可以在www文件夹下放一张上传数据的示例文件图片
tags$img(src='count.PNG'))
),
wellPanel(style = "background: #faf0af",
h4('1、The raw data showed here:',h5('the Chr:Start:End:Strand is too long,not showed here)',style='color:#01a9b4')),
hr(style="border-color: #29bb89"),
DT::dataTableOutput('raw_table')),
hr(),
h4('2、The normalized data showed here:'),
hr(),
actionButton('nor_submit','submit',icon = icon('android')),
hr(style="border-color: #29bb89"),
fluidRow(column(6,
wellPanel(style = "background: #fcdada",
h4(tags$strong('TPM table',style='color:#f39189')),
withSpinner(type = 7,size = 0.5,DT::dataTableOutput('tpm_table')),
hr(),
downloadButton('download_tpm_table','Download'))),
column(6,
wellPanel(style = "background: #99f3bd",
h4(tags$strong('FPKM table',style='color:#0278ae')),
withSpinner(type = 7,size = 0.5,DT::dataTableOutput('fpkm_table')),
hr(),
downloadButton('download_fpkm_table','Download')))),
)
)
server部分:
server <- function(input, output) {
raw_tab <- reactive({
infile <- input$data$datapath
if (is.null(infile))
return(NULL)
d <- infile
type <- str_sub(d,-3)
if(type=='csv'){
count <- fread(infile,header = T,sep = ',',skip = 1)
}else{
count <- fread(infile,header = T,sep = '\t',skip = 1)
}
})
#' @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
output$raw_table <- renderDataTable(options = list(scrollX = TRUE,
aLengthMenu = c(5, 10, 15),
iDisplayLength = 5),{
filter <- raw_tab()[,c(-2:-5)]
filter
})
# ---------------------------------------------------------------
nor_tpm_dat <- eventReactive(input$nor_submit,{
infile <- input$data$datapath
if (is.null(infile))
return(NULL)
d <- infile
type <- str_sub(d,-3)
if(type=='csv'){
counts <- read.table(infile,skip = '#',header = T,row.names = 1,sep = ',')
}else{
counts <- read.table(infile,skip = '#',header = T,row.names = 1,sep = '\t')
}
my_couts <- counts[,-1:-6]
gene_anno=data.frame(gene_id=rownames(counts),gene_name=counts[,6])
##############################################################
#计算tpm
kb <- counts$Length/1000
rpk <- my_couts/kb
tpm <- t(t(rpk)/colSums(rpk)*1000000)
tpm=as.data.frame(tpm)
tpm$gene_id <- rownames(tpm)
tpm_anno=merge(tpm,gene_anno,by='gene_id')
# write.csv(tpm_anno,file = 'TPM-anno.csv',row.names = F)
})
# @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
nor_fpkm_dat <- eventReactive(input$nor_submit,{
infile <- input$data$datapath
if (is.null(infile))
return(NULL)
d <- infile
type <- str_sub(d,-3)
if(type=='csv'){
counts <- read.table(infile,skip = '#',header = T,row.names = 1,sep = ',')
}else{
counts <- read.table(infile,skip = '#',header = T,row.names = 1,sep = '\t')
}
my_couts <- counts[,-1:-6]
gene_anno=data.frame(gene_id=rownames(counts),gene_name=counts[,6])
##############################################################
#计算rpk
kb <- counts$Length/1000
rpk <- my_couts/kb
##############################################################
#计算fpkm
fpkm <- t(t(rpk)/colSums(my_couts)*1000000)
fpkm=as.data.frame(fpkm)
fpkm$gene_id <- rownames(fpkm)
fpkm_anno=merge(fpkm,gene_anno,by='gene_id')
# write.csv(fpkm_anno,file = 'FPKM-anno.csv',row.names = F)
})
# -------------------------------------fpkm_table tpm_table
output$tpm_table <- renderDT(options = list(scrollX = TRUE),{
nor_tpm_dat()
})
output$fpkm_table <- renderDT(options = list(scrollX = TRUE),{
nor_fpkm_dat()
})
#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
# download_tpm_table download_fpkm_table
output$download_tpm_table <- downloadHandler(
filename = function() {
paste('Normalized-TPM', '.csv',sep = "")
},
content = function(file) {
write.csv(nor_tpm_dat(),file,row.names = FALSE)
})
output$download_fpkm_table <- downloadHandler(
filename = function() {
paste('Normalized-FPKM', '.csv',sep = "")
},
content = function(file) {
write.csv(nor_fpkm_dat(),file,row.names = FALSE)
})
}
启动app:
# Run the application
shinyApp(ui = ui, server = server)
运行App
示例数据:
拿个测试数据试一试:
完成!大功告成。
欢迎小伙伴留言评论!
今天的分享就到这里了,敬请期待下一篇!
最后欢迎大家分享转发,您的点赞是对我的鼓励和肯定!
如果觉得对您帮助很大,打赏一下吧!
参考资料转录组定量-featureCounts: https://www.bioinfo-scrounger.com/archives/407/