其他
Molecular Cell 文章 ribosome pausing 结果复现 (四)
不过是大梦一场空
1引言
这部分复现文章里的一些基因的 track 图:
2准备最长转录本基因
prepareLongestCDStranscript("Mus_musculus.GRCm38.102.gff3","longestCDSinfo.txt")
3计算 density
sample = ["SRR12594201","SRR12594202","SRR12594203","SRR12594204","SRR12594205","SRR12594206",
"SRR12594207","SRR12594208","SRR12594209","SRR12594210","SRR12594211","SRR12594212",
"SRR12594213","SRR12594214","SRR12594215","SRR12594216","SRR12594217","SRR12594218"]
outname = ["Normal-rep1","mOSM500-2h-rep1","mOSM600-2h-rep1","mOSM700-15m-rep1","mOSM700-2h-rep1",
"mOSM700-15m-15mrecover-rep1","mOSM700-15m-2hrecover-rep1","mOSM700-2h-15mrecover-rep1","mOSM700-2h-2hrecover-rep1",
"Normal-rep2","mOSM500-2h-rep2","mOSM600-2h-rep2","mOSM700-15m-rep2","mOSM700-2h-rep2",
"mOSM700-15m-15mrecover-rep2","mOSM700-15m-2hrecover-rep2","mOSM700-2h-15mrecover-rep2","mOSM700-2h-2hrecover-rep2"]
for i in range(1,18)
CalculateCWRibosomeDensity("3.map-data/"*sample[i]*".uniq.txt","5.density-data/"*outname[i]*".density.txt")
end
4绘图
批量读取 density 文件:
library(data.table)
library(tidyverse)
library(ggplot2)
library(patchwork)
# load geneinfo
geneInfo <- read.table('longestCDSinfo.txt')
colnames(geneInfo) <- c('id','gene_name','gene_id','trans_id',
'chr','strand','cds_region','exon_region',
'utr5','cds','utr3')
# load gene
# Ndufb3 Ndufb6 Rps7
myGene <- geneInfo %>% dplyr::filter(gene_name %in% c('Ndufb3','Ndufb6','Rps7'))
# tags
ribo <- c("Normal-rep1","mOSM700-2h-rep1","Normal-rep2","mOSM700-2h-rep2")
group <- rep(c('Control','700-mOSM'),2)
# load density data
# x = 1
map_df(1:4,function(x){
# ribo denisty
ribo_tmp <- fread(paste('5.density-data/',ribo[x],'.trans.txt',sep = ''))
# add colnames
colnames(ribo_tmp) <- c('gene_id','transpos','density')
# filter
ribo_tmp <- ribo_tmp %>% dplyr::filter(gene_id %in% myGene$gene_id)
# add type
ribo_tmp$type <- group[x]
# add sample name
ribo_tmp$sample <- ribo[x]
return(ribo_tmp)
}) -> df_gene
数据预处理:
# get mean density
df_plot <- df_gene %>%
group_by(type,gene_id,transpos) %>%
summarise(meanDensity = mean(density))
paste(myGene$gene_name,myGene$utr5,myGene$cds,sep = '|')
# [1] "Rps7|107|585" "Ndufb3|289|315" "Ndufb6|84|387"
# add gene name
df_plot$id <- case_when(
df_plot$gene_id == 'ENSMUSG00000026032' ~ 'Ndufb3|289|315',
df_plot$gene_id == 'ENSMUSG00000071014' ~ 'Ndufb6|84|387',
df_plot$gene_id == 'ENSMUSG00000061477' ~ 'Rps7|107|585'
)
# add 5utr and cds
df_plot$gene_name <- sapply(strsplit(df_plot$id,split = '\\|'),'[',1)
df_plot$utr5 <- sapply(strsplit(df_plot$id,split = '\\|'),'[',2) %>% as.numeric()
df_plot$cds <- sapply(strsplit(df_plot$id,split = '\\|'),'[',3) %>% as.numeric()
# add regin type
df_plot$region_type <- ifelse(df_plot$transpos >= (df_plot$utr5 + 1) & df_plot$transpos <= (df_plot$utr5 + df_plot$cds),
'CDS','UTR')
绘图:
# oder
df_plot$type <- factor(df_plot$type,levels = c('Control','700-mOSM'))
df_plot$gene_name <- factor(df_plot$gene_name,levels = c('Ndufb3','Ndufb6','Rps7'))
# bacth plot
pc <- ggplot(df_plot %>% filter(type == 'Control'),
aes(x = transpos,y = meanDensity)) +
geom_col(aes(fill = region_type),color = NA,width = 1) +
scale_fill_manual(values = c('CDS' = '#FF5C58','UTR' = 'grey80'),
name = '') +
theme_bw(base_size = 14) +
theme(panel.grid = element_blank(),
strip.background = element_rect(colour = NA,fill = 'grey90'),
panel.spacing.y = unit(0,'mm'),
legend.position = 'top',
aspect.ratio = 0.4) +
ylab('Ribosome density (RPM)') +
xlab('Ribosome along transcript (nt)') +
facet_wrap(~gene_name,scales = 'free',ncol = 3)
pt <- ggplot(df_plot %>% filter(type == '700-mOSM'),
aes(x = transpos,y = meanDensity)) +
geom_col(aes(fill = region_type),color = NA,width = 1,
show.legend = F) +
scale_fill_manual(values = c('CDS' = '#FF5C58','UTR' = 'grey80'),
name = '') +
theme_bw(base_size = 14) +
theme(panel.grid = element_blank(),
strip.background = element_rect(colour = NA,fill = 'grey90'),
panel.spacing.y = unit(0,'mm'),
legend.position = 'top',
aspect.ratio = 0.4) +
ylab('Ribosome density (RPM)') +
xlab('Ribosome along transcript (nt)') +
facet_wrap(~gene_name,scales = 'free',ncol = 3)
pc / pt
其它基因的图类似:
####################################################
# track 2
####################################################
myGene <- geneInfo %>% dplyr::filter(gene_name %in% c('Uqcrfs1','Ndufv1'))
# tags
ribo <- c("Normal-rep1","Normal-rep2",
"mOSM700-2h-rep1","mOSM700-2h-rep2",
"mOSM700-2h-15mrecover-rep1","mOSM700-2h-15mrecover-rep2",
"mOSM700-2h-2hrecover-rep1","mOSM700-2h-2hrecover-rep2")
group <- rep(c('Control','700-mOSM','mOSM700-2h(15m)','mOSM700-2h(2h)'),each = 2)
# load density data
# x = 1
map_df(1:8,function(x){
# ribo denisty
ribo_tmp <- fread(paste('5.density-data/',ribo[x],'.trans.txt',sep = ''))
# add colnames
colnames(ribo_tmp) <- c('gene_id','transpos','density')
# filter
ribo_tmp <- ribo_tmp %>% dplyr::filter(gene_id %in% myGene$gene_id)
# add type
ribo_tmp$type <- group[x]
# add sample name
ribo_tmp$sample <- ribo[x]
return(ribo_tmp)
}) -> df_gene
# get mean density
df_plot <- df_gene %>%
group_by(type,gene_id,transpos) %>%
summarise(meanDensity = mean(density))
paste(myGene$gene_name,myGene$utr5,myGene$cds,sep = '|')
# [1] "Uqcrfs1|115|825" "Ndufv1|184|1395"
# add gene name
df_plot$id <- case_when(
df_plot$gene_id == 'ENSMUSG00000038462' ~ 'Uqcrfs1|115|825',
df_plot$gene_id == 'ENSMUSG00000037916' ~ 'Ndufv1|184|1395'
)
# add 5utr and cds
df_plot$gene_name <- sapply(strsplit(df_plot$id,split = '\\|'),'[',1)
df_plot$utr5 <- sapply(strsplit(df_plot$id,split = '\\|'),'[',2) %>% as.numeric()
df_plot$cds <- sapply(strsplit(df_plot$id,split = '\\|'),'[',3) %>% as.numeric()
# add regin type
df_plot$region_type <- ifelse(df_plot$transpos >= (df_plot$utr5 + 1) & df_plot$transpos <= (df_plot$utr5 + df_plot$cds),
'CDS','UTR')
# oder
df_plot$type <- factor(df_plot$type,levels = c('Control','700-mOSM',
'mOSM700-2h(15m)','mOSM700-2h(2h)'))
# bacth plot
pUqcrfs1 <- ggplot(df_plot %>% filter(gene_name == 'Uqcrfs1'),
aes(x = transpos,y = meanDensity)) +
geom_col(aes(fill = region_type),color = NA,width = 1,
show.legend = F) +
scale_fill_manual(values = c('CDS' = '#FF5C58','UTR' = 'grey80'),
name = '') +
theme_bw(base_size = 14) +
theme(panel.grid = element_blank(),
plot.title = element_text(hjust = 0.5),
strip.background = element_rect(colour = NA,fill = 'grey90'),
legend.position = 'left',
aspect.ratio = 0.4) +
ylab('Ribosome density (RPM)') +
xlab('Ribosome along transcript (nt)') +
facet_wrap(~type,scales = 'free',ncol = 1,strip.position = 'right') +
ggtitle('Uqcrfs1')
pNdufv1 <- ggplot(df_plot %>% filter(gene_name == 'Ndufv1'),
aes(x = transpos,y = meanDensity)) +
geom_col(aes(fill = region_type),color = NA,width = 1,
show.legend = F) +
scale_fill_manual(values = c('CDS' = '#FF5C58','UTR' = 'grey80'),
name = '') +
scale_y_continuous(position = 'right') +
theme_bw(base_size = 14) +
theme(panel.grid = element_blank(),
plot.title = element_text(hjust = 0.5),
strip.background = element_rect(colour = NA,fill = 'grey90'),
legend.position = 'top',
aspect.ratio = 0.4) +
ylab('') +
xlab('Ribosome along transcript (nt)') +
facet_wrap(~type,scales = 'free',ncol = 1,strip.position = 'left') +
ggtitle('Ndufv1')
pUqcrfs1 + pNdufv1
####################################################
# track 3
####################################################
myGene <- geneInfo %>% dplyr::filter(gene_name %in% c('Rad50','Fn1'))
# tags
ribo <- c("Normal-rep1","Normal-rep2",
"mOSM700-15m-rep1","mOSM700-15m-rep2")
group <- rep(c('Control','700-mOSM(15m)'),each = 2)
# load density data
# x = 1
map_df(1:4,function(x){
# ribo denisty
ribo_tmp <- fread(paste('5.density-data/',ribo[x],'.trans.txt',sep = ''))
# add colnames
colnames(ribo_tmp) <- c('gene_id','transpos','density')
# filter
ribo_tmp <- ribo_tmp %>% dplyr::filter(gene_id %in% myGene$gene_id)
# add type
ribo_tmp$type <- group[x]
# add sample name
ribo_tmp$sample <- ribo[x]
return(ribo_tmp)
}) -> df_gene
# get mean density
df_plot <- df_gene %>%
group_by(type,gene_id,transpos) %>%
summarise(meanDensity = mean(density))
paste(myGene$gene_name,myGene$utr5,myGene$cds,sep = '|')
# [1] "Rad50|264|3939" "Fn1|193|7434"
# add gene name
df_plot$id <- case_when(
df_plot$gene_id == 'ENSMUSG00000020380' ~ 'Rad50|264|3939',
df_plot$gene_id == 'ENSMUSG00000026193' ~ 'Fn1|193|7434'
)
# add 5utr and cds
df_plot$gene_name <- sapply(strsplit(df_plot$id,split = '\\|'),'[',1)
df_plot$utr5 <- sapply(strsplit(df_plot$id,split = '\\|'),'[',2) %>% as.numeric()
df_plot$cds <- sapply(strsplit(df_plot$id,split = '\\|'),'[',3) %>% as.numeric()
# add regin type
df_plot$region_type <- ifelse(df_plot$transpos >= (df_plot$utr5 + 1) & df_plot$transpos <= (df_plot$utr5 + df_plot$cds),
'CDS','UTR')
# oder
df_plot$type <- factor(df_plot$type,levels = c('Control','700-mOSM(15m)'))
# bacth plot
pRad50 <- ggplot(df_plot %>% filter(gene_name == 'Rad50'),
aes(x = transpos,y = meanDensity)) +
geom_col(aes(fill = region_type),color = NA,width = 1,
show.legend = F) +
scale_fill_manual(values = c('CDS' = '#FF5C58','UTR' = 'grey80'),
name = '') +
theme_bw(base_size = 14) +
theme(panel.grid = element_blank(),
plot.title = element_text(hjust = 0.5),
strip.background = element_rect(colour = NA,fill = 'grey90'),
legend.position = 'left',
aspect.ratio = 0.4) +
ylab('Ribosome density (RPM)') +
xlab('Ribosome along transcript (nt)') +
facet_wrap(~type,scales = 'free',ncol = 1) +
ggtitle('Rad50')
pFn1 <- ggplot(df_plot %>% filter(gene_name == 'Fn1'),
aes(x = transpos,y = meanDensity)) +
geom_col(aes(fill = region_type),color = NA,width = 1,
show.legend = F) +
scale_fill_manual(values = c('CDS' = '#FF5C58','UTR' = 'grey80'),
name = '') +
scale_y_continuous(position = 'right') +
theme_bw(base_size = 14) +
theme(panel.grid = element_blank(),
plot.title = element_text(hjust = 0.5),
strip.background = element_rect(colour = NA,fill = 'grey90'),
legend.position = 'top',
aspect.ratio = 0.4) +
ylab('') +
xlab('Ribosome along transcript (nt)') +
facet_wrap(~type,scales = 'free',ncol = 1) +
ggtitle('Fn1')
pRad50 + pFn1
5结尾
复现结果和原文基本上是差不多的,虽然方法不一样,但是结论不会变。
欢迎加入生信交流群。加我微信我也拉你进 微信群聊 老俊俊生信交流群
(微信交流群满200人后需收取20元入群费用), 数据代码已上传至QQ群,欢迎加入下载。
群二维码:
老俊俊微信:
知识星球:
所以今天你学习了吗?
今天的分享就到这里了,敬请期待下一篇!
最后欢迎大家分享转发,您的点赞是对我的鼓励和肯定!
如果觉得对您帮助很大,赏杯快乐水喝喝吧!
往期回顾
◀Molecular Cell 文章 ribosome pausing 结果复现 (三)
◀Molecular Cell 文章 ribosome pausing 结果复现 (二) (PCR 去重)
◀Molecular Cell 文章 ribosome pausing 结果复现 (一)
◀...