MeRIP-seq 数据分析之 peak 热图可视化
梦还长,梦还多
1引言
刚开始自学生信时,看到这篇文章的图并不知道啥意思,久经磨砺,现在已经了解并能掌握这些类似的图其画法,也算是进步吧,带着理解去画图才是最重要的:
这类图主要出现在 Chip-seq 之类的数据分析中,揭示转录因子或组蛋白标记富集在某个特征位点附近。在 MeRIP-seq 数据分析中,也可以加入类似的图,今天就学习怎么绘制类似的图。
2分析
利用我们之前转换的 bigwig 文件及 m6A peak 文件,我们可以可视化 m6A peak 周围的 reads 的 density 的情况:
library(rtracklayer)
library(tidyverse)
library(GenomicRanges)
# 加载bw文件
ip <- import.bw('../5.bigwig-data/SRR14765647.bw')
input <- import.bw('../5.bigwig-data/SRR14765646.bw')
m3ko <- import.bw('../5.bigwig-data/SRR14765641.bw')
# 查看内容
head(ip,3)
GRanges object with 3 ranges and 1 metadata column:
seqnames ranges strand | score
<Rle> <IRanges> <Rle> | <numeric>
[1] chr1 1-3053960 * | 0.0000000
[2] chr1 3053961-3054110 * | 0.0446246
[3] chr1 3054111-3054310 * | 0.0000000
-------
seqinfo: 66 sequences from an unspecified genome
# 加载m6A peak
m6A_peak <- import.bed('../6.macs-data/WT_rep1_summits.bed')
# 查看内容
head(m6A_peak,3)
GRanges object with 3 ranges and 2 metadata columns:
seqnames ranges strand | name score
<Rle> <IRanges> <Rle> | <character> <numeric>
[1] chr1 4776506 * | WT_rep1_peak_1 80.1210
[2] chr1 4782033 * | WT_rep1_peak_2 31.0281
[3] chr1 6240166 * | WT_rep1_peak_3 107.7288
-------
seqinfo: 22 sequences from an unspecified genome; no seqlengths
这里我们导入 macs2 的 peak summits 文件,也可以使用 exomePeak2 的 peak 文件,计算一下 peak 中心位置作为输入,可能和 summits 含有不太一致,前者是 peak 的最高位置坐标,后者是 peak 中心位置。
查看数据分布,看看是不是有极值:
# 加载R包
library(EnrichedHeatmap)
# 查看数据分布
quantile(ip$score, c(0, 0.25, 0.5, 0.75,0.99, 1))
0% 25% 50% 75% 99% 100%
0.00000e+00 4.46246e-02 2.45436e-01 1.29411e+00 2.58600e+01 1.50008e+03
计算富集矩阵:
# wt
mat_ip = normalizeToMatrix(ip, m6A_peak, value_column = "score",
keep = c(0, 0.99),
extend = 2000, mean_mode = "w0", w = 20)
# input
mat_input = normalizeToMatrix(input, m6A_peak, value_column = "score",
keep = c(0, 0.99),
extend = 2000, mean_mode = "w0", w = 20)
# m3ko
mat_m3ko = normalizeToMatrix(m3ko, m6A_peak, value_column = "score",
keep = c(0, 0.99),
extend = 2000, mean_mode = "w0", w = 20)
# 查看
mat_ip
Normalize ip to m6A_peak:
Upstream 2000 bp (100 windows)
Downstream 2000 bp (100 windows)
Include target regions (width = 1)
12489 target regions
绘制富集热图:
这里 窗口参数 w 设置越小分辨率越高,时间越久:
# 绘图
ht_list = EnrichedHeatmap(mat_ip, col = c("white", "red"),
name = "m6A IP",column_title = 'WT IP Sample',
top_annotation = HeatmapAnnotation(
enriched = anno_enriched(
at = seq(0,10,2),
axis_param = list(side = "left",facing = "outside"))
)) +
EnrichedHeatmap(mat_input, col = c("white", "#4E9F3D"),
name = "m6A INPUT",column_title = 'WT INPUT Sample',
top_annotation = HeatmapAnnotation(
enriched = anno_enriched(
ylim = c(0, 10),at = seq(0,10,2),
gp = gpar(col = '#4E9F3D'),
axis_param = list(side = "right",facing = "outside"))
)) +
EnrichedHeatmap(mat_m3ko, col = c("white", "#125D98"),
name = "METTL3 ko",column_title = 'Mettl3-ko Sample',
top_annotation = HeatmapAnnotation(
enriched = anno_enriched(
ylim = c(0, 10),at = seq(0,10,2),
gp = gpar(col = '#125D98'),
axis_param = list(side = "right",facing = "outside"))
))
# 设置图宽度
draw(ht_list,ht_gap = unit(c(8, 8, 8), "mm"))
可以明显看到,METTL3 ko 以后,reads 在 m6A peak 位置的富集明显减少。
在转录本上的分布:
首先获取 转录本的位置坐标 ,读取 gtf 文件提取获得:
# 读取gtf文件
gtf <- import('../mm10.ncbiRefSeq.gtf',format = 'gtf') %>% as.data.frame()
# 提取转录本位置
sp <- gtf %>% filter(type == 'transcript') %>% .[,c(1:3,10,4,5)] %>%
unique() %>%
makeGRangesFromDataFrame()
# 查看内容
head(sp,3)
GRanges object with 3 ranges and 0 metadata columns:
seqnames ranges strand
<Rle> <IRanges> <Rle>
1 chr1 3199733-3672278 -
2 chr1 3214482-3671498 -
3 chr1 3322816-3672278 -
-------
seqinfo: 45 sequences from an unspecified genome; no seqlengths
这里就展示 WT 样本的,其它代码一样:
# wt
mat_ip = normalizeToMatrix(ip, sp, value_column = "score",
keep = c(0, 0.99),
extend = 2000, mean_mode = "w0", w = 100)
# 绘图
EnrichedHeatmap(mat_ip, col = c("white", "#E02401"),
name = "m6A IP",
column_title = 'WT IP Sample',
use_raster = F,
top_annotation = HeatmapAnnotation(
enriched = anno_enriched(
ylim = c(0, 1),at = seq(0,1,0.2),
gp = gpar(col = '#E02401',size = 1),
axis_param = list(side = "left",facing = "outside"))
))
绘制起始密码子及终止密码子附件的热图:
提取 起始密码子 和 终止密码子 位置信息绘图即可:
# 提取start_codon位置
st <- gtf %>% filter(type == 'start_codon') %>% .[,c(1:3,10,4,5)] %>%
unique() %>%
makeGRangesFromDataFrame()
# 提取stop_codon位置
stp <- gtf %>% filter(type == 'stop_codon') %>% .[,c(1:3,10,4,5)] %>%
unique() %>%
makeGRangesFromDataFrame()
# 查看内容
head(stp,3)
GRanges object with 3 ranges and 0 metadata columns:
seqnames ranges strand
<Rle> <IRanges> <Rle>
1 chr1 3216022-3216024 -
3 chr1 3323747-3323749 -
4 chr1 4115935-4115937 -
-------
seqinfo: 45 sequences from an unspecified genome; no seqlengths
起始密码子:
# 在起始密码子附件绘图
# wt
mat_ip = normalizeToMatrix(ip, st, value_column = "score",
keep = c(0, 0.99),
extend = 2000, mean_mode = "w0", w = 100)
# input
mat_input = normalizeToMatrix(input, st, value_column = "score",
keep = c(0, 0.99),
extend = 2000, mean_mode = "w0", w = 100)
# m3ko
mat_m3ko = normalizeToMatrix(m3ko, st, value_column = "score",
keep = c(0, 0.99),
extend = 2000, mean_mode = "w0", w = 100)
# 在起始密码子附件绘图
ht_list = EnrichedHeatmap(mat_ip, col = c("white", "#E02401"),
name = "m6A IP",
column_title = 'WT IP Sample',
use_raster = F,
top_annotation = HeatmapAnnotation(
enriched = anno_enriched(
ylim = c(0, 2),at = seq(0,2,0.4),
gp = gpar(col = '#E02401',size = 1),
axis_param = list(side = "left",facing = "outside"))
)) +
EnrichedHeatmap(mat_input, col = c("white", "#4E9F3D"),
name = "m6A INPUT",column_title = 'WT INPUT Sample',
use_raster = F,
top_annotation = HeatmapAnnotation(
enriched = anno_enriched(
ylim = c(0, 2),at = seq(0,2,0.4),
gp = gpar(col = '#4E9F3D',size = 1),
axis_param = list(side = "right",facing = "outside"))
)) +
EnrichedHeatmap(mat_m3ko, col = c("white", "#125D98"),
name = "METTL3 ko",column_title = 'Mettl3-ko Sample',
use_raster = F,
top_annotation = HeatmapAnnotation(
enriched = anno_enriched(
ylim = c(0, 2),at = seq(0,2,0.4),
gp = gpar(col = '#125D98',size = 1),
axis_param = list(side = "right",facing = "outside"))
))
draw(ht_list,ht_gap = unit(c(8, 8, 8), "mm"))
METTL3 ko 后 start codon 处 m6A 信号上升。
终止密码子:
# 在终止密码子附件绘图
# wt
mat_ip2 = normalizeToMatrix(ip, stp, value_column = "score",
keep = c(0, 0.99),
extend = 2000, mean_mode = "w0", w = 100)
# input
mat_input2 = normalizeToMatrix(input, stp, value_column = "score",
keep = c(0, 0.99),
extend = 2000, mean_mode = "w0", w = 100)
# m3ko
mat_m3ko2 = normalizeToMatrix(m3ko, stp, value_column = "score",
keep = c(0, 0.99),
extend = 2000, mean_mode = "w0", w = 100)
# 在终止密码子附件绘图
ht_list = EnrichedHeatmap(mat_ip2, col = c("white", "#E02401"),
name = "m6A IP",
column_title = 'WT IP Sample',
use_raster = F,
top_annotation = HeatmapAnnotation(
enriched = anno_enriched(
ylim = c(0, 2),at = seq(0,2,0.4),
gp = gpar(col = '#E02401',size = 1),
axis_param = list(side = "left",facing = "outside"))
)) +
EnrichedHeatmap(mat_input2, col = c("white", "#4E9F3D"),
name = "m6A INPUT",column_title = 'WT INPUT Sample',
use_raster = F,
top_annotation = HeatmapAnnotation(
enriched = anno_enriched(
ylim = c(0, 2),at = seq(0,2,0.4),
gp = gpar(col = '#4E9F3D',size = 1),
axis_param = list(side = "right",facing = "outside"))
)) +
EnrichedHeatmap(mat_m3ko2, col = c("white", "#125D98"),
name = "METTL3 ko",column_title = 'Mettl3-ko Sample',
use_raster = F,
top_annotation = HeatmapAnnotation(
enriched = anno_enriched(
ylim = c(0, 2),at = seq(0,2,0.4),
gp = gpar(col = '#125D98',size = 1),
axis_param = list(side = "right",facing = "outside"))
))
draw(ht_list,ht_gap = unit(c(8, 8, 8), "mm"))
METTL3 ko 后 stop codon m6A 信号明显下降。
欢迎加入生信交流群。加我微信我也拉你进 微信群聊 老俊俊生信交流群
哦,代码已上传至QQ群文件夹,欢迎下载。
群二维码:
老俊俊微信:
知识星球:
所以今天你学习了吗?
欢迎小伙伴留言评论!
今天的分享就到这里了,敬请期待下一篇!
最后欢迎大家分享转发,您的点赞是对我的鼓励和肯定!
如果觉得对您帮助很大,赏杯快乐水喝喝吧!
往期回顾
◀MeRIP-seq 数据分析之 homer 富集 motif
◀MeRIP-seq 数据分析之 homer 注释 peaks
◀MeRIP-seq 数据分析之 callpeak 及 peak 可视化
◀eRNA 上的 m6A 修饰可以促进转录凝聚物的形成和基因激活
◀...