其他
性能提升3-4倍!贝壳基于Flink + OceanBase的实时维表服务
作者介绍:肖赞,贝壳找房(北京)科技有限公司 OLAP 平台负责人,基础研发线大数据平台部架构师。
贝壳找房是中国最大的居住服务平台。作为居住产业数字化服务平台,贝壳致力于推进居住服务的产业数字化、智能化进程,通过聚合、助力优质服务者,为中国家庭提供包括二手房交易、新房交易、租赁、家装、家居、家服等一站式、高品质、高效率服务。
前几天,我们在《贝壳降本提效实践:基于 OceanBase 的实时字典服务》中,介绍了实时字典服务的应用场景,在上线 OceanBase 后,贝壳获得了更高的查询性能和稳定性。今天为大家介绍 OceanBase 在贝壳的第二个应用场景——实时维表服务,通过替代原有的 HBase 维表服务,让贝壳的性能提升了 3-4 倍,硬件成本节省了一半,与此同时,运维成本也大幅降低。
在典型的实时数仓或实时业务场景里,Flink 实时流处理过程中,经常需要将事实表与外部维度表进行关联,查询维度表,补全事实表中的信息。例如,在贝壳家居等业务场景中,需要在用户下单后将订单信息与维度表中的商品相关信息进行实时关联。考虑到维表数据量较大,并且 Flink 实时查询 QPS 较高,传统数据库 MySQL 等难以支撑,因此,贝壳采用 HBase 存储维表。HBase 是一个分布式列存储 NoSQL 数据库,具有较好的查询性能,但是也存在一些痛点。
-- Print the stored DDL of the range-partitioned benchmark dimension table.
SHOW CREATE TABLE tb_dim_benchmark_range_partitioned;
-- Benchmark dimension table: a BIGINT key (t1) plus VARCHAR(200) payload
-- columns up to t30, range-partitioned on the key.
CREATE TABLE `tb_dim_benchmark_range_partitioned`
(
    t1 BIGINT(20) NOT NULL,
    t2 VARCHAR(200) DEFAULT NULL,
    -- the columns between t2 and t30 are elided in this listing
    ……
    t30 VARCHAR(200) DEFAULT NULL,
    -- the key clause belongs inside the column list, before the closing parenthesis
    PRIMARY KEY (`t1`)
) DEFAULT CHARSET = utf8mb4
  ROW_FORMAT = COMPACT
  COMPRESSION = 'zstd_1.3.8'
  REPLICA_NUM = 3
  BLOCK_SIZE = 16384
  USE_BLOOM_FILTER = FALSE
  TABLET_SIZE = 134217728 -- 128 * 1024 * 1024
  PCTFREE = 0
-- Ten partitions, each covering a span of 10,000,000 key values.
-- No partition is declared for t1 >= 100000000.
PARTITION BY RANGE (t1)
(
    PARTITION PT1 VALUES LESS THAN (10000000),
    PARTITION PT2 VALUES LESS THAN (20000000),
    PARTITION PT3 VALUES LESS THAN (30000000),
    PARTITION PT4 VALUES LESS THAN (40000000),
    PARTITION PT5 VALUES LESS THAN (50000000),
    PARTITION PT6 VALUES LESS THAN (60000000),
    PARTITION PT7 VALUES LESS THAN (70000000),
    PARTITION PT8 VALUES LESS THAN (80000000),
    PARTITION PT9 VALUES LESS THAN (90000000),
    PARTITION PT10 VALUES LESS THAN (100000000)
);
-- Fetch a single row (no ORDER BY, so whichever row comes back first)
-- to show what the benchmark data looks like.
SELECT *
FROM tb_dim_benchmark_range_partitioned
LIMIT 1;
# 10000000,c5181f1335efd950960f41cbecb1ab0ed97c43502252b99834f4b6905ea7f7490ca72e1d676bbe9b77016d23e52ada249f2c,
2b5480769a360133d57f09cba16d1c449cc06b42b614bcfa3f9db6bbf7a04bac2be1d373d11c63a77676daf53111c2321b32,
db88f926925d87175aa4be6740f6f2f49d8f8b38f0d0efff2e5e832f3c1aec21e06cc4f2f0b5053e0b9fbab8a16cce80b9ff,
9c0b94cdde25b68264704c890d141444d28544a7ce4955856b3115f913442ec4bc741f033477e366005c927e41842a7cd9be,
4d69eedaae9e42b4ab7388e66992efddfa39cbb6802cf69b97c5892070a68e6eed51f823770587771a49cbbd1b7be1f2e024,
c60b30f6c4e1b3c02d6fb2de58badf8097f782a8534e0c9dc78497ede12b2573e2d9441e0596f37739d26f0830918fb03ff5,
a8a01cbe3bd44e6d52b7e83bd020a23ae305713fd376a0627f610302018c39ec3aa540519dccceb764324282dfbf0bdda6cc,
fd358773a94c1770980e92e66fcd9e4f70d6f3ef35dff86c65a97826698c750489682c2d1d36ab75ddb588da65b61cd6fc63,
cb8a60222389c9ff9ff4e4e492a4f16ed7ea0e6b781379afc7fad78539fbf8da54b0ef8ea7ef9680543ebc0c18a908092bd1,
9cdbf58a3d454d2b14ebf17167d045887ab5eb3a21d3916acc393475011a079c350295fa8b4b324dab63a00f1fbadfb22edb,
cda510824ef5bc82cd4e014c851ed367dbd6da8828cc261070a0db9cc9341764baf445506a12a7eb7265434f29d63c65b3a1,
8d7c4bbcd42364b93b8cae11eff8f50115e36f1f4f4e6a492687bc2374444c4eaf80e1903eb13fcdfbea6f00de999e0f0587,
107b23e4b7e5a16149a8ea7f75c45c607bb5974cbbdf36077615d92591f4830ec5b2b33945d82e8e526f92cb0072cbf8a260,
cda4ab39b6f2b67d1d283077a1beb01771639eff1ae371372bb2555de594699821d43509fdd7014bcf3e5098bd13c30c8199,
a330f59ee2e48051362241f9a24ba1adad4b61fdd18676cd209799bbec6775dc01120abb0e157589d3f594051b5ae2dd6572,
b8e98c3979610c67ea65433a560ab6cf8663c9de201ae1051a14034b317f90aaa1085b49eba3d86748677f4e0575169fb76c,
6753542147a9cf38f4d040f205483a798d1d2a2c0cf2283ec98c735bf82422a8ecdea432cee8c76a00917b8add7eac5aa0b4,
8d8e0c2caeed82f21ddb288affe2fb567c008e8982cb5a4d07343dc4fd6679f550856649fe4bd40eec9747485c660b01e55e,,
c261014cf13c462815e0afece1512409d2549a699e33eaf8cb23b0b23719c870c83817fcaa7466d5d88a1ae240458ba0201a,
2ac55e6bc39eb79694bf00c2b69768365b7833d9f0cb7df078525d9ab98eba5ce2bfc3cc1fb9f4398c49c16073fb5d863172,
77ab6010d7bc664b6861927322276b2d35d4f5ff2d6bc2eec3da9ef936ae836dfbed6783a8c7f9970e19d46e43b52e49a0f6,
4109f993c94f8ca40c6932d01a726fb173beb60e34b57bf488f86fe9e6c12f7f7497c720fa95099c6a43cb3442444b367ea4,
7891bdf8a52dc19d311f392fa5f34509c6dcb33b8b8e291131ca5d46c517ea0933868874244aff1b3345ea5279fe0c659709,
200e69e8ec8e6104834596c2fefe8ed772ba9b7de4f1287c91c3b91469dd985fbb93d55a9497b2606ae9003975458b6b054b,
a2de28933b2cf1f9166cf3aab732f5c6b68967eddef0472a8577a82f37e77bcfc45a5e0adc11d382160d3c84ec14e0e75b5d,
1fa6bcf4d9ef2076aa016e78db575595a9155dfe6484a9812ae690fc20c244bf2d09355ba7dbc32495330a21b6e3c893ba6b,
b01a0b4ba3d8ae159d330720bb8baffe3ad2504b221151b8f68304ed7c14a03d21f75a4e6ad16873ea0c8904717478d3f7c4,
a00ae3e9a8c89f5a0f0fae92934d23adeb9117ef7c91f80f0d5306eca558b77422f273283e867a6b7320e91895087e652ed7
-- Flink source table backed by the datagen connector; it supplies the
-- probe side of the dimension-table join benchmark.
CREATE TABLE `data_gen_source` (
    `t1` BIGINT,
    `t2` VARCHAR,
    -- computed processing-time column
    `proctime` AS PROCTIME()
) WITH (
    'connector' = 'datagen',
    -- requested emit rate
    'rows-per-second' = '100000000',
    -- t1 is drawn at random between 1 and 100000
    'fields.t1.kind' = 'random',
    'fields.t1.min' = '1',
    'fields.t1.max' = '100000'
);
-- Flink-side declaration of the external dimension table, reached through
-- the JDBC connector with a MySQL driver class.
CREATE TABLE `tb_dim_benchmark_1` (
    `t1` BIGINT,
    `t2` VARCHAR,
    -- the columns between t2 and t30 are elided in this listing
    ……
    `t30` VARCHAR,
    PRIMARY KEY (`t1`) NOT ENFORCED
) WITH (
    'connector' = 'jdbc',
    'driver' = 'com.mysql.jdbc.Driver',
    -- the connection URL is left blank in this listing
    'url' = '',
    -- physical table to query; judging by the name, presumably the 10w (100,000-row) variant
    'table-name' = 'tb_dim_benchmark_range_partitioned_10w',
    'sink.buffer-flush.max-rows' = '500'
);
-- Sink table for the join result, backed by Flink's blackhole connector.
CREATE TABLE blackhole_table (
    `t1` BIGINT,
    `t2` VARCHAR,
    -- the columns between t2 and t30 are elided in this listing
    ……
    `t30` VARCHAR
) WITH (
    'connector' = 'blackhole'
);
-- Lookup-join benchmark: for every generated row, look up the dimension row
-- with the same t1 as of the row's processing time and write the key plus
-- the dimension columns t2..t30 to the sink. LEFT JOIN keeps generated rows
-- that have no match in the dimension table.
-- The select list must supply all 30 sink columns (t1..t30) in order.
INSERT INTO blackhole_table
SELECT
    tb1.`t1`,
    tb2.`t2`,
    tb2.`t3`,
    tb2.`t4`,
    tb2.`t5`,
    tb2.`t6`,
    tb2.`t7`,
    tb2.`t8`,
    tb2.`t9`,
    tb2.`t10`,
    tb2.`t11`,
    tb2.`t12`,
    tb2.`t13`,
    tb2.`t14`,
    tb2.`t15`,
    tb2.`t16`,
    tb2.`t17`,
    tb2.`t18`,
    tb2.`t19`,
    tb2.`t20`,
    tb2.`t21`,
    tb2.`t22`,
    tb2.`t23`,
    tb2.`t24`,
    tb2.`t25`,
    tb2.`t26`,
    tb2.`t27`,
    tb2.`t28`,
    tb2.`t29`,
    tb2.`t30`
FROM `data_gen_source` AS tb1
LEFT JOIN `tb_dim_benchmark_1` FOR SYSTEM_TIME AS OF tb1.`proctime` AS tb2
    ON tb1.`t1` = tb2.`t1`;
维表 1 对 1 关联,即 DataGen 生成随机值与 OceanBase(索引字段)和HBase(RowKey)关联,测试数据如下表所示。
维表 1 对 N 关联,即 DataGen 生成随机值与 OceanBase(二级索引列)关联,测试数据如下表所示。
维表数据量在 2000 万及 1 亿条(大数据量)时,低任务并行度下的 OceanBase QPS 优于 HBase,高任务并行度下 OceanBase 相比 HBase 有 3-4 倍性能提升,优势明显。
维表数据量在 10w(小数据量)时,低任务并行度下 HBase QPS 略高于 OceanBase,高并行度下 OceanBase 优势明显。
对 OceanBase 使用非索引列关联性能较差,后续使用时需注意为大维表的关联字段添加索引;实时计算平台也可从平台功能角度进行优化,例如用户关联了非索引列时,在 SQL 校验阶段提示用户创建索引。
对 OceanBase 使用二级索引列关联(1 对 N 关联)性能良好,可满足较高 QPS 业务场景需求。
往期推荐