一 前言
二 大数据架构中的实时计算
1 实时计算场景
2 Flink SQL 实时计算
-- Flink SQL job: per-minute GMV (gross merchandise value) by item category.
-- Order events are read from Kafka, enriched with the item category from a
-- Redis dimension table, aggregated in one-minute tumbling windows and
-- written to an RDS (MySQL) result table.

-- Source table: user order events. Each row says that user `user_id` bought
-- item `item_id` at time `timestamp` for `price`.
CREATE TEMPORARY TABLE user_action_source (
    `timestamp` BIGINT,
    `user_id`   BIGINT,
    `item_id`   BIGINT,
    `price`     DOUBLE,
    -- TUMBLE needs a time attribute; a raw BIGINT column is not one, so an
    -- event-time column is derived from it.
    -- NOTE(review): assumes `timestamp` holds epoch milliseconds; use
    -- precision 0 instead of 3 if it holds epoch seconds.
    `event_time` AS TO_TIMESTAMP_LTZ(`timestamp`, 3),
    -- Processing-time attribute used by the dimension-table lookup join.
    `proc_time`  AS PROCTIME(),
    -- Tolerates events arriving up to 5 seconds out of order; tune this to
    -- the lateness actually observed on the topic.
    WATERMARK FOR `event_time` AS `event_time` - INTERVAL '5' SECOND
) WITH (
    'connector' = 'kafka',
    'topic' = '<your_topic>',
    'properties.bootstrap.servers' = 'your_kafka_server:9092',
    'properties.group.id' = '<your_consumer_group>',
    'format' = 'json',
    'scan.startup.mode' = 'latest-offset'
);

-- Dimension table: item details (item id -> category).
CREATE TEMPORARY TABLE item_detail_dim (
    id       STRING,
    catagory STRING,
    PRIMARY KEY (id) NOT ENFORCED
) WITH (
    'connector' = 'redis',
    'host' = '<your_redis_host>',
    'port' = '<your_redis_port>',
    'password' = '<your_redis_password>',
    'dbNum' = '<your_db_num>'
);

-- Result table: GMV per minute and per category.
CREATE TEMPORARY TABLE gmv_output (
    time_minute STRING,
    catagory    STRING,
    gmv         DOUBLE,
    -- Flink does not own the data, so a primary key must be NOT ENFORCED.
    PRIMARY KEY (time_minute, catagory) NOT ENFORCED
) WITH (
    -- Same quoted 'key' = 'value' option form as the other two tables.
    'connector' = 'rds',
    'url' = '<your_jdbc_mysql_url_with_database>',
    'tableName' = '<your_table>',
    'userName' = '<your_mysql_database_username>',
    'password' = '<your_mysql_database_password>'
);

-- Processing: enrich each order with its category, then sum the order price
-- per one-minute event-time window and category.
INSERT INTO gmv_output
SELECT
    -- The sink column is STRING, so the window start is rendered as text.
    DATE_FORMAT(
        TUMBLE_START(s.`event_time`, INTERVAL '1' MINUTE),
        'yyyy-MM-dd HH:mm'
    ) AS time_minute,
    d.catagory,
    -- `price` belongs to the order event, not to the dimension table.
    SUM(s.`price`) AS gmv
FROM user_action_source AS s
INNER JOIN item_detail_dim FOR SYSTEM_TIME AS OF s.`proc_time` AS d
    -- The dimension key is STRING while `item_id` is BIGINT; cast the
    -- stream side so both sides of the lookup key have the same type.
    ON CAST(s.`item_id` AS STRING) = d.id
GROUP BY
    TUMBLE(s.`event_time`, INTERVAL '1' MINUTE),
    d.catagory;