A Little Case Study: Which Came First, the Chicken or the Egg?
Preface
My head has been foggy for days. Yes, I caught it too 🐑: confirmed on Monday with fevers bouncing between low and high, fine for one day on Tuesday, but after less than a day of gloating the fever came back. I held out to day four, when my head finally cleared up and I had time to look back at a few questions raised at work and by friends in the group chat.
Main Content
The first question was: "Hi all, I ran a manual ANALYZE on this table, so why are these statistics still 0?"
It turned out the table was a TimescaleDB hypertable. In TimescaleDB a partition is called a chunk; TimescaleDB sizes chunks automatically and partitions data by time and, optionally, by space.
So the symptom is the same as with an ordinary partitioned table. Let's look at an example 👇🏻
CREATE TABLE ptab01 (
id int not null,
tm timestamptz primary key not null
) PARTITION BY RANGE (tm);
create table ptab01_202201 partition of ptab01 for values from ('2022-01-01') to ('2022-02-01');
create table ptab01_202202 partition of ptab01 for values from ('2022-02-01') to ('2022-03-01');
create table ptab01_202203 partition of ptab01 for values from ('2022-03-01') to ('2022-04-01');
create table ptab01_202204 partition of ptab01 for values from ('2022-04-01') to ('2022-05-01');
create table ptab01_202205 partition of ptab01 for values from ('2022-05-01') to ('2022-06-01');
create table ptab01_202206 partition of ptab01 for values from ('2022-06-01') to ('2022-07-01');
insert into ptab01 select extract(epoch from seq), seq from generate_series('2022-01-01'::timestamptz, '2022-06-30 23:59:59'::timestamptz, interval '10 seconds') as seq;
postgres=# select n_tup_ins,n_live_tup,last_analyze,last_autoanalyze from pg_stat_user_tables where relname = 'ptab01_202201';
n_tup_ins | n_live_tup | last_analyze | last_autoanalyze
-----------+------------+-------------------------------+-------------------------------
267840 | 267840 | 2022-12-15 15:16:19.605814+08 | 2022-12-15 15:11:03.496317+08
(1 row)
postgres=# select n_tup_ins,n_live_tup,last_analyze,last_autoanalyze from pg_stat_user_tables where relname = 'ptab01';
n_tup_ins | n_live_tup | last_analyze | last_autoanalyze
-----------+------------+-------------------------------+------------------
0 | 0 | 2022-12-15 15:16:19.549188+08 |
(1 row)
As you can see, the parent table's counters are empty; the data shows up on the child tables. That is the most common case, but there is another one. I'll keep you in suspense for now; read on.
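A quick sanity check on those numbers (an aside, not part of the original demo): generate_series steps every 10 seconds, and the January partition spans 31 full days, which is exactly the n_tup_ins figure reported for ptab01_202201:

```python
# Verify the n_tup_ins figure for ptab01_202201: the INSERT emits one row
# every 10 seconds, and the January partition covers 31 days.
seconds_per_day = 24 * 60 * 60
rows_per_day = seconds_per_day // 10      # one row per 10-second step = 8640
january_rows = 31 * rows_per_day

print(january_rows)  # 267840, matching n_tup_ins for ptab01_202201
```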
We know that in PostgreSQL, planner statistics are updated by ANALYZE, and that the background autovacuum process launches an ANALYZE once the number of rows inserted, updated or deleted on a table crosses a threshold (autovacuum_analyze_threshold + autovacuum_analyze_scale_factor × reltuples). But if statistics come from ANALYZE, what tells autovacuum how many rows have changed in the first place? That sounds like a chicken-and-egg question of which comes first. The way out is that databases generally keep a separate set of "counters", and PostgreSQL is no exception: those counters are exactly what pg_stat_all_tables exposes. Look at the view definition 👇🏻
View definition:
SELECT c.oid AS relid,
n.nspname AS schemaname,
c.relname,
pg_stat_get_numscans(c.oid) AS seq_scan,
pg_stat_get_tuples_returned(c.oid) AS seq_tup_read,
sum(pg_stat_get_numscans(i.indexrelid))::bigint AS idx_scan,
sum(pg_stat_get_tuples_fetched(i.indexrelid))::bigint + pg_stat_get_tuples_fetched(c.oid) AS idx_tup_fetch,
pg_stat_get_tuples_inserted(c.oid) AS n_tup_ins,
pg_stat_get_tuples_updated(c.oid) AS n_tup_upd,
pg_stat_get_tuples_deleted(c.oid) AS n_tup_del,
pg_stat_get_tuples_hot_updated(c.oid) AS n_tup_hot_upd,
pg_stat_get_live_tuples(c.oid) AS n_live_tup,
pg_stat_get_dead_tuples(c.oid) AS n_dead_tup,
pg_stat_get_mod_since_analyze(c.oid) AS n_mod_since_analyze,
pg_stat_get_ins_since_vacuum(c.oid) AS n_ins_since_vacuum,
pg_stat_get_last_vacuum_time(c.oid) AS last_vacuum,
pg_stat_get_last_autovacuum_time(c.oid) AS last_autovacuum,
pg_stat_get_last_analyze_time(c.oid) AS last_analyze,
pg_stat_get_last_autoanalyze_time(c.oid) AS last_autoanalyze,
pg_stat_get_vacuum_count(c.oid) AS vacuum_count,
pg_stat_get_autovacuum_count(c.oid) AS autovacuum_count,
pg_stat_get_analyze_count(c.oid) AS analyze_count,
pg_stat_get_autoanalyze_count(c.oid) AS autoanalyze_count
FROM pg_class c
LEFT JOIN pg_index i ON c.oid = i.indrelid
LEFT JOIN pg_namespace n ON n.oid = c.relnamespace
WHERE c.relkind = ANY (ARRAY['r'::"char", 't'::"char", 'm'::"char", 'p'::"char"])
GROUP BY c.oid, n.nspname, c.relname;
At a glance: counters for every dimension of table activity. The counting itself is done by the stats collector process, and views such as pg_stat_user_tables / pg_stat_sys_tables are simply filtered views over pg_stat_all_tables. So while the database is running, the stats collector keeps gathering activity data in real time, which brings us to the stats collector process itself:
/* ----------
* pgstat_init() -
*
* Called from postmaster at startup. Create the resources required
* by the statistics collector process. If unable to do so, do not
* fail --- better to let the postmaster start with stats collection
* disabled.
* ----------
*/
void
pgstat_init(void)
{
ACCEPT_TYPE_ARG3 alen;
struct addrinfo *addrs = NULL,
*addr,
hints;
int ret;
fd_set rset;
struct timeval tv;
char test_byte;
int sel_res;
int tries = 0;
#define TESTBYTEVAL ((char) 199)
/*
* This static assertion verifies that we didn't mess up the calculations
* involved in selecting maximum payload sizes for our UDP messages.
* Because the only consequence of overrunning PGSTAT_MAX_MSG_SIZE would
* be silent performance loss from fragmentation, it seems worth having a
* compile-time cross-check that we didn't.
*/
StaticAssertStmt(sizeof(PgStat_Msg) <= PGSTAT_MAX_MSG_SIZE,
"maximum stats message size exceeds PGSTAT_MAX_MSG_SIZE");
/*
* Create the UDP socket for sending and receiving statistic messages
*/
hints.ai_flags = AI_PASSIVE;
hints.ai_family = AF_UNSPEC;
hints.ai_socktype = SOCK_DGRAM;
hints.ai_protocol = 0;
hints.ai_addrlen = 0;
hints.ai_addr = NULL;
hints.ai_canonname = NULL;
hints.ai_next = NULL;
ret = pg_getaddrinfo_all("localhost", NULL, &hints, &addrs);
if (ret || !addrs)
{
ereport(LOG,
(errmsg("could not resolve \"localhost\": %s",
gai_strerror(ret))));
goto startup_failed;
}
In the code, one of the steps in the stats collector's initialization binds a UDP port. Let's check 👇🏻
[postgres@xiongcc ~]$ netstat -anpo | grep postgres | grep udp
(Not all processes could be identified, non-owned process info
will not be shown, you would have to be root to see it all.)
udp 0 0 127.0.0.1:57528 127.0.0.1:57528 ESTABLISHED 24995/postgres off (0.00/0/0)
[postgres@xiongcc ~]$ ps -ef | grep 'stats' | grep -v grep
postgres 25002 24995 0 17:56 ? 00:00:00 postgres: stats collector
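Note the curious detail in the netstat output: the socket's local and remote endpoints are identical, because the collector connect()s its UDP socket back to its own bound address and then loops a test byte through it. A small Python sketch of the same probe pgstat_init() performs (a simplified illustration, not the actual server code):

```python
import socket

# Minimal sketch of the stats collector's connectivity probe: bind a UDP
# socket on loopback, connect() it back to its own address (hence the
# identical local/remote endpoints in netstat), then send and receive a
# test byte. TESTBYTEVAL mirrors the constant in the C code above.
TESTBYTEVAL = bytes([199])

sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
sock.bind(("127.0.0.1", 0))          # kernel assigns an ephemeral port
sock.connect(sock.getsockname())     # self-connected, like the collector's socket
sock.settimeout(1.0)

sock.send(TESTBYTEVAL)
echoed = sock.recv(1)
assert echoed == TESTBYTEVAL         # probe succeeded; the socket is usable
sock.close()
print("probe ok")
```

If no address returned by getaddrinfo() passes this probe, the real server gives up and starts without statistics collection, as we are about to demonstrate.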
Indeed it does. Now let's play a dirty trick and feed the database a bogus address mapping:
[root@xiongcc ~]# echo "111.112.113.114 localhost" > /etc/hosts ---演示使用
After restarting the database, there is no stats collector process any more, and the log shows why:
[postgres@xiongcc ~]$ ps -ef | grep postgres
root 22471 22438 0 17:25 pts/4 00:00:00 su - postgres
postgres 22472 22471 0 17:25 pts/4 00:00:00 -bash
postgres 25348 1 0 18:01 ? 00:00:00 /usr/pgsql-14/bin/postgres -D pgdata
postgres 25349 25348 0 18:01 ? 00:00:00 postgres: logger
postgres 25351 25348 0 18:01 ? 00:00:00 postgres: checkpointer
postgres 25352 25348 0 18:01 ? 00:00:00 postgres: background writer
postgres 25353 25348 0 18:01 ? 00:00:00 postgres: walwriter
postgres 25354 25348 0 18:01 ? 00:00:00 postgres: logical replication launcher
2022-12-15 18:01:07.335 CST [25348] LOG: could not bind socket for statistics collector: Cannot assign requested address
2022-12-15 18:01:07.335 CST [25348] LOG: disabling statistics collector for lack of working socket
2022-12-15 18:01:07.335 CST [25348] WARNING: autovacuum not started because of misconfiguration
2022-12-15 18:01:07.335 CST [25348] HINT: Enable the "track_counts" option.
Of course, this is the easiest failure mode to think of: the stats collector process is simply gone, so there are no statistics at all, the database is in a dangerous spot, and autovacuum goes on strike as well. Still, it is very visible, and a routine inspection will catch it right away.
Don't worry, here comes a sneakier, more obscure one:
[root@xiongcc ~]# echo "" > /etc/hosts ---演示使用
Note the difference: this time the socket is IPv6.
[postgres@xiongcc ~]$ netstat -anpo | grep postgres | grep udp
(Not all processes could be identified, non-owned process info
will not be shown, you would have to be root to see it all.)
udp6 0 0 ::1:59223 ::1:59223 ESTABLISHED 28291/postgres off (0.00/0/0)
Now for a little trick: disable IPv6 entirely on the loopback interface.
[root@xiongcc ~]# ifconfig lo
lo: flags=73<UP,LOOPBACK,RUNNING> mtu 65536
inet 127.0.0.1 netmask 255.0.0.0
inet6 ::1 prefixlen 128 scopeid 0x10<host> ---ipv6
loop txqueuelen 1 (Local Loopback)
RX packets 46065745 bytes 99947290698 (93.0 GiB)
RX errors 0 dropped 0 overruns 0 frame 0
TX packets 46065745 bytes 99947290698 (93.0 GiB)
TX errors 0 dropped 0 overruns 0 carrier 0 collisions 0
[root@xiongcc ~]# echo "net.ipv6.conf.lo.disable_ipv6 = 1" >> /etc/sysctl.conf ---进程环回地址 ipv6
[root@xiongcc ~]# sysctl -p /etc/sysctl.conf
[root@xiongcc ~]# ifconfig lo
lo: flags=73<UP,LOOPBACK,RUNNING> mtu 65536
inet 127.0.0.1 netmask 255.0.0.0
loop txqueuelen 1 (Local Loopback)
RX packets 46065739 bytes 99947288450 (93.0 GiB)
RX errors 0 dropped 0 overruns 0 frame 0
TX packets 46065739 bytes 99947288450 (93.0 GiB)
TX errors 0 dropped 0 overruns 0 carrier 0 collisions 0
[root@xiongcc ~]# netstat -anpo | grep postgres | grep udp
udp6 0 0 ::1:59223 ::1:59223 ESTABLISHED 28291/postgres off (0.00/0/0)
With that, loopback IPv6 is gone, yet the database still thinks it is diligently collecting statistics, even though the IPv6 socket no longer responds. The backends have no choice but to make do with stale statistics:
[postgres@xiongcc ~]$ ps -ef | grep postgres
postgres 28291 1 0 18:41 ? 00:00:00 /usr/pgsql-14/bin/postgres -D pgdata
postgres 28292 28291 0 18:41 ? 00:00:00 postgres: logger
postgres 28294 28291 0 18:41 ? 00:00:00 postgres: checkpointer
postgres 28295 28291 0 18:41 ? 00:00:00 postgres: background writer
postgres 28296 28291 0 18:41 ? 00:00:00 postgres: walwriter
postgres 28297 28291 0 18:41 ? 00:00:00 postgres: autovacuum launcher
postgres 28298 28291 0 18:41 ? 00:00:00 postgres: stats collector
postgres 28299 28291 0 18:41 ? 00:00:00 postgres: logical replication launcher
[postgres@xiongcc log]$ tail -f postgresql-15.log
2022-12-15 18:41:30.921 CST [28293] LOG: database system was shut down at 2022-12-15 18:41:30 CST
2022-12-15 18:41:30.925 CST [28291] LOG: database system is ready to accept connections
2022-12-15 18:45:41.257 CST [28297] LOG: using stale statistics instead of current ones because stats collector is not responding
2022-12-15 18:45:51.363 CST [28641] LOG: using stale statistics instead of current ones because stats collector is not responding
...
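The "using stale statistics" message above comes out of a bounded retry loop. A sketch of that behavior (not the real implementation), assuming the PostgreSQL 14 pgstat.c constants PGSTAT_RETRY_DELAY = 10 ms and PGSTAT_MAX_WAIT_TIME = 10000 ms:

```python
import time

# Sketch of the backend's wait loop: it repeatedly checks whether the
# collector has produced a stats file fresh enough for its snapshot,
# sleeping briefly between attempts; once the ~10 s budget is spent it
# logs the warning and falls back to whatever snapshot is on disk.
# `file_is_fresh` stands in for the real on-disk timestamp check.
PGSTAT_RETRY_DELAY_MS = 10
PGSTAT_MAX_WAIT_TIME_MS = 10_000

def read_statsfile(file_is_fresh, sleep=time.sleep) -> str:
    for _ in range(PGSTAT_MAX_WAIT_TIME_MS // PGSTAT_RETRY_DELAY_MS):
        if file_is_fresh():
            return "current"
        sleep(PGSTAT_RETRY_DELAY_MS / 1000)   # wait, then inquire again
    print("using stale statistics instead of current ones "
          "because stats collector is not responding")
    return "stale"

# With the collector unreachable (as in the IPv6 scenario), the result is
# always "stale"; sleep is stubbed out so the sketch returns instantly.
print(read_statsfile(lambda: False, sleep=lambda s: None))
```

This also explains why affected sessions stall for several seconds before the message appears: they burn through the whole wait budget first.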
So nothing done during this window gets statistics: new objects have no statistics at all, and existing objects keep whatever was collected last.
postgres=# create table test(id int,info text);
CREATE TABLE
postgres=# insert into test select n,md5(random()::text) from generate_series(1,100000) as n;
INSERT 0 100000
postgres=# analyze verbose test;
INFO: analyzing "public.test"
INFO: "test": scanned 834 of 834 pages, containing 100000 live rows and 0 dead rows; 30000 rows in sample, 100000 estimated total rows
ANALYZE
postgres=# delete from test where id < 50000;
DELETE 49999
postgres=# update test set id = 99 where id > 60000;
UPDATE 40000
postgres=# select last_vacuum,last_autovacuum from pg_stat_user_tables where relname = 'test';
^CCancel request sent
ERROR: canceling statement due to user request
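A side note on the scale of what just went uncounted: the DELETE and UPDATE above modified 49999 + 40000 = 89999 rows, far past the point where autoanalyze would normally fire, yet nothing happened because the counters never reached autovacuum. A minimal sketch of the trigger condition, assuming the default parameter values:

```python
# Sketch of autovacuum's analyze trigger, assuming the defaults
# autovacuum_analyze_threshold = 50 and autovacuum_analyze_scale_factor = 0.1.
# autovacuum compares the n_mod_since_analyze counter against the computed
# threshold; with the counters frozen, this comparison can never succeed.
def needs_autoanalyze(reltuples: float, n_mod_since_analyze: int,
                      analyze_threshold: int = 50,
                      scale_factor: float = 0.1) -> bool:
    return n_mod_since_analyze > analyze_threshold + scale_factor * reltuples

# For the 100000-row test table, the threshold is 50 + 0.1 * 100000 = 10050,
# and 89999 modified rows sails past it:
print(needs_autoanalyze(reltuples=100_000, n_mod_since_analyze=89_999))  # True
```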
After recovery, you can see that the statistics for the newly created test table are all empty 👇🏻
postgres=# \x
Expanded display is on.
postgres=# select * from pg_stat_user_tables where relname = 'test';
-[ RECORD 1 ]-------+--------
relid               | 2293089
schemaname          | public
relname             | test
seq_scan            | 0
seq_tup_read        | 0
idx_scan            |
idx_tup_fetch       |
n_tup_ins           | 0
n_tup_upd           | 0
n_tup_del           | 0
n_tup_hot_upd       | 0
n_live_tup          | 0
n_dead_tup          | 0
n_mod_since_analyze | 0
n_ins_since_vacuum  | 0
last_vacuum         |
last_autovacuum     |
last_analyze        |
last_autoanalyze    |
vacuum_count        | 0
autovacuum_count    | 0
analyze_count       | 0
autoanalyze_count   | 0
As for why the query hung earlier, that's easy to understand: to answer a query against these views, the backend needs the stats collector to produce a fresh statistics snapshot, and the collector was no longer responding. Trace it with strace 👇🏻
[postgres@xiongcc log]$ strace -p 28298
strace: Process 28298 attached
epoll_wait(4, [{EPOLLIN, {u32=46012504, u64=46012504}}], 1, -1) = 1
recvfrom(9, "\1\0\0\0 \0\0\0T\206|\211\332\222\2\0004\345t\211\332\222\2\0D6\0\0\332\222\2\0", 1000, 0, NULL, NULL) = 32
open("pg_stat_tmp/global.tmp", O_WRONLY|O_CREAT|O_TRUNC, 0666) = 5
fstat(5, {st_mode=S_IFREG|0600, st_size=0, ...}) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f8e4134b000
open("pg_stat_tmp/db_13892.tmp", O_WRONLY|O_CREAT|O_TRUNC, 0666) = 6
fstat(6, {st_mode=S_IFREG|0600, st_size=0, ...}) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f8e4134a000
write(6, "\242\274\245\1T[\n\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 4096) = 4096
write(6, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 4096) = 4096
write(6, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 4096) = 4096
write(6, "\0\0\0\0\0\0\0\205\0\0\0\0\0\0\0i_Minh\0\10#\0\0\0\0\0\0\0k"..., 4096) = 4096
write(6, "\0\0\0\0\0H\2\0\0\0\0\0\0-\2\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 641) = 641
close(6) = 0
munmap(0x7f8e4134a000, 4096) = 0
rename("pg_stat_tmp/db_13892.tmp", "pg_stat_tmp/db_13892.stat") = 0
open("pg_stat_tmp/db_0.tmp", O_WRONLY|O_CREAT|O_TRUNC, 0666) = 6
fstat(6, {st_mode=S_IFREG|0600, st_size=0, ...}) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f8e4134a000
write(6, "\242\274\245\1T\275\4\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 2225) = 2225
close(6) = 0
munmap(0x7f8e4134a000, 4096) = 0
rename("pg_stat_tmp/db_0.tmp", "pg_stat_tmp/db_0.stat") = 0
write(5, "\242\274\245\1L\207|\211\332\222\2\0!:\0\0\0\0\0\0\"\0\0\0\0\0\0\0I\301\3\0"..., 1335) = 1335
close(5) = 0
munmap(0x7f8e4134b000, 4096) = 0
rename("pg_stat_tmp/global.tmp", "pg_stat_tmp/global.stat") = 0
Summary
PostgreSQL resolves localhost with the standard POSIX function getaddrinfo(3), then iterates over every address that call returns, creating a UDP socket and running a connectivity test on each until it finds one that works. So when you do things like host migrations or database migrations, don't forget this potential hazard! Otherwise table bloat, transaction ID wraparound blowups, long-transaction fallout and friends will come knocking one after another. (Incidentally, PostgreSQL 15 replaced the stats collector process and its UDP socket with shared-memory cumulative statistics, so this particular trap only applies through version 14.)
That's all.