项目地址:https://github.com/patroni/patroni

官方文档:https://patroni.readthedocs.io/en/latest/

中文手册:https://postgres-cn.github.io/patroni-doccn/

参考:Percona Documentation

服务规划

hostname ip 服务
node1 172.17.83.2 etcd,postgresql,Patroni,haproxy
node2 172.17.83.8 etcd,postgresql,Patroni,haproxy
node3 172.17.83.12 etcd,postgresql,Patroni,haproxy

准备工作

调整 hostname

# 修改 hostname
hostnamectl set-hostname node1
# 修改 hosts文件 /etc/hosts
172.17.83.2 node1
172.17.83.8 node2
172.17.83.12 node3

Watchdog

# 加载 `softdog` 内核模块
modprobe softdog
chown postgres /dev/watchdog
# 确保 Watchdog 设备文件存在
[root@node2 pgsql]# ll /dev/watchdog
crw------- 1 postgres root 10, 130 2月  12 09:54 /dev/watchdog

部署 PG14

安装PG

# 更新系统包
yum install -y epel-release
yum update -y
# 安装pg
yum install -y https://download.postgresql.org/pub/repos/yum/reporpms/EL-7-x86_64/pgdg-redhat-repo-latest.noarch.rpm  
yum install -y postgresql14-server

安装 Postgis

# 清理重复的包
yum install yum-utils -y
package-cleanup --dupes
# Postgis
yum install -y zlib-devel openssl postgis33_14 postgis33_14-utils

初始化

数据目录

mkdir -p /data/pgsql/
touch /data/pgsql/patroni.yml
chown -Rf postgres:postgres /data/pgsql

部署 ETCD

https://github.com/etcd-io/etcd

etcd 集群的节点时间一定要同步!

时间同步

yum install chrony -y
systemctl start chronyd.service
systemctl enable chronyd.service
systemctl status chronyd.service

配置文件

[root@VM-0-17-centos etcd]# egrep -v '^#|^$' /etc/chrony.conf
server ntpupdate.tencentyun.com iburst
driftfile /var/lib/chrony/drift
makestep 1.0 3
rtcsync
logdir /var/log/chrony

YUM 安装

在线安装

yum -y install etcd

离线安装

# 制作离线包
yumdownloader --destdir=/tmp/etcd --resolve etcd
# 安装
rpm -ivh etcd-3.3.11-2.el7.centos.x86_64.rpm

配置文件

/etc/etcd/etcd.conf

定义变量

export NODE_NAME=`hostname -f`
export NODE_IP=`hostname -i | awk '{print $1}'`

生成配置

echo "
ETCD_NAME="${NODE_NAME}"
ETCD_DATA_DIR="/var/lib/etcd/default.etcd"
ETCD_LISTEN_CLIENT_URLS="http://${NODE_IP}:2379,http://127.0.0.1:2379"
ETCD_LISTEN_PEER_URLS="http://${NODE_IP}:2380"
ETCD_INITIAL_ADVERTISE_PEER_URLS="http://${NODE_IP}:2380"
ETCD_ADVERTISE_CLIENT_URLS="http://${NODE_IP}:2379"
ETCD_INITIAL_CLUSTER_STATE="new"
ETCD_INITIAL_CLUSTER_TOKEN="etcd-cluster"
ETCD_INITIAL_CLUSTER="node1=http://172.17.83.2:2380,node2=http://172.17.83.8:2380,node3=http://172.17.83.12:2380"
" | sudo tee /etc/etcd/etcd.conf

启动测试

WARNING: Environment variable ETCDCTL_API is not set; defaults to etcdctl v2. Set environment variable ETCDCTL_API=3 to use v3 API or ETCDCTL_API=2 to use v2 API

设置环境变量

vim /etc/profile
export ETCDCTL_API=3
export ENDPOINTS=172.17.83.2:2379,172.17.83.8:2379,172.17.83.12:2379
# 生效
source /etc/profile

启动服务

# 关闭防火墙
systemctl stop firewalld && systemctl disable firewalld
# 启动etcd
systemctl start etcd.service
systemctl enable etcd.service

检查状态

# 查看节点状态
[root@node3 ~]# etcdctl member list --write-out=table
+------------------+---------+-------+--------------------------+--------------------------+
|        ID        | STATUS  | NAME  |        PEER ADDRS        |       CLIENT ADDRS       |
+------------------+---------+-------+--------------------------+--------------------------+
| 94e289847b4c3e24 | started | node2 |  http://172.17.83.8:2380 |  http://172.17.83.8:2379 |
| 9e132ce4635c9d9d | started | node1 |  http://172.17.83.2:2380 |  http://172.17.83.2:2379 |
| e32396bcdee20faf | started | node3 | http://172.17.83.12:2380 | http://172.17.83.12:2379 |
+------------------+---------+-------+--------------------------+--------------------------+
--------+-------------------------+
[root@node3 ~]# etcdctl endpoint status --endpoints=$ENDPOINTS --write-out=table
+-------------------+------------------+---------+---------+-----------+-----------+------------+
|     ENDPOINT      |        ID        | VERSION | DB SIZE | IS LEADER | RAFT TERM | RAFT INDEX |
+-------------------+------------------+---------+---------+-----------+-----------+------------+
|  172.17.83.2:2379 | 9e132ce4635c9d9d |  3.3.11 |   20 kB |     false |       366 |         10 |
|  172.17.83.8:2379 | 94e289847b4c3e24 |  3.3.11 |   20 kB |     false |       366 |         10 |
| 172.17.83.12:2379 | e32396bcdee20faf |  3.3.11 |   20 kB |      true |       366 |         10 |
+-------------------+------------------+---------+---------+-----------+-----------+------------+

数据操作

# 查询存储的所有key,并且前缀为'/'
[root@VM-0-3-centos system]# etcdctl get / --prefix
/service/pg_patroni/config
{"loop_wait":10,"retry_timeout":10,"ttl":30,"postgresql":{"parameters":{"listen_addresses":"0.0.0.0","hot_standby":"on","max_connections":2000,"shared_buffers":"4GB","max_locks_per_transaction":64,"max_prepared_transactions":0,"max_replication_slots":10,"max_wal_senders":10,"max_worker_processes":8,"track_commit_timestamp":"off","wal_keep_size":"128MB","wal_level":"replica","wal_log_hints":"on"},"use_pg_rewind":true,"use_slots":true}}
/service/pg_patroni/initialize
7428520042617295490
/service/pg_patroni/status
{"optime":24414320,"slots":{"patroni1":24414320},"retain_slots":["patroni1"]}
# 清除所有数据
[root@VM-0-3-centos system]# etcdctl del / --prefix
3

部署 Patroni

Patroni 依赖 Python3,必须 3.7 及以上版本

PIP 安装

pip install patroni[psycopg3,etcd3]

YUM 安装

yum install patroni patroni-etcd

配置文件

https://patroni.readthedocs.io/en/latest/yaml_configuration.html

定义变量

export NODE_NAME=`hostname -f`
export NODE_IP=`hostname -i | awk '{print $1}'`

生成配置

注意修改 postgres 用户的密码,pg_hba 网络的配置,etcd 的ip

echo "
scope: 'pg_patroni'
namespace: /db/
name: '${NODE_NAME}'

log:
  format: '%(asctime)s %(levelname)s: %(message)s'
  level: INFO
  max_queue_size: 1000
  traceback_level: ERROR
  type: plain

restapi:
  connect_address: '${NODE_IP}:8008'
  listen: '0.0.0.0:8008'

etcd3:
  hosts:
  - 172.17.83.2:2379
  - 172.17.83.8:2379
  - 172.17.83.12:2379
  retry_timeout: 30
  ttl: 60

bootstrap:
  dcs:
    loop_wait: 10
    retry_timeout: 10
    ttl: 30
    maximum_lag_on_failover: 1048576

    postgresql:
      use_pg_rewind: true
      use_slots: true
      parameters:
        hot_standby: 'on'
        max_connections: 2000
        shared_buffers: 4GB
        max_locks_per_transaction: 64
        max_prepared_transactions: 0
        max_replication_slots: 10
        max_wal_senders: 10
        max_worker_processes: 8
        track_commit_timestamp: 'off'
        wal_keep_size: 5GB
        wal_level: replica
        wal_log_hints: 'on'
        max_wal_size: '10GB'
        archive_mode: "on"
        archive_timeout: 600s
        archive_command: "cp -f %p /data/pgsql/data/archived/%f"

  initdb:
    - encoding: UTF8
    - locale: C
    - data-checksums

  pg_hba:
    - host all all 0.0.0.0/0 md5
    - host replication replicator 172.17.83.0/24 md5
    - host replication replicator 127.0.0.1/32 trust

postgresql:
  listen: '0.0.0.0:5432'
  connect_address: '${NODE_IP}:5432'
  bin_dir: '/usr/pgsql-14/bin/'
  data_dir: '/data/pgsql/data/'
  parameters:
    password_encryption: scram-sha-256
    unix_socket_directories: "/var/run/postgresql/"
  authentication:
    replication:
      password: 'replicator'
      username: replicator
    superuser:
      password: 'xxxx'
      username: postgres
  create_replica_methods:
      - basebackup
  basebackup:
      checkpoint: 'fast'

watchdog:
  mode: automatic
  device: /dev/watchdog

tags:
  clonefrom: false
  failover_priority: 1
  noloadbalance: false
  nostream: false
  nosync: false

" | sudo tee /data/pgsql/patroni.yml

启动集群

手动启动

# 切换 postgres
su postgres
cd /data/pgsql/
nohup patroni patroni.yml > patroni.log 2>&1 &

systemd

创建启动文件

注意修改 patroni 可执行文件路径

/etc/systemd/system/percona-patroni.service

echo "
[Unit]
Description=Patroni PostgreSQL HA
After=network.target

[Service]
Type=simple
User=postgres
Group=postgres
ExecStart=/opt/python3.8/lib/python3.8/site-packages/bin/patroni /data/pgsql/patroni.yml
ExecReload=/bin/kill -HUP $MAINPID
Restart=always
TimeoutSec=300
LimitNOFILE=4096

[Install]
WantedBy=multi-user.target
" | sudo tee /etc/systemd/system/patroni.service

启动服务

先启动一个 node1 节点,服务启动后,再依次启动其他 node节点

systemctl daemon-reload
systemctl start patroni

检查状态

[root@node1 pgsql]# patronictl -c /data/pgsql/patroni.yml list
+ Cluster: pg_patroni (7470725539388708392) --+----+-----------+----------------------+
| Member | Host         | Role    | State     | TL | Lag in MB | Tags                 |
+--------+--------------+---------+-----------+----+-----------+----------------------+
| node1  | 172.17.83.2  | Replica | streaming |  1 |         0 | failover_priority: 1 |
| node2  | 172.17.83.8  | Leader  | running   |  1 |           | failover_priority: 1 |
| node3  | 172.17.83.12 | Replica | streaming |  1 |         0 | failover_priority: 1 |
+--------+--------------+---------+-----------+----+-----------+----------------------+

HAProxy

YUM 安装

yum install haproxy -y
# 制作离线包
yumdownloader --destdir=/tmp/haproxy --resolve haproxy

配置文件

/etc/haproxy/haproxy.cfg

global
    maxconn 2000

defaults
    log global
    mode tcp
    retries 2
    timeout client 40m
    timeout connect 4s
    timeout server 40m
    timeout check 5s

listen stats
    bind *:8089
    mode http
    stats enable
    stats uri /stats
    stats refresh 10s

listen primary
    bind *:5000
    option httpchk /primary 
    http-check expect status 200
    default-server inter 3s fall 3 rise 2 on-marked-down shutdown-sessions
    server node1 node1:5432 maxconn 2000 check port 8008
    server node2 node2:5432 maxconn 2000 check port 8008
    server node3 node3:5432 maxconn 2000 check port 8008

启动验证

# 启动服务
systemctl enable --now haproxy

监控管理端:http://172.17.83.8:8089/stats

image.png|800

负载均衡

haproxy 的 5000 端口 会转发到 PG 集群的 Leader 节点的 5432

再使用负载均衡服务,转发 node1,node2,node3 的 5000 端口

应用服务配置负载均衡的地址,即可实现PG高可用。

高可用验证

主动切换

检查集群状态,node3 为 Leader 节点

image.png|800

切换节点,需指定集群名称:pg_patroni

patronictl -c /data/pgsql/patroni.yml switchover pg_patroni

image.png|800

检查节点状态,成功切换为 node1 为 Leader 节点

image.png|800

关停服务

关停 PG

使用 kill 命令,停止 postgresql 进程,patroni 会主动拉起 postgresql 服务。

关停 patroni

停止 patroni 服务,会同时停止 postgresql 进程,PG 集群会重新选主,并剔除挂掉的节点。 重新启动 patroni 服务,PG 集群恢复1主2从,自动同步数据。

systemctl stop patroni

image.png|800

systemctl start patroni

image.png|800