项目地址:https://github.com/patroni/patroni
官方文档:https://patroni.readthedocs.io/en/latest/
中文手册:https://postgres-cn.github.io/patroni-doccn/
服务规划
hostname | ip | 服务 |
---|---|---|
node1 | 172.17.83.2 | etcd,postgresql,Patroni,haproxy |
node2 | 172.17.83.8 | etcd,postgresql,Patroni,haproxy |
node3 | 172.17.83.12 | etcd,postgresql,Patroni,haproxy |
准备工作
调整 hostname
# 修改 hostname
hostnamectl set-hostname node1
# 修改 hosts文件 /etc/hosts
172.17.83.2 node1
172.17.83.8 node2
172.17.83.12 node3
Watchdog
# 加载 `softdog` 内核模块
modprobe softdog
chown postgres /dev/watchdog
# 确保 Watchdog 设备文件存在
[root@node2 pgsql]# ll /dev/watchdog
crw------- 1 postgres root 10, 130 2月 12 09:54 /dev/watchdog
部署 PG14
安装PG
# 更新系统包
yum install -y epel-release
yum update -y
# 安装pg
yum install -y https://download.postgresql.org/pub/repos/yum/reporpms/EL-7-x86_64/pgdg-redhat-repo-latest.noarch.rpm
yum install -y postgresql14-server
安装 Postgis
# 清理重复的包
yum install yum-utils -y
package-cleanup --dupes
# Postgis
yum install -y zlib-devel openssl postgis33_14 postgis33_14-utils
初始化
数据目录
mkdir -p /data/pgsql/
touch /data/pgsql/patroni.yml
chown -Rf postgres:postgres /data/pgsql
部署 ETCD
etcd 集群的节点时间一定要同步!
时间同步
yum install chrony -y
systemctl start chronyd.service
systemctl enable chronyd.service
systemctl status chronyd.service
配置文件
[root@VM-0-17-centos etcd]# egrep -v '^#|^$' /etc/chrony.conf
server ntpupdate.tencentyun.com iburst
driftfile /var/lib/chrony/drift
makestep 1.0 3
rtcsync
logdir /var/log/chrony
YUM 安装
在线安装
yum -y install etcd
离线安装
# 制作离线包
yumdownloader --destdir=/tmp/etcd --resolve etcd
# 安装
rpm -ivh etcd-3.3.11-2.el7.centos.x86_64.rpm
配置文件
/etc/etcd/etcd.conf
定义变量
export NODE_NAME=`hostname -f`
export NODE_IP=`hostname -i | awk '{print $1}'`
生成配置
echo "
ETCD_NAME="${NODE_NAME}"
ETCD_DATA_DIR="/var/lib/etcd/default.etcd"
ETCD_LISTEN_CLIENT_URLS="http://${NODE_IP}:2379,http://127.0.0.1:2379"
ETCD_LISTEN_PEER_URLS="http://${NODE_IP}:2380"
ETCD_INITIAL_ADVERTISE_PEER_URLS="http://${NODE_IP}:2380"
ETCD_ADVERTISE_CLIENT_URLS="http://${NODE_IP}:2379"
ETCD_INITIAL_CLUSTER_STATE="new"
ETCD_INITIAL_CLUSTER_TOKEN="etcd-cluster"
ETCD_INITIAL_CLUSTER="node1=http://172.17.83.2:2380,node2=http://172.17.83.8:2380,node3=http://172.17.83.12:2380"
" | sudo tee /etc/etcd/etcd.conf
启动测试
WARNING: Environment variable ETCDCTL_API is not set; defaults to etcdctl v2. Set environment variable ETCDCTL_API=3 to use v3 API or ETCDCTL_API=2 to use v2 API
设置环境变量
vim /etc/profile
export ETCDCTL_API=3
export ENDPOINTS=172.17.83.2:2379,172.17.83.8:2379,172.17.83.12:2379
# 生效
source /etc/profile
启动服务
# 关闭防火墙
systemctl stop firewalld && systemctl disable firewalld
# 启动etcd
systemctl start etcd.service
systemctl enable etcd.service
检查状态
# 查看节点状态
[root@node3 ~]# etcdctl member list --write-out=table
+------------------+---------+-------+--------------------------+--------------------------+
| ID | STATUS | NAME | PEER ADDRS | CLIENT ADDRS |
+------------------+---------+-------+--------------------------+--------------------------+
| 94e289847b4c3e24 | started | node2 | http://172.17.83.8:2380 | http://172.17.83.8:2379 |
| 9e132ce4635c9d9d | started | node1 | http://172.17.83.2:2380 | http://172.17.83.2:2379 |
| e32396bcdee20faf | started | node3 | http://172.17.83.12:2380 | http://172.17.83.12:2379 |
+------------------+---------+-------+--------------------------+--------------------------+
--------+-------------------------+
[root@node3 ~]# etcdctl endpoint status --endpoints=$ENDPOINTS --write-out=table
+-------------------+------------------+---------+---------+-----------+-----------+------------+
| ENDPOINT | ID | VERSION | DB SIZE | IS LEADER | RAFT TERM | RAFT INDEX |
+-------------------+------------------+---------+---------+-----------+-----------+------------+
| 172.17.83.2:2379 | 9e132ce4635c9d9d | 3.3.11 | 20 kB | false | 366 | 10 |
| 172.17.83.8:2379 | 94e289847b4c3e24 | 3.3.11 | 20 kB | false | 366 | 10 |
| 172.17.83.12:2379 | e32396bcdee20faf | 3.3.11 | 20 kB | true | 366 | 10 |
+-------------------+------------------+---------+---------+-----------+-----------+------------+
数据操作
# 查询存储的所有key,并且前缀为'/'
[root@VM-0-3-centos system]# etcdctl get / --prefix
/service/pg_patroni/config
{"loop_wait":10,"retry_timeout":10,"ttl":30,"postgresql":{"parameters":{"listen_addresses":"0.0.0.0","hot_standby":"on","max_connections":2000,"shared_buffers":"4GB","max_locks_per_transaction":64,"max_prepared_transactions":0,"max_replication_slots":10,"max_wal_senders":10,"max_worker_processes":8,"track_commit_timestamp":"off","wal_keep_size":"128MB","wal_level":"replica","wal_log_hints":"on"},"use_pg_rewind":true,"use_slots":true}}
/service/pg_patroni/initialize
7428520042617295490
/service/pg_patroni/status
{"optime":24414320,"slots":{"patroni1":24414320},"retain_slots":["patroni1"]}
# 清除所有数据
[root@VM-0-3-centos system]# etcdctl del / --prefix
3
部署 Patroni
Patroni 依赖 Python3,必须 3.7 及以上版本
PIP 安装
pip install patroni[psycopg3,etcd3]
YUM 安装
yum install patroni patroni-etcd
配置文件
https://patroni.readthedocs.io/en/latest/yaml_configuration.html
定义变量
export NODE_NAME=`hostname -f`
export NODE_IP=`hostname -i | awk '{print $1}'`
生成配置
注意修改 postgres 用户的密码,pg_hba 网络的配置,etcd 的ip
echo "
scope: 'pg_patroni'
namespace: /db/
name: '${NODE_NAME}'
log:
format: '%(asctime)s %(levelname)s: %(message)s'
level: INFO
max_queue_size: 1000
traceback_level: ERROR
type: plain
restapi:
connect_address: '${NODE_IP}:8008'
listen: '0.0.0.0:8008'
etcd3:
hosts:
- 172.17.83.2:2379
- 172.17.83.8:2379
- 172.17.83.12:2379
retry_timeout: 30
ttl: 60
bootstrap:
dcs:
loop_wait: 10
retry_timeout: 10
ttl: 30
maximum_lag_on_failover: 1048576
postgresql:
use_pg_rewind: true
use_slots: true
parameters:
hot_standby: 'on'
max_connections: 2000
shared_buffers: 4GB
max_locks_per_transaction: 64
max_prepared_transactions: 0
max_replication_slots: 10
max_wal_senders: 10
max_worker_processes: 8
track_commit_timestamp: 'off'
wal_keep_size: 5GB
wal_level: replica
wal_log_hints: 'on'
max_wal_size: '10GB'
archive_mode: "on"
archive_timeout: 600s
archive_command: "cp -f %p /data/pgsql/data/archived/%f"
initdb:
- encoding: UTF8
- locale: C
- data-checksums
pg_hba:
- host all all 0.0.0.0/0 md5
- host replication replicator 172.17.83.0/24 md5
- host replication replicator 127.0.0.1/32 trust
postgresql:
listen: '0.0.0.0:5432'
connect_address: '${NODE_IP}:5432'
bin_dir: '/usr/pgsql-14/bin/'
data_dir: '/data/pgsql/data/'
parameters:
password_encryption: scram-sha-256
unix_socket_directories: "/var/run/postgresql/"
authentication:
replication:
password: 'replicator'
username: replicator
superuser:
password: 'xxxx'
username: postgres
create_replica_methods:
- basebackup
basebackup:
checkpoint: 'fast'
watchdog:
mode: automatic
device: /dev/watchdog
tags:
clonefrom: false
failover_priority: 1
noloadbalance: false
nostream: false
nosync: false
" | sudo tee /data/pgsql/patroni.yml
启动集群
手动启动
# 切换 postgres
su postgres
cd /data/pgsql/
nohup patroni patroni.yml > patroni.log 2>&1 &
systemd
创建启动文件
注意修改 patroni 可执行文件路径
/etc/systemd/system/percona-patroni.service
echo "
[Unit]
Description=Patroni PostgreSQL HA
After=network.target
[Service]
Type=simple
User=postgres
Group=postgres
ExecStart=/opt/python3.8/lib/python3.8/site-packages/bin/patroni /data/pgsql/patroni.yml
ExecReload=/bin/kill -HUP $MAINPID
Restart=always
TimeoutSec=300
LimitNOFILE=4096
[Install]
WantedBy=multi-user.target
" | sudo tee /etc/systemd/system/patroni.service
启动服务
先启动一个 node1 节点,服务启动后,再依次启动其他 node节点
systemctl daemon-reload
systemctl start patroni
检查状态
[root@node1 pgsql]# patronictl -c /data/pgsql/patroni.yml list
+ Cluster: pg_patroni (7470725539388708392) --+----+-----------+----------------------+
| Member | Host | Role | State | TL | Lag in MB | Tags |
+--------+--------------+---------+-----------+----+-----------+----------------------+
| node1 | 172.17.83.2 | Replica | streaming | 1 | 0 | failover_priority: 1 |
| node2 | 172.17.83.8 | Leader | running | 1 | | failover_priority: 1 |
| node3 | 172.17.83.12 | Replica | streaming | 1 | 0 | failover_priority: 1 |
+--------+--------------+---------+-----------+----+-----------+----------------------+
HAProxy
YUM 安装
yum install haproxy -y
# 制作离线包
yumdownloader --destdir=/tmp/haproxy --resolve haproxy
配置文件
/etc/haproxy/haproxy.cfg
global
maxconn 2000
defaults
log global
mode tcp
retries 2
timeout client 40m
timeout connect 4s
timeout server 40m
timeout check 5s
listen stats
bind *:8089
mode http
stats enable
stats uri /stats
stats refresh 10s
listen primary
bind *:5000
option httpchk /primary
http-check expect status 200
default-server inter 3s fall 3 rise 2 on-marked-down shutdown-sessions
server node1 node1:5432 maxconn 2000 check port 8008
server node2 node2:5432 maxconn 2000 check port 8008
server node3 node3:5432 maxconn 2000 check port 8008
启动验证
# 启动服务
systemctl enable --now haproxy
监控管理端:http://172.17.83.8:8089/stats
负载均衡
haproxy 的 5000 端口 会转发到 PG 集群的 Leader 节点的 5432
再使用负载均衡服务,转发 node1,node2,node3 的 5000 端口
应用服务配置负载均衡的地址,即可实现PG高可用。
高可用验证
主动切换
检查集群状态,node3 为 Leader 节点
切换节点,需指定集群名称:pg_patroni
patronictl -c /data/pgsql/patroni.yml switchover pg_patroni
检查节点状态,成功切换为 node1 为 Leader 节点
关停服务
关停 PG
使用 kill 命令,停止 postgresql 进程,patroni 会主动拉起 postgresql 服务。
关停 patroni
停止 patroni 服务,会同时停止 postgresql 进程,PG 集群会重新选主,并剔除挂掉的节点。 重新启动 patroni 服务,PG 集群恢复1主2从,自动同步数据。
systemctl stop patroni
systemctl start patroni