AI 文章摘要
一、查询ETCD集群
1、定义ETCD集群变量
# Array of ETCD cluster member IPs.
# NOTE: bash cannot export arrays to child processes, so `export` here
# would be a no-op — the array is only usable in the current shell.
ETCD_IPS=(192.168.66.62 192.168.66.63 192.168.66.64)
export ETCD_NODE1=192.168.66.62
export ETCD_NODE2=192.168.66.63
export ETCD_NODE3=192.168.66.64
# etcd client endpoint list; replace the IPs with your own cluster's addresses.
export ETCD_ENDPOINTS="https://192.168.66.62:2379,https://192.168.66.63:2379,https://192.168.66.64:2379"
# Peer (cluster-internal) URLs, keyed by node hostname; replace the hostnames
# with your actual ETCD node names.
# FIX: the original line was missing the closing double quote, which would
# swallow all following commands into the string.
export ETCD_NODES="k8s-master1=https://192.168.66.62:2380,k8s-master2=https://192.168.66.63:2380,k8s-master3=https://192.168.66.64:2380"
2、查看ETCD集群状态
# Probe every etcd member's health endpoint over TLS.
# FIX: quote the array and variable expansions (ShellCheck SC2068/SC2086).
for node_ip in "${ETCD_IPS[@]}"
do
  echo ">>> ${node_ip}"
  /opt/k8s/bin/etcdctl \
    --endpoints="https://${node_ip}:2379" \
    --cacert=/etc/kubernetes/cert/ca.pem \
    --cert=/etc/etcd/cert/etcd.pem \
    --key=/etc/etcd/cert/etcd-key.pem endpoint health
done

3、获取ETCD版本信息
# Print the etcd server/client version of every member.
# FIX: quote the array and variable expansions (ShellCheck SC2068/SC2086).
for node_ip in "${ETCD_IPS[@]}"
do
  echo ">>> ${node_ip}"
  /opt/k8s/bin/etcdctl \
    --endpoints="https://${node_ip}:2379" \
    --cacert=/etc/kubernetes/cert/ca.pem \
    --cert=/etc/etcd/cert/etcd.pem \
    --key=/etc/etcd/cert/etcd-key.pem version
done

二、ETCD数据备份
- 注意:ETCD 不同版本的 etcdctl 命令不一样,但大致差不多,本文备份使用 snapshot save,每次备份一个节点即可。
- 命令备份(在 ETCD 集群 leader 节点机器上执行备份):
1、查看ETCD集群的leader
# NOTE: change the certificate paths to where your own cluster stores them.
# `-w table` prints a status table; the IS LEADER column identifies the
# current leader node to run the backup on.
/opt/k8s/bin/etcdctl \
  -w table --cacert=/opt/k8s/work/ca.pem \
  --cert=/etc/etcd/cert/etcd.pem \
  --key=/etc/etcd/cert/etcd-key.pem \
  --endpoints="${ETCD_ENDPOINTS}" endpoint status

2、leader节点执行下面操作
# Create the backup directory and take a snapshot from the leader endpoint.
# FIX: backticks replaced with $( ) and the snapshot path quoted.
mkdir -p /data/etcd_backup_dir
/opt/k8s/bin/etcdctl \
  --cacert=/etc/kubernetes/cert/ca.pem \
  --cert=/etc/etcd/cert/etcd.pem \
  --key=/etc/etcd/cert/etcd-key.pem \
  --endpoints=https://192.168.66.62:2379 \
  snapshot save "/data/etcd_backup_dir/etcd-snapshot-$(date +%Y%m%d).db"

三、ETCD数据恢复
1、准备工作
- 停止所有 Master 节点上的 kube-apiserver 服务
# Stop kube-apiserver on every master before restoring etcd, so nothing
# keeps writing to the cluster state during the restore.
[root@k8s-master1 ~]# systemctl stop kube-apiserver
# Confirm that kube-apiserver has actually stopped.
# NOTE(review): `pgrep kube-apiserver` would avoid matching the grep itself.
[root@k8s-master1 ~]# ps -ef | grep kube-apiserver
- 停止集群中所有 ETCD 服务
# Stop etcd on every member before moving the data directories away.
# FIX: quote the array and variable expansions (ShellCheck SC2068/SC2086).
for node_ip in "${ETCD_IPS[@]}"
do
  echo ">>> ${node_ip}"
  ssh "root@${node_ip}" "systemctl stop etcd"
done
- 移除所有 ETCD 存储目录下数据以及wal日志目录
- 根据自己的集群存储路径进行修改
# Move (rather than delete) the old data and WAL directories so they can be
# recovered if the restore goes wrong; suffix them with today's date.
# FIX: backticks replaced with $( ) and destination paths quoted.
mkdir -p /data/etcd_backup_dir
mv /data/k8s/etcd/data/ "/data/etcd_backup_dir/data_$(date +%Y%m%d)"
mv /data/k8s/etcd/wal "/data/etcd_backup_dir/wal_$(date +%Y%m%d)"
- 拷贝 ETCD 备份快照
# 从 ETCD集群的leader节点机器上拷贝备份到另外2台ETCD节点上
# Copy the leader's snapshot to the other two etcd nodes.
# NOTE(review): the hard-coded date (20210818) must match the file actually
# produced by the backup step above — the restore example below uses a
# different date (20211014); adjust the filename before copying.
scp /data/etcd_backup_dir/etcd-snapshot-20210818.db root@k8s-master2:/data/etcd_backup_dir/
scp /data/etcd_backup_dir/etcd-snapshot-20210818.db root@k8s-master3:/data/etcd_backup_dir/
2、恢复备份
# k8s-master1 机器上操作
# Restore the snapshot on k8s-master1.
# --name and --initial-advertise-peer-urls must match this node's entry in
# ${ETCD_NODES}; --data-dir must not exist yet (it was moved away above).
[root@k8s-master1 ~]# ETCDCTL_API=3 /opt/k8s/bin/etcdctl snapshot restore /data/etcd_backup_dir/etcd-snapshot-20211014.db \
--name k8s-master1 \
--initial-cluster "${ETCD_NODES}" \
--initial-cluster-token etcd-cluster-0 \
--initial-advertise-peer-urls https://${ETCD_NODE1}:2380 \
--data-dir=/data/k8s/etcd/data
# k8s-master2 机器上操作
# Restore the same snapshot on k8s-master2; only --name and the advertised
# peer URL differ from the k8s-master1 invocation.
[root@k8s-master2 ~]# ETCDCTL_API=3 /opt/k8s/bin/etcdctl snapshot restore /data/etcd_backup_dir/etcd-snapshot-20211014.db \
--name k8s-master2 \
--initial-cluster "${ETCD_NODES}" \
--initial-cluster-token etcd-cluster-0 \
--initial-advertise-peer-urls https://${ETCD_NODE2}:2380 \
--data-dir=/data/k8s/etcd/data
# k8s-master3 机器上操作
# Restore the same snapshot on k8s-master3.
# FIX: the original prompt said "k8s-master2", but this step runs on
# k8s-master3 (as --name and ${ETCD_NODE3} show).
[root@k8s-master3 ~]# ETCDCTL_API=3 /opt/k8s/bin/etcdctl snapshot restore /data/etcd_backup_dir/etcd-snapshot-20211014.db \
--name k8s-master3 \
--initial-cluster "${ETCD_NODES}" \
--initial-cluster-token etcd-cluster-0 \
--initial-advertise-peer-urls https://${ETCD_NODE3}:2380 \
--data-dir=/data/k8s/etcd/data
3、启动ETCD
- 上面三台 ETCD 都恢复完成后,启动 ETCD
# Start etcd on every member after all three restores have finished.
# FIX: quote the array and variable expansions (ShellCheck SC2068/SC2086).
[root@k8s-master1 ~]# for node_ip in "${ETCD_IPS[@]}"
do
  echo ">>> ${node_ip}"
  # etcd refuses to start if the data directory is group/world accessible.
  ssh "root@${node_ip}" "chmod 0700 /data/k8s/etcd/data"
  ssh "root@${node_ip}" "systemctl daemon-reload && systemctl enable etcd && systemctl restart etcd"
done
4、检查ETCD
- 三台 ETCD 启动完成,检查 ETCD 集群状态
# Re-check every member's health endpoint after the restart.
# FIX: quote the array and variable expansions (ShellCheck SC2068/SC2086).
[root@k8s-master1 ~]# for node_ip in "${ETCD_IPS[@]}"
do
  echo ">>> ${node_ip}"
  /opt/k8s/bin/etcdctl \
    --endpoints="https://${node_ip}:2379" \
    --cacert=/etc/kubernetes/cert/ca.pem \
    --cert=/etc/etcd/cert/etcd.pem \
    --key=/etc/etcd/cert/etcd-key.pem endpoint health
done
5、启动Apiserver
- 三台 ETCD 全部健康后,启动 kube-apiserver
- 注意:这里的 MASTER_IPS 是自定义的一个 Master 节点 IP 数组,请根据自己的集群进行定义
# Restart kube-apiserver on every master node.
# MASTER_IPS is a user-defined array of master node IPs.
# FIX: quote the array and variable expansions (ShellCheck SC2068/SC2086).
for node_ip in "${MASTER_IPS[@]}"
do
  echo ">>> ${node_ip}"
  ssh "root@${node_ip}" "systemctl daemon-reload && systemctl enable kube-apiserver && systemctl restart kube-apiserver"
done
6、检查集群
- 检查 Kubernetes 集群是否恢复正常
[root@k8s-master1 ~]# kubectl get cs


感谢博主分享