Kubernetes v1.20.13 High Availability with 100-Year Certificates!

1. Base Environment Configuration
1.1 Configuration Details
OS version         CentOS 7.9
Docker version     20.10.11
Kubernetes         v1.20.13
Pod CIDR           10.244.0.0/16
Service CIDR       10.96.0.0/16

1.2 Add cluster entries to /etc/hosts on all nodes
cat <<EOF | sudo tee -a /etc/hosts    # append with -a so the default localhost entries are preserved
10.10.10.2 app1 #master
10.10.10.3 app2 #master
10.10.10.4 app3 #master
10.10.10.5 app4
10.10.10.6 db1
10.10.10.7 db2
EOF
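
A quick sanity check that every hostname resolves (using the host list above):
for i in app1 app2 app3 app4 db1 db2; do ping -c1 -W1 $i >/dev/null && echo "$i OK" || echo "$i FAILED"; done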

1.3 Configure yum repositories
curl -o /etc/yum.repos.d/CentOS-Base.repo https://mirrors.aliyun.com/repo/Centos-7.repo
wget -O /etc/yum.repos.d/epel.repo http://mirrors.aliyun.com/repo/epel-7.repo

yum install -y yum-utils device-mapper-persistent-data lvm2
yum-config-manager --add-repo https://mirrors.aliyun.com/docker-ce/linux/centos/docker-ce.repo
sed -i 's+download.docker.com+mirrors.aliyun.com/docker-ce+' /etc/yum.repos.d/docker-ce.repo

cat <<EOF > /etc/yum.repos.d/kubernetes.repo
[kubernetes]
name=Kubernetes
baseurl=https://mirrors.aliyun.com/kubernetes/yum/repos/kubernetes-el7-x86_64/
enabled=1
gpgcheck=1
repo_gpgcheck=1
gpgkey=https://mirrors.aliyun.com/kubernetes/yum/doc/yum-key.gpg https://mirrors.aliyun.com/kubernetes/yum/doc/rpm-package-key.gpg
EOF
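
Rebuild the yum cache so the new repositories take effect:
yum clean all && yum makecache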

1.4 Install essential tools
yum install wget jq psmisc vim net-tools telnet yum-utils device-mapper-persistent-data lvm2 git -y

1.5 Disable firewalld, SELinux, and swap on all nodes

systemctl disable --now firewalld 

swapoff -a && sysctl -w vm.swappiness=0
sed -ri '/^[^#]*swap/s@^@#@' /etc/fstab

setenforce 0
sed -i 's#SELINUX=enforcing#SELINUX=disabled#g' /etc/sysconfig/selinux
sed -i 's#SELINUX=enforcing#SELINUX=disabled#g' /etc/selinux/config
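
Quick verification (SELinux reports Permissive until the next reboot, Disabled afterwards):
getenforce
free -m | grep -i swap    # the Swap line should be all zeros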

1.6 Configure time synchronization on all nodes
yum install ntpdate -y
ln -sf /usr/share/zoneinfo/Asia/Shanghai /etc/localtime
echo 'Asia/Shanghai' >/etc/timezone
ntpdate ntp.aliyun.com
*/5 * * * * /usr/sbin/ntpdate ntp.aliyun.com    # add this line to crontab (see the one-liner below)
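
To install the crontab entry non-interactively (a small sketch; it appends to the current user's crontab):
(crontab -l 2>/dev/null; echo "*/5 * * * * /usr/sbin/ntpdate ntp.aliyun.com") | crontab -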

1.7 Configure limits on all nodes
ulimit -SHn 65535

cat <<EOF | sudo tee -a /etc/security/limits.conf   # append the following at the end of the file
root        soft        nofile        1048576
root        hard        nofile        1048576
root        soft        stack         10240
* soft nofile 655360
* hard nofile 655360
* soft nproc 655350
* hard nproc 655350
* soft memlock unlimited
* hard memlock unlimited
EOF
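
The new limits apply to fresh login sessions; after logging in again, verify:
ulimit -n    # expect 655360
ulimit -u    # expect 655350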

1.8 Configure passwordless SSH from Master01 (app1) to the other nodes
ssh-keygen -t rsa

for i in app1 app2 app3 app4 db1 db2;do ssh-copy-id -i .ssh/id_rsa.pub $i;done
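
Confirm key-based login works on every node:
for i in app1 app2 app3 app4 db1 db2;do ssh $i hostname;done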

1.9 Upgrade all nodes
yum update -y --exclude=kernel* && reboot

1.10 Install ipvsadm on all nodes
yum install ipvsadm ipset sysstat conntrack libseccomp -y

modprobe -- ip_vs
modprobe -- ip_vs_rr
modprobe -- ip_vs_wrr
modprobe -- ip_vs_sh
modprobe -- nf_conntrack

cat <<EOF | sudo tee /etc/modules-load.d/ipvs.conf 
ip_vs
ip_vs_lc
ip_vs_wlc
ip_vs_rr
ip_vs_wrr
ip_vs_lblc
ip_vs_lblcr
ip_vs_dh
ip_vs_sh
ip_vs_fo
ip_vs_nq
ip_vs_sed
ip_vs_ftp
nf_conntrack
ip_tables
ip_set
xt_set
ipt_set
ipt_rpfilter
ipt_REJECT
ipip
EOF  

systemctl enable --now systemd-modules-load.service

1.11 Enable the kernel parameters required by Kubernetes on all nodes
cat <<EOF > /etc/sysctl.d/k8s.conf
net.ipv4.ip_forward = 1
net.bridge.bridge-nf-call-iptables = 1
net.bridge.bridge-nf-call-ip6tables = 1
net.bridge.bridge-nf-call-arptables = 1
fs.may_detach_mounts = 1
vm.overcommit_memory=1
vm.panic_on_oom=0
fs.inotify.max_user_watches = 1048576
fs.file-max = 5000000
fs.nr_open=52706963
net.netfilter.nf_conntrack_max=2310720
net.ipv4.tcp_keepalive_time = 600
net.ipv4.tcp_keepalive_probes = 3
net.ipv4.tcp_keepalive_intvl =15
net.ipv4.tcp_max_tw_buckets = 36000
net.ipv4.tcp_tw_reuse = 1
net.ipv4.tcp_max_orphans = 327680
net.ipv4.tcp_orphan_retries = 3
net.ipv4.tcp_max_syn_backlog = 16384
net.ipv4.tcp_timestamps = 0
net.core.somaxconn = 32768
vm.swappiness = 0
net.ipv4.tcp_syncookies = 0
fs.inotify.max_user_instances = 1024
net.ipv4.conf.all.rp_filter = 1
net.ipv4.neigh.default.gc_thresh1 = 80000
net.ipv4.neigh.default.gc_thresh2 = 90000
net.ipv4.neigh.default.gc_thresh3 = 100000
EOF

sysctl --system

# The irqbalance service distributes device interrupt requests across CPUs, preventing all interrupts from landing on a single CPU and becoming a performance bottleneck.
systemctl enable irqbalance
systemctl start irqbalance

reboot
lsmod | grep --color=auto -e ip_vs -e nf_conntrack    # after the reboot, verify the modules are loaded

2. Install Base Components
2.1 Install docker-ce
yum list docker-ce --showduplicates | sort -r
yum install docker-ce docker-ce-cli -y

cat <<EOF > /etc/docker/daemon.json
{
    "registry-mirrors": [
       "https://wli8urvv.mirror.aliyuncs.com",
       "https://docker.mirrors.ustc.edu.cn",
       "https://hub-mirror.c.163.com",
       "https://registry.docker-cn.com"
     ],
    "exec-opts": ["native.cgroupdriver=systemd"],
    "log-driver": "json-file",
    "log-opts": {
        "max-size": "300m",
        "max-file":"5"
     },
    "max-concurrent-downloads": 10,
    "max-concurrent-uploads": 5,
    "live-restore": true
    "storage-driver": "overlay2",
    "storage-opts": [
      "overlay2.override_kernel_check=true"
    ],
    "data-root": "/data1/docker"    
}
EOF

This sets Docker's data directory to /data1/docker; make sure the path exists before starting Docker.

mkdir -p /etc/systemd/system/docker.service.d
cat > /etc/systemd/system/docker.service.d/limit-nofile.conf <<EOF
[Service]
LimitNOFILE=1048576
EOF

systemctl daemon-reload && systemctl enable --now docker    # enable Docker to start on boot on all nodes
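
Verify the daemon picked up the settings from daemon.json:
docker info | grep -E 'Cgroup Driver|Storage Driver|Docker Root Dir'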

2.2 Install kubeadm, kubelet, and kubectl
yum list kubeadm.x86_64 --showduplicates | sort -r
yum install -y kubelet-1.20.13 kubeadm-1.20.13 kubectl-1.20.13

cat >/etc/sysconfig/kubelet<<EOF    # point the pause image at an accessible registry
KUBELET_EXTRA_ARGS="--cgroup-driver=systemd --pod-infra-container-image=registry.cn-hangzhou.aliyuncs.com/google_containers/pause-amd64:3.2"
EOF

systemctl daemon-reload
systemctl enable --now kubelet    # kubelet will restart in a crash loop until kubeadm init/join runs; this is expected

2.3 Install the HA components on the master nodes
yum install keepalived nginx -y

# Apply on all master nodes; replace the original file contents with the following
cat > /etc/nginx/nginx.conf << EOF
user root;
worker_processes auto;
error_log /var/log/nginx/error.log;
pid /run/nginx.pid;

# Load dynamic modules. See /usr/share/doc/nginx/README.dynamic.
include /usr/share/nginx/modules/*.conf;

events {
    worker_connections 1024;
}

stream {

    log_format  main  '$remote_addr $upstream_addr - [$time_local] $status $upstream_bytes_sent';

    access_log  /var/log/nginx/k8s-access.log  main;

    upstream k8s-apiserver {
       server 10.10.10.2:6443;
       server 10.10.10.3:6443;
       server 10.10.10.4:6443;
    }

    server {
       listen 16443;
       proxy_pass k8s-apiserver;
    }

    upstream web-443 {
       server 10.10.10.2:9443;
       server 10.10.10.3:9443;
       server 10.10.10.4:9443;
    }

    server {
       listen 443;
       proxy_pass web-443;
    }

    upstream web-80 {
       server 10.10.10.2:8080;
       server 10.10.10.3:8080;
       server 10.10.10.4:8080;
    }

    server {
       listen 80;
       proxy_pass web-80;
    }

}

http {
    log_format  main  '$remote_addr - $remote_user [$time_local] "$request" '
                      '$status $body_bytes_sent "$http_referer" '
                      '"$http_user_agent" "$http_x_forwarded_for"';

    access_log  /var/log/nginx/access.log  main;

    sendfile            on;
    tcp_nopush          on;
    tcp_nodelay         on;
    keepalive_timeout   65;
    types_hash_max_size 4096;
    proxy_http_version 1.1;

    include             /etc/nginx/mime.types;
    default_type        application/octet-stream;

    include /etc/nginx/conf.d/*.conf;

}
EOF
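
Validate the configuration before starting nginx (the stream block requires nginx's stream module, loaded via the modules include above):
nginx -t    # expect: syntax is ok / test is successful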

# app1 node configuration
cat /etc/keepalived/keepalived.conf
global_defs {
        router_id huangshan
        script_user root
        enable_script_security
}
vrrp_script check_nginx {
        script "/etc/keepalived/check_nginx.sh"
}
vrrp_instance VI_1 {
        state MASTER
        interface ens2f0
        virtual_router_id 50
        priority 100
        advert_int 1
        authentication {
                auth_type PASS
                auth_pass 1111
        }
        track_script {
                check_nginx
        }
        virtual_ipaddress {
                10.10.10.99
        }
}

# app2 node configuration
cat /etc/keepalived/keepalived.conf
global_defs {
        router_id huangshan
        script_user root
        enable_script_security
}
vrrp_script check_nginx {
        script "/etc/keepalived/check_nginx.sh"
}
vrrp_instance VI_1 {
state BACKUP
        interface ens1f0
        virtual_router_id 50
        priority 90
        advert_int 1
        authentication {
                auth_type PASS
                auth_pass 1111
        }
        track_script {
                check_nginx
        }
        virtual_ipaddress {
                10.10.10.99
        }
}

# app3 node configuration; replace the original contents with the following
cat /etc/keepalived/keepalived.conf
global_defs {
        router_id huangshan
        script_user root
        enable_script_security
}
vrrp_script check_nginx {
        script "/etc/keepalived/check_nginx.sh"
}
vrrp_instance VI_1 {
state BACKUP
        interface ens2f0
        virtual_router_id 50
        priority 80
        advert_int 1
        authentication {
                auth_type PASS
                auth_pass 1111
        }
        track_script {
                check_nginx
        }
        virtual_ipaddress {
                10.10.10.99
        }
}

# Configure the keepalived health-check script on all master nodes
cat /etc/keepalived/check_nginx.sh
#!/bin/bash
count=$(ps -ef |grep nginx | grep sbin | egrep -cv "grep|$$")
if [ "$count" -eq 0 ];then
    systemctl stop keepalived
fi
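
The health-check script must be executable, and with enable_script_security it must be owned by root:
chmod +x /etc/keepalived/check_nginx.sh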

systemctl daemon-reload
systemctl enable --now nginx
systemctl enable --now keepalived
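
Confirm the VIP came up on the MASTER node (the interface name varies per node, as in the configs above):
ip addr show ens2f0 | grep 10.10.10.99    # run on app1; the VIP should appear here
curl -vk https://10.10.10.99:16443        # connection errors are expected until the apiservers are up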

3. Cluster Initialization
3.1 Initialization
cat kubeadm-config.yaml
apiVersion: kubeadm.k8s.io/v1beta2
kind: ClusterConfiguration
kubernetesVersion: v1.20.13
controlPlaneEndpoint: 10.10.10.99:16443
imageRepository: registry.aliyuncs.com/google_containers
apiServer:
  certSANs:
  - 10.10.10.2
  - 10.10.10.3
  - 10.10.10.4
  - 10.10.10.5
  - 10.10.10.6
  - 10.10.10.7
  - 10.10.10.99
networking:
  podSubnet: 10.244.0.0/16
  serviceSubnet: 10.96.0.0/16
---
apiVersion: kubeproxy.config.k8s.io/v1alpha1
kind: KubeProxyConfiguration
mode: ipvs

Download the kubeadm build patched for 99-year certificates:
wget https://home.vimll.com:9999/download/kubeadm-v.1.20.13-99years
mv /usr/bin/kubeadm /usr/bin/kubeadm.bak
cp kubeadm-v.1.20.13-99years /usr/bin/kubeadm && chmod +x /usr/bin/kubeadm
kubeadm config migrate --old-config kubeadm-config.yaml --new-config new.yaml   # migrate the config to the current kubeadm schema
kubeadm config images pull --config /root/new.yaml  # pull the images first
systemctl enable --now kubelet
kubeadm init --config /root/new.yaml  --upload-certs    # initialize

# After a successful init, run this so kubectl works on the master node
mkdir -p $HOME/.kube
cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
chown $(id -u):$(id -g) $HOME/.kube/config
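
To confirm the patched kubeadm really issued long-lived certificates (the kubeadm certs command graduated from alpha in v1.20):
kubeadm certs check-expiration
openssl x509 -in /etc/kubernetes/pki/apiserver.crt -noout -enddate    # the expiry should be roughly 99 years out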

# kubectl command completion
yum -y install bash-completion
source /usr/share/bash-completion/bash_completion
source <(kubectl completion bash)
echo "source <(kubectl completion bash)" >> ~/.bashrc

kubeadm reset -f ; ipvsadm --clear  ; rm -rf ~/.kube    # if init fails, reset and then initialize again

3.2 Add new master and worker nodes
# Add a master node
kubeadm join 10.10.10.99:16443 --token zbla21.jj0pfuz91vuvx7qc \
    --discovery-token-ca-cert-hash sha256:fe9058e1a39b0d3ac8212c79f907830b171d695856e67c1892d8e97da0b83f1b \
    --control-plane --certificate-key 501b00ce18f97884656fbc3b16224dcf4ae01520d16d6e6c05732169c47f14ea

# Add a worker node
kubeadm join 10.10.10.99:16443 --token zbla21.jj0pfuz91vuvx7qc \
    --discovery-token-ca-cert-hash sha256:fe9058e1a39b0d3ac8212c79f907830b171d695856e67c1892d8e97da0b83f1b

kubeadm token create --print-join-command   # if the token has expired, generate a new one

kubeadm init phase upload-certs  --upload-certs     # re-upload the certs on a master to generate a new --certificate-key
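
The two commands can be combined into a complete control-plane join command (a sketch; upload-certs prints the certificate key on its last output line):
echo "$(kubeadm token create --print-join-command) --control-plane --certificate-key $(kubeadm init phase upload-certs --upload-certs | tail -1)"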

3.3 Install Calico on app1 (copy this section line by line, otherwise it is easy to make mistakes)
kubectl apply -f https://home.vimll.com:9999/download/hs/calico.yaml
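
Watch the rollout; the nodes turn Ready once Calico is up:
kubectl get pods -n kube-system -o wide    # wait for calico-node and coredns to be Running
kubectl get nodes                          # all nodes should report Ready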

Resetting a master node
(The commands below were run on a separate test cluster, hence the different IPs.)
On the master node to be reset:
kubectl delete  nodes 172.16.100.30
kubeadm reset
sudo ifconfig cni0 down    
sudo ip link delete cni0

On a healthy master node in the cluster:
kubeadm token create --print-join-command
kubeadm init phase upload-certs  --upload-certs

kubeadm join 172.16.100.100:6443 --token 8mcp5p.kfpnqr46av1df0bz     --discovery-token-ca-cert-hash sha256:007f26ca26f70cf2bd33f38b702921e107bbd1656998619e1cba99840d0c9a30 --control-plane --certificate-key 1d9848b3590b6c1616c013ebd10edbf0aa0f2e0998559b7dbf481aaffc68a897

etcd error when re-joining a master node that was previously removed — delete the stale etcd member first:
kubectl exec -it -n kube-system etcd-172.16.100.32 -- sh
## Set up the environment inside the container
export ETCDCTL_API=3
alias etcdctl='etcdctl --endpoints=https://127.0.0.1:2379 --cacert=/etc/kubernetes/pki/etcd/ca.crt --cert=/etc/kubernetes/pki/etcd/server.crt --key=/etc/kubernetes/pki/etcd/server.key'

## List the etcd cluster members
$ etcdctl member list

63bfe05c4646fb08, started, k8s-master-2-11, https://192.168.2.11:2380, https://192.168.2.11:2379, false
8e41efd8164c6e3d, started, k8s-master-2-12, https://192.168.2.12:2380, https://192.168.2.12:2379, false
a61d0bd53c1cbcb6, started, k8s-master-2-13, https://192.168.2.13:2380, https://192.168.2.13:2379, false

## Remove the etcd member for k8s-master-2-11
$ etcdctl member remove 63bfe05c4646fb08

Member 63bfe05c4646fb08 removed from cluster ed984b9o8w35cap2

## List the members again to confirm the removal
$ etcdctl member list

8e41efd8164c6e3d, started, k8s-master-2-12, https://192.168.2.12:2380, https://192.168.2.12:2379, false
a61d0bd53c1cbcb6, started, k8s-master-2-13, https://192.168.2.13:2380, https://192.168.2.13:2379, false

## Exit the container
$ exit

## Re-join the master node
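
On a healthy master, generate fresh credentials as in section 3.2, then join (the token, hash, and key below are placeholders):
kubeadm token create --print-join-command
kubeadm init phase upload-certs --upload-certs
kubeadm join 10.10.10.99:16443 --token <token> --discovery-token-ca-cert-hash sha256:<hash> \
    --control-plane --certificate-key <key>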