Kubelet证书到期轮转
# 一.原因
Kubeadm部署的k8s集群,kubelet证书默认是一年有效期。kubelet证书的有效期由kube-controller-manager的experimental-cluster-signing-duration参数决定(详见官网文档),GitHub上也有相关的issues。
# 二.场景
# 1. 查看日志,发现证书过期报错
[root@master01 ~]# kubectl logs -f -n kube-mon prometheus-658f65b6fd-85xdl
Error from server: Get "https://10.0.200.37:10250/containerLogs/kube-mon/cloud-kafka-exporter-86cb49cd85-rpbmb/cloud-kafka-exporter?follow=true": x509: certificate has expired or is not yet valid: current time 2024-06-24T16:36:15+08:00 is after 2024-03-20T07:35:03Z
# 2. 查看证书的有效期
[root@monitor etc]# curl -kvs https://10.0.200.37:10250
* About to connect() to 10.0.200.37 port 10250 (#0)
* Trying 10.0.200.37...
* Connected to 10.0.200.37 (10.0.200.37) port 10250 (#0)
* Initializing NSS with certpath: sql:/etc/pki/nssdb
* skipping SSL peer certificate verification
* NSS: client certificate not found (nickname not specified)
* SSL connection using TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305_SHA256
* Server certificate:
* subject: CN=monitor@1679387703
* start date: 3月 21 07:35:03 2023 GMT
* expire date: 3月 20 07:35:03 2024 GMT
* common name: monitor@1679387703
* issuer: CN=monitor-ca@1679387703
> GET / HTTP/1.1
> User-Agent: curl/7.29.0
> Host: 10.0.200.37:10250
> Accept: */*
>
< HTTP/1.1 404 Not Found
< Content-Type: text/plain; charset=utf-8
< X-Content-Type-Options: nosniff
< Date: Mon, 24 Jun 2024 08:55:13 GMT
< Content-Length: 19
<
404 page not found
* Connection #0 to host 10.0.200.37 left intact
# 三.更换证书
# 1. 问题节点加上serverTLSBootstrap: true参数,就能由controller-manager签发kubelet自身的https证书了
[root@monitor kubelet]# vim /var/lib/kubelet/config.yaml
....
staticPodPath: /etc/kubernetes/manifests
streamingConnectionIdleTimeout: 4h0m0s
syncFrequency: 1m0s
#tlsCertFile: /var/lib/kubelet/pki/kubelet.crt #注: 因为通过controller-manager签发证书了,不需要读取本地的证书
#tlsPrivateKeyFile: /var/lib/kubelet/pki/kubelet.key # 注: 因为通过controller-manager签发证书了,不需要读取本地的证书
volumeStatsAggPeriod: 1m0s
serverTLSBootstrap: true # 新增
# 2. 问题节点,重启kubelet.service
[root@monitor kubelet]# systemctl restart kubelet.service
# 3. master节点,查看csr
[root@master01 exporter]# kubectl get csr
NAME AGE SIGNERNAME REQUESTOR CONDITION
csr-5xzdt 37m kubernetes.io/kubelet-serving system:node:monitor Pending
# 4. master节点,手动批准CSR以签发证书
[root@master01 ~]# kubectl certificate approve csr-5xzdt && kubectl get csr
[root@master01 ~]# kubectl get csr
NAME AGE SIGNERNAME REQUESTOR CONDITION
csr-5xzdt 38m kubernetes.io/kubelet-serving system:node:monitor Approved,Issued
# 5. 问题节点,查看证书有效期
[root@monitor kubelet]# curl -kvs https://10.0.200.37:10250
* About to connect() to 10.0.200.37 port 10250 (#0)
* Trying 10.0.200.37...
* Connected to 10.0.200.37 (10.0.200.37) port 10250 (#0)
* Initializing NSS with certpath: sql:/etc/pki/nssdb
* skipping SSL peer certificate verification
* NSS: client certificate not found (nickname not specified)
* SSL connection using TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305_SHA256
* Server certificate:
* subject: CN=system:node:monitor,O=system:nodes
* start date: 6月 24 08:30:26 2024 GMT
* expire date: 6月 22 08:30:26 2034 GMT
* common name: system:node:monitor
* issuer: CN=kubernetes
> GET / HTTP/1.1
> User-Agent: curl/7.29.0
> Host: 10.0.200.37:10250
> Accept: */*
>
< HTTP/1.1 404 Not Found
< Content-Type: text/plain; charset=utf-8
< X-Content-Type-Options: nosniff
< Date: Mon, 24 Jun 2024 09:12:18 GMT
< Content-Length: 19
<
404 page not found
上次更新: 2025/04/25, 03:40:17