VMagent
前面我们使用 VictoriaMetrics 作为监控数据存储。官方建议使用 vmagent 来采集指标:相比 Prometheus,vmagent 更加灵活,除了拉取(pull)指标之外还可以接收推送(push)的指标,此外还有很多其他特性,并且完全兼容 Prometheus。
# 一 vmagent部署
# 1.1 vmagent 自动发现监控的资源对象,需要访问 APIServer 获取资源对象,所以首先需要配置 rbac 权限,创建如下所示的资源清单
[root@k8s-uat-manager vm]# cat vmagent-rbac.yaml
# ServiceAccount used by the vmagent pods (referenced through
# `serviceAccountName: vmagent` in the workload manifests).
apiVersion: v1
kind: ServiceAccount
metadata:
  name: vmagent
  namespace: kube-mon
---
# Cluster-wide, read-only permissions for vmagent: it lists/watches the
# objects below through the API server for Kubernetes service discovery.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: vmagent
rules:
  # "discovery.k8s.io" is the API group that serves endpointslices; without
  # it the endpointslices entry below would not grant any access.
  - apiGroups: ["", "networking.k8s.io", "extensions", "discovery.k8s.io"]
    resources:
      - nodes
      - nodes/metrics
      - services
      - endpoints
      - endpointslices
      - pods
      - ingresses
    verbs: ["get", "list", "watch"]
  # Single-object reads only (no list/watch) for namespaces and configmaps.
  - apiGroups: [""]
    resources:
      - namespaces
      - configmaps
    verbs: ["get"]
  # Non-resource endpoints exposed directly by the API server.
  - nonResourceURLs: ["/metrics", "/metrics/resources"]
    verbs: ["get"]
---
# Grants the "vmagent" ClusterRole to the "vmagent" ServiceAccount in the
# kube-mon namespace.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: vmagent
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: vmagent
subjects:
  - kind: ServiceAccount
    name: vmagent
    namespace: kube-mon
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
# 1.2 添加 vmagent 配置
[root@k8s-uat-manager vm]# cat vmagent-config.yaml
# Scrape configuration for vmagent, exposed to the pods as the file
# `scrape.yml` (the workloads mount this ConfigMap at /config).
#
# The single job "kubernetes-nodes" discovers targets with the `node` role
# and applies three relabel rules:
#   1. rewrite the discovered address from port 10250 to port 9100
#      (presumably node_exporter; confirm against the cluster setup),
#   2. append ":9100" to the `instance` label,
#   3. copy the node's kubernetes.io/hostname label into `node_name`.
apiVersion: v1
kind: ConfigMap
metadata:
  name: vmagent-config
  namespace: kube-mon
data:
  scrape.yml: |
    global:
      scrape_interval: 15s
      scrape_timeout: 15s
      external_labels:
        cluster: 'uat'
    scrape_configs:
      - job_name: 'kubernetes-nodes'
        kubernetes_sd_configs:
          - role: node
        relabel_configs:
          - source_labels: [__address__]
            action: replace
            target_label: __address__
            regex: '(.*):10250'
            replacement: '${1}:9100'
          - source_labels: [instance]
            regex: '(.*)'
            replacement: '${1}:9100'
            target_label: instance
            action: replace
          - source_labels: [__meta_kubernetes_node_label_kubernetes_io_hostname]
            regex: (.+)
            target_label: 'node_name'
            replacement: '$1'
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
# 1.3 vmagent 以集群分片模式运行
[root@k8s-uat-manager vm]# cat vmagent-sts.yaml
# vmagent running as a 3-member scrape cluster. A StatefulSet is used so
# that every pod gets a stable ordinal name (vmagent-0, vmagent-1, ...),
# which is passed to vmagent as its cluster member number.
#
# NOTE(review): `serviceName: vmagent` expects a governing (normally
# headless) Service of that name — confirm one exists in kube-mon.
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: vmagent
  namespace: kube-mon
  labels:
    app: vmagent
spec:
  replicas: 3
  serviceName: vmagent
  selector:
    matchLabels:
      app: vmagent
  template:
    metadata:
      labels:
        app: vmagent
    spec:
      serviceAccountName: vmagent
      containers:
        - name: agent
          image: registry.us-east-1.aliyuncs.com/y110/vmagent:latest
          imagePullPolicy: IfNotPresent
          args:
            - -promscrape.config=/config/scrape.yml
            # Local buffer for collected metrics while the remote storage is
            # unavailable; buffered data is sent once the remote storage
            # recovers. /tmpData is not backed by a volume in this manifest,
            # so the buffer does not outlive the container.
            - -remoteWrite.tmpDataPath=/tmpData
            # Number of vmagent shards; must match `replicas` above.
            - -promscrape.cluster.membersCount=3
            # Every target is scraped by this many members.
            # NOTE(review): the resulting duplicate samples presumably need
            # deduplication on the storage side — confirm it is enabled.
            - -promscrape.cluster.replicationFactor=2
            # $(POD_NAME) is expanded by Kubernetes from the env var defined
            # below, i.e. the pod name such as "vmagent-0".
            - -promscrape.cluster.memberNum=$(POD_NAME)
            # Single-node VictoriaMetrics (vm-single) remote-write endpoint.
            - -remoteWrite.url=http://victoria-metrics:8428/api/v1/write
            - -envflag.enable=true
            - -envflag.prefix=VM_
            - -loggerFormat=json
          ports:
            - name: http
              containerPort: 8429
          env:
            - name: POD_NAME
              valueFrom:
                fieldRef:
                  fieldPath: metadata.name
          volumeMounts:
            - name: config
              mountPath: /config
      volumes:
        - name: config
          configMap:
            name: vmagent-config
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
# 1.4 部署的三个 vmagent 实例会分别采集一部分指标
[root@manager vm]# kubectl get pod -n kube-mon -owide |grep vmagent
vmagent-0 1/1 Running 0 7m12s 172.19.100.20 cn-shanghai.172.19.100.20 <none> <none>
vmagent-1 1/1 Running 0 7m14s 172.19.100.1 cn-shanghai.172.19.100.1 <none> <none>
vmagent-2 1/1 Running 0 7m17s 172.19.100.19 cn-shanghai.172.19.100.19 <none> <none>
[root@manager vm]# kubectl logs -f -n kube-mon vmagent-0
···
{"ts":"2024-08-08T05:40:45.559Z","level":"info","caller":"VictoriaMetrics/lib/promscrape/scraper.go:432","msg":"kubernetes_sd_configs: added targets: 6, removed targets: 0; total targets: 6"}
[root@k8s-uat-manager vm]# kubectl logs -f -n kube-mon vmagent-1
···
{"ts":"2024-08-08T05:42:56.463Z","level":"info","caller":"VictoriaMetrics/lib/promscrape/scraper.go:432","msg":"kubernetes_sd_configs: added targets: 9, removed targets: 0; total targets: 9"}
[root@k8s-uat-manager vm]# kubectl logs -f -n kube-mon vmagent-2
···
{"ts":"2024-08-08T05:46:19.165Z","level":"info","caller":"VictoriaMetrics/lib/promscrape/scraper.go:432","msg":"kubernetes_sd_configs: added targets: 9, removed targets: 0; total targets: 9"}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
2
3
4
5
6
7
8
9
10
11
12
13
14
15
# 1.4.1 vmagent-0采集的指标
# 1.4.2 vmagent-1采集的指标
# 1.4.3 vmagent-2采集的指标
如上图所示,12 个节点会被分片交叉采集,每个指标被采集 2 次(三个实例的 target 数量为 6 + 9 + 9 = 24 = 12 × 2),说明 -promscrape.cluster.replicationFactor=2 参数是生效的。
但问题也随之而来:根据官网的介绍,我们需要对存入 VictoriaMetrics 的指标进行去重(例如在 VictoriaMetrics 侧配置 -dedup.minScrapeInterval)。
# 1.5 vmagent 单副本部署
---
# Single-replica vmagent (no `replicas` field, so the Deployment default of
# one pod applies). It scrapes the targets from /config/scrape.yml and
# forwards the samples to VictoriaMetrics via remote write.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: vmagent
  namespace: kube-mon
  labels:
    app: vmagent
spec:
  selector:
    matchLabels:
      app: vmagent
  template:
    metadata:
      labels:
        app: vmagent
      annotations:
        # Annotations advertising vmagent's own metrics port; they only take
        # effect if a scrape job honours prometheus.io/* annotations.
        prometheus.io/scrape: "true"
        prometheus.io/port: "8429"
    spec:
      serviceAccountName: vmagent
      containers:
        - name: vmagent
          image: hundun-registry-registry.cn-shanghai.cr.aliyuncs.com/devops/vmagent:latest
          imagePullPolicy: IfNotPresent
          args:
            - -promscrape.config=/config/scrape.yml
            - -remoteWrite.url=http://victoria-metrics:8428/api/v1/write
            - -promscrape.maxScrapeSize=500MB
            # Buffer directory used while the remote storage is unreachable;
            # not backed by a volume in this manifest.
            - -remoteWrite.tmpDataPath=/tmpData
            - -loggerTimezone=Asia/Shanghai
            - -loggerFormat=json
          env:
            - name: TZ
              value: Asia/Shanghai
          ports:
            - name: http
              containerPort: 8429
          volumeMounts:
            - name: config
              mountPath: /config
          resources:
            limits:
              cpu: '2'
              memory: 2Gi
            requests:
              cpu: 100m
              memory: 256Mi
      volumes:
        - name: config
          configMap:
            name: vmagent-config
---
# Exposes the vmagent HTTP port (8429) of the pods labelled `app: vmagent`;
# type NodePort additionally opens a port on every node.
apiVersion: v1
kind: Service
metadata:
  name: vmagent
  namespace: kube-mon
spec:
  selector:
    app: vmagent
  ports:
    - name: http
      port: 8429
      # Refers to the container port named "http".
      targetPort: http
  type: NodePort
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
上次更新: 2025/04/25, 03:40:17