# Alertmanager global settings: SMTP defaults used for e-mail notifications.
global:
  # SMTP relay, written as host:port.
  smtp_smarthost: 'smtp.163.com:25'
  # Address placed in the From header of notification mails.
  smtp_from: 's@163.com'
  # Account used to authenticate against the SMTP relay.
  smtp_auth_username: 's@163.com'
  # Placeholder value (the text means "password"); supply the real secret at
  # deploy time and keep it out of version control.
  smtp_auth_password: '密码'
# Notification targets; routes select one of these by its name.
receivers:
  - name: default-receiver
    email_configs:
      # Comma-separated recipient list.
      # NOTE(review): the addresses look redacted/placeholder - confirm before use.
      - to: 'songjxin@*.com,b@c.a'
# Root route: every alert is grouped by alert name and sent to default-receiver.
route:
  group_by: ['alertname']
  # Delay before the first notification for a newly created group.
  group_wait: 10s
  # Delay before notifying about alerts added to an already-notified group.
  group_interval: 5m
  receiver: default-receiver
  # Delay before re-sending a notification for alerts that are still firing.
  repeat_interval: 1h
# alert.rules (configMap-monitoring-prometheus-alertrules) is configured as follows (it defines the alerting rules):
# Prometheus alerting rules: one group, "test-rule", holding three alerts.
groups:
  - name: test-rule
    rules:
      # Fires when the sum of kube_node_info stays below 9 for one minute.
      - alert: clients
        expr: sum(kube_node_info) < 9
        for: 1m
        labels:
          severity: warning
        annotations:
          # NOTE(review): sum() without a "by" clause drops every label, so
          # $labels.instance probably renders empty in these texts - confirm.
          summary: '{{$labels.instance}}: node is not ready'
          # Text states the same threshold as expr (< 9).
          description: '{{$labels.instance}}: node number is less than 9 (current value is: {{ $value }})'

      # Fires when used memory (total - free - cached) exceeds 25% of total
      # memory for one minute.
      - alert: NodeMemoryUsage
        expr: >-
          (((node_memory_MemTotal - node_memory_MemFree - node_memory_Cached)
          / (node_memory_MemTotal) * 100)) > 25
        for: 1m
        labels:
          severity: page
        annotations:
          # Keys are lower-case to match the other alerts in this group; the
          # text states the same threshold as expr (> 25).
          description: '{{$labels.instance}}: Memory usage is above 25% (current value is: {{ $value }})'
          summary: '{{$labels.instance}}: High memory usage detected'

      # Fires when the share of node_cpu outside the idle|iowait modes, summed
      # per (instance, job), exceeds 30% for one minute.
      # The folded block scalar keeps the double quotes of the PromQL label
      # matchers literal, so no backslash escaping is needed.
      - alert: HighCPUUsage
        expr: >-
          ((sum(node_cpu{mode=~"user|nice|system|irq|softirq|steal|idle|iowait"})
          BY (instance, job)) - (sum(node_cpu{mode=~"idle|iowait"}) BY (instance, job)))
          / (sum(node_cpu{mode=~"user|nice|system|irq|softirq|steal|idle|iowait"}) BY
          (instance, job)) * 100 > 30
        for: 1m
        labels:
          service: backend
        annotations:
          # Duration in the text matches the "for" clause (1m).
          description: This machine has really high CPU usage for over 1m
          summary: High CPU Usage