Merge "Prometheus kubelet.rules change"

This commit is contained in:
Zuul 2018-10-23 17:57:26 +00:00 committed by Gerrit Code Review
commit 11ec46bdce
1 changed files with 18 additions and 10 deletions

View File

@ -1178,15 +1178,23 @@ conf:
- name: kubelet.rules
rules:
- alert: K8SNodeNotReady
expr: kube_node_status_ready{condition="true"} == 0
for: 1h
expr: kube_node_status_condition{condition="Ready", status="unknown"} == 1 or kube_node_status_condition{condition="Ready", status="false"} == 1
for: 1m
labels:
severity: warning
severity: critical
annotations:
description: The Kubelet on {{ $labels.node }} has not checked in with the API, or has set itself to NotReady, for more than an hour
summary: Node status is NotReady
description: The Kubelet on {{ $labels.node }} has not checked in with the API, or has set itself to NotReady, for more than a minute
summary: '{{ $labels.node }} Node status is NotReady and {{ $labels.status }}'
- alert: K8SManyNodesNotReady
expr: count(kube_node_status_ready{condition="true"} == 0) > 1 and (count(kube_node_status_ready{condition="true"} == 0) / count(kube_node_status_ready{condition="true"})) > 0.2
expr: count(kube_node_status_condition{condition="Ready", status="unknown"} == 1) > 1 and (count(kube_node_status_condition{condition="Ready", status="unknown"} == 1) / count(kube_node_status_condition{condition="Ready", status="unknown"})) > 0.2
for: 1m
labels:
severity: critical
annotations:
description: '{{ $value }} Kubernetes nodes (more than 10% are in the NotReady state).'
summary: Many Kubernetes nodes are Not Ready
- alert: K8SManyNodesNotReady
expr: count(kube_node_status_condition{condition="Ready", status="false"} == 1) > 1 and (count(kube_node_status_condition{condition="Ready", status="false"} == 1) / count(kube_node_status_condition{condition="Ready", status="false"})) > 0.2
for: 1m
labels:
severity: critical
@ -1194,7 +1202,7 @@ conf:
description: '{{ $value }} Kubernetes nodes (more than 10% are in the NotReady state).'
summary: Many Kubernetes nodes are Not Ready
- alert: K8SNodesNotReady
expr: count(kube_node_status_ready{condition="true"} == 0) > 0
expr: count(kube_node_status_condition{condition="Ready", status="false"} == 1) > 0 or count(kube_node_status_condition{condition="Ready", status="unknown"} == 1) > 0
for: 1m
labels:
severity: critical
@ -1203,15 +1211,15 @@ conf:
summary: One or more Kubernetes nodes are Not Ready
- alert: K8SKubeletDown
expr: count(up{job="kubelet"} == 0) / count(up{job="kubelet"}) > 0.03
for: 1h
for: 1m
labels:
severity: warning
severity: critical
annotations:
description: Prometheus failed to scrape {{ $value }}% of kubelets.
summary: Many Kubelets cannot be scraped
- alert: K8SKubeletDown
expr: absent(up{job="kubelet"} == 1) or count(up{job="kubelet"} == 0) / count(up{job="kubelet"}) > 0.1
for: 1h
for: 1m
labels:
severity: critical
annotations: