Grafana: Support multiple Ceph clusters with dashboards

This updates the Grafana Ceph dashboards to use templating to
determine which ceph-mgr to use for displaying Ceph-related
metrics. This required setting the appropriate labels on the
ceph-mgr service to be able to distinguish between releases.

Change-Id: Id2eceacadc5b6366d7bc6668bc16ccf5ba878e4a
This commit is contained in:
Steve Wilkerson 2018-10-16 09:50:54 -05:00
parent a4111037b0
commit f3d8bda9d6
2 changed files with 81 additions and 49 deletions

View File

@ -22,6 +22,8 @@ apiVersion: v1
kind: Service
metadata:
name: ceph-mgr
labels:
{{ tuple $envAll "ceph" "manager" | include "helm-toolkit.snippets.kubernetes_metadata_labels" | indent 4 }}
annotations:
{{- if .Values.monitoring.prometheus.enabled }}
{{ tuple $prometheus_annotations | include "helm-toolkit.snippets.prometheus_service_annotations" | indent 4 }}

View File

@ -3289,7 +3289,7 @@ conf:
lineColor: rgb(31, 120, 193)
show: false
targets:
- expr: count(ceph_health_status)
- expr: count(ceph_health_status{application="ceph",release_group="$ceph_cluster"})
interval: "$interval"
intervalFactor: 1
refId: A
@ -3355,7 +3355,7 @@ conf:
lineColor: rgb(31, 120, 193)
show: false
targets:
- expr: ceph_mon_quorum_count
- expr: ceph_mon_quorum_count{application="ceph",release_group="$ceph_cluster"}
interval: "$interval"
intervalFactor: 1
legendFormat: ''
@ -3416,7 +3416,7 @@ conf:
lineColor: rgb(31, 120, 193)
show: true
targets:
- expr: count(ceph_pool_max_avail)
- expr: count(ceph_pool_max_avail{application="ceph",release_group="$ceph_cluster"})
interval: "$interval"
intervalFactor: 1
legendFormat: ''
@ -3477,7 +3477,7 @@ conf:
lineColor: rgb(31, 120, 193)
show: true
targets:
- expr: ceph_cluster_total_bytes
- expr: ceph_cluster_total_bytes{application="ceph",release_group="$ceph_cluster"}
interval: "$interval"
intervalFactor: 1
legendFormat: ''
@ -3538,7 +3538,7 @@ conf:
lineColor: rgb(31, 120, 193)
show: true
targets:
- expr: ceph_cluster_total_used_bytes
- expr: ceph_cluster_total_used_bytes{application="ceph",release_group="$ceph_cluster"}
interval: "$interval"
intervalFactor: 1
legendFormat: ''
@ -3599,7 +3599,7 @@ conf:
lineColor: rgb(31, 120, 193)
show: false
targets:
- expr: ceph_cluster_total_used_bytes/ceph_cluster_total_bytes
- expr: ceph_cluster_total_used_bytes/ceph_cluster_total_bytes{application="ceph",release_group="$ceph_cluster"}
interval: "$interval"
intervalFactor: 1
legendFormat: ''
@ -3665,7 +3665,7 @@ conf:
lineColor: rgb(31, 120, 193)
show: false
targets:
- expr: count(ceph_osd_in)
- expr: count(ceph_osd_in{application="ceph",release_group="$ceph_cluster"})
interval: "$interval"
intervalFactor: 1
legendFormat: ''
@ -3725,7 +3725,7 @@ conf:
lineColor: rgb(31, 120, 193)
show: false
targets:
- expr: count(ceph_osd_metadata) - count(ceph_osd_in)
- expr: count(ceph_osd_metadata{application="ceph",release_group="$ceph_cluster"}) - count(ceph_osd_in{application="ceph",release_group="$ceph_cluster"})
interval: "$interval"
intervalFactor: 1
legendFormat: ''
@ -3785,7 +3785,7 @@ conf:
lineColor: rgb(31, 120, 193)
show: false
targets:
- expr: sum(ceph_osd_up)
- expr: sum(ceph_osd_up{application="ceph",release_group="$ceph_cluster"})
interval: "$interval"
intervalFactor: 1
legendFormat: ''
@ -3845,7 +3845,7 @@ conf:
lineColor: rgb(31, 120, 193)
show: false
targets:
- expr: count(ceph_osd_metadata) - count(ceph_osd_up)
- expr: count(ceph_osd_metadata{application="ceph",release_group="$ceph_cluster"}) - count(ceph_osd_up{application="ceph",release_group="$ceph_cluster"})
interval: "$interval"
intervalFactor: 1
legendFormat: ''
@ -3905,7 +3905,7 @@ conf:
lineColor: rgb(31, 120, 193)
show: true
targets:
- expr: avg(ceph_osd_numpg)
- expr: avg(ceph_osd_numpg{application="ceph",release_group="$ceph_cluster"})
interval: "$interval"
intervalFactor: 1
legendFormat: ''
@ -3973,7 +3973,7 @@ conf:
stack: true
steppedLine: false
targets:
- expr: ceph_cluster_total_bytes - ceph_cluster_total_used_bytes
- expr: ceph_cluster_total_bytes{application="ceph",release_group="$ceph_cluster"} - ceph_cluster_total_used_bytes{application="ceph",release_group="$ceph_cluster"}
interval: "$interval"
intervalFactor: 1
legendFormat: Available
@ -4060,13 +4060,13 @@ conf:
stack: true
steppedLine: false
targets:
- expr: sum(ceph_osd_op_w)
- expr: sum(ceph_osd_op_w{application="ceph",release_group="$ceph_cluster"})
interval: "$interval"
intervalFactor: 1
legendFormat: Write
refId: A
step: 60
- expr: sum(ceph_osd_op_r)
- expr: sum(ceph_osd_op_r{application="ceph",release_group="$ceph_cluster"})
interval: "$interval"
intervalFactor: 1
legendFormat: Read
@ -4133,13 +4133,13 @@ conf:
stack: true
steppedLine: false
targets:
- expr: sum(ceph_osd_op_in_bytes)
- expr: sum(ceph_osd_op_in_bytes{application="ceph",release_group="$ceph_cluster"})
interval: "$interval"
intervalFactor: 1
legendFormat: Write
refId: A
step: 60
- expr: sum(ceph_osd_op_out_bytes)
- expr: sum(ceph_osd_op_out_bytes{application="ceph",release_group="$ceph_cluster"})
interval: "$interval"
intervalFactor: 1
legendFormat: Read
@ -4214,7 +4214,7 @@ conf:
stack: true
steppedLine: false
targets:
- expr: ceph_cluster_total_objects
- expr: ceph_cluster_total_objects{application="ceph",release_group="$ceph_cluster"}
interval: "$interval"
intervalFactor: 1
legendFormat: Total
@ -4282,37 +4282,37 @@ conf:
stack: true
steppedLine: false
targets:
- expr: sum(ceph_osd_numpg)
- expr: sum(ceph_osd_numpg{application="ceph",release_group="$ceph_cluster"})
interval: "$interval"
intervalFactor: 1
legendFormat: Total
refId: A
step: 60
- expr: sum(ceph_pg_active)
- expr: sum(ceph_pg_active{application="ceph",release_group="$ceph_cluster"})
interval: "$interval"
intervalFactor: 1
legendFormat: Active
refId: B
step: 60
- expr: sum(ceph_pg_inconsistent)
- expr: sum(ceph_pg_inconsistent{application="ceph",release_group="$ceph_cluster"})
interval: "$interval"
intervalFactor: 1
legendFormat: Inconsistent
refId: C
step: 60
- expr: sum(ceph_pg_creating)
- expr: sum(ceph_pg_creating{application="ceph",release_group="$ceph_cluster"})
interval: "$interval"
intervalFactor: 1
legendFormat: Creating
refId: D
step: 60
- expr: sum(ceph_pg_recovering)
- expr: sum(ceph_pg_recovering{application="ceph",release_group="$ceph_cluster"})
interval: "$interval"
intervalFactor: 1
legendFormat: Recovering
refId: E
step: 60
- expr: sum(ceph_pg_down)
- expr: sum(ceph_pg_down{application="ceph",release_group="$ceph_cluster"})
interval: "$interval"
intervalFactor: 1
legendFormat: Down
@ -4380,19 +4380,19 @@ conf:
stack: true
steppedLine: false
targets:
- expr: sum(ceph_pg_degraded)
- expr: sum(ceph_pg_degraded{application="ceph",release_group="$ceph_cluster"})
interval: "$interval"
intervalFactor: 1
legendFormat: Degraded
refId: A
step: 60
- expr: sum(ceph_pg_stale)
- expr: sum(ceph_pg_stale{application="ceph",release_group="$ceph_cluster"})
interval: "$interval"
intervalFactor: 1
legendFormat: Stale
refId: B
step: 60
- expr: sum(ceph_pg_undersized)
- expr: sum(ceph_pg_undersized{application="ceph",release_group="$ceph_cluster"})
interval: "$interval"
intervalFactor: 1
legendFormat: Undersized
@ -4450,6 +4450,16 @@ conf:
- 30d
templating:
list:
- current: {}
hide: 0
label: Cluster
name: ceph_cluster
options: []
type: query
query: label_values(ceph_health_status, release_group)
refresh: 1
sort: 1
datasource: prometheus
- auto: true
auto_count: 10
auto_min: 1m
@ -4599,7 +4609,7 @@ conf:
lineColor: rgb(31, 120, 193)
show: false
targets:
- expr: ceph_osd_up{ceph_daemon="osd.$osd"}
- expr: ceph_osd_up{ceph_daemon="osd.$osd",application="ceph",release_group="$ceph_cluster"}
interval: "$interval"
intervalFactor: 1
refId: A
@ -4672,7 +4682,7 @@ conf:
lineColor: rgb(31, 120, 193)
show: false
targets:
- expr: ceph_osd_in{ceph_daemon="osd.$osd"}
- expr: ceph_osd_in{ceph_daemon="osd.$osd",application="ceph",release_group="$ceph_cluster"}
interval: "$interval"
intervalFactor: 1
refId: A
@ -4739,7 +4749,7 @@ conf:
lineColor: rgb(31, 120, 193)
show: false
targets:
- expr: count(ceph_osd_metadata)
- expr: count(ceph_osd_metadata{application="ceph",release_group="$ceph_cluster"})
interval: "$interval"
intervalFactor: 1
refId: A
@ -4807,13 +4817,13 @@ conf:
stack: true
steppedLine: false
targets:
- expr: ceph_osd_numpg{ceph_daemon=~"osd.$osd"}
- expr: ceph_osd_numpg{ceph_daemon=~"osd.$osd",application="ceph",release_group="$ceph_cluster"}
interval: "$interval"
intervalFactor: 1
legendFormat: Number of PGs - {{ osd.$osd }}
refId: A
step: 60
- expr: avg(ceph_osd_numpg)
- expr: avg(ceph_osd_numpg{application="ceph",release_group="$ceph_cluster"})
interval: "$interval"
intervalFactor: 1
legendFormat: Average Number of PGs in the Cluster
@ -4888,7 +4898,7 @@ conf:
lineColor: rgb(31, 120, 193)
show: true
targets:
- expr: (ceph_osd_stat_bytes_used{ceph_daemon=~"osd.$osd"}/ceph_osd_stat_bytes{ceph_daemon=~"osd.$osd"})*100
- expr: (ceph_osd_stat_bytes_used{ceph_daemon=~"osd.$osd",application="ceph",release_group="$ceph_cluster"}/ceph_osd_stat_bytes{ceph_daemon=~"osd.$osd",application="ceph",release_group="$ceph_cluster"})*100
interval: "$interval"
intervalFactor: 1
legendFormat: ''
@ -4948,14 +4958,14 @@ conf:
stack: true
steppedLine: false
targets:
- expr: ceph_osd_stat_bytes_used{ceph_daemon=~"osd.$osd"}
- expr: ceph_osd_stat_bytes_used{ceph_daemon=~"osd.$osd",application="ceph",release_group="$ceph_cluster"}
interval: "$interval"
intervalFactor: 1
legendFormat: Used - {{ osd.$osd }}
metric: ceph_osd_used_bytes
refId: A
step: 60
- expr: ceph_osd_stat_bytes{ceph_daemon=~"osd.$osd"} - ceph_osd_stat_bytes_used{ceph_daemon=~"osd.$osd"}
- expr: ceph_osd_stat_bytes{ceph_daemon=~"osd.$osd",application="ceph",release_group="$ceph_cluster"} - ceph_osd_stat_bytes_used{ceph_daemon=~"osd.$osd",application="ceph",release_group="$ceph_cluster"}
hide: false
interval: "$interval"
intervalFactor: 1
@ -5024,7 +5034,7 @@ conf:
stack: false
steppedLine: false
targets:
- expr: (ceph_osd_stat_bytes_used{ceph_daemon=~"osd.$osd"}/ceph_osd_stat_bytes{ceph_daemon=~"osd.$osd"})
- expr: (ceph_osd_stat_bytes_used{ceph_daemon=~"osd.$osd",application="ceph",release_group="$ceph_cluster"}/ceph_osd_stat_bytes{ceph_daemon=~"osd.$osd",application="ceph",release_group="$ceph_cluster"})
interval: "$interval"
intervalFactor: 1
legendFormat: Available - {{ osd.$osd }}
@ -5082,6 +5092,16 @@ conf:
- 30d
templating:
list:
- current: {}
hide: 0
label: Cluster
name: ceph_cluster
options: []
type: query
query: label_values(ceph_health_status, release_group)
refresh: 1
sort: 1
datasource: prometheus
- auto: true
auto_count: 10
auto_min: 1m
@ -5140,7 +5160,7 @@ conf:
multi: false
name: osd
options: []
query: label_values(ceph_osd_metadata, id)
query: label_values(ceph_osd_metadata{release_group="$ceph_cluster"}, id)
refresh: 1
regex: ''
type: query
@ -5239,25 +5259,25 @@ conf:
stack: true
steppedLine: false
targets:
- expr: ceph_pool_max_avail{pool_id=~"$pool"}
- expr: ceph_pool_max_avail{pool_id=~"$pool",application="ceph",release_group="$ceph_cluster"}
interval: "$interval"
intervalFactor: 1
legendFormat: Total - {{ $pool }}
refId: A
step: 60
- expr: ceph_pool_bytes_used{pool_id=~"$pool"}
- expr: ceph_pool_bytes_used{pool_id=~"$pool",application="ceph",release_group="$ceph_cluster"}
interval: "$interval"
intervalFactor: 1
legendFormat: Used - {{ $pool }}
refId: B
step: 60
- expr: ceph_pool_max_avail{pool_id=~"$pool"} - ceph_pool_bytes_used{pool_id=~"$pool"}
- expr: ceph_pool_max_avail{pool_id=~"$pool",application="ceph",release_group="$ceph_cluster"} - ceph_pool_bytes_used{pool_id=~"$pool",application="ceph",release_group="$ceph_cluster"}
interval: "$interval"
intervalFactor: 1
legendFormat: Available - {{ $pool }}
refId: C
step: 60
- expr: ceph_pool_raw_bytes_used{pool_id=~"$pool"}
- expr: ceph_pool_raw_bytes_used{pool_id=~"$pool",application="ceph",release_group="$ceph_cluster"}
interval: "$interval"
intervalFactor: 1
legendFormat: Raw - {{ $pool }}
@ -5333,7 +5353,7 @@ conf:
lineColor: rgb(31, 120, 193)
show: false
targets:
- expr: (ceph_pool_bytes_used{pool_id=~"$pool"} / ceph_pool_max_avail{pool_id=~"$pool"})
- expr: (ceph_pool_bytes_used{pool_id=~"$pool",application="ceph",release_group="$ceph_cluster"} / ceph_pool_max_avail{pool_id=~"$pool",application="ceph",release_group="$ceph_cluster"})
interval: "$interval"
intervalFactor: 1
refId: A
@ -5388,13 +5408,13 @@ conf:
stack: false
steppedLine: false
targets:
- expr: ceph_pool_objects{pool_id=~"$pool"}
- expr: ceph_pool_objects{pool_id=~"$pool",application="ceph",release_group="$ceph_cluster"}
interval: "$interval"
intervalFactor: 1
legendFormat: Objects - {{ $pool_name }}
refId: A
step: 60
- expr: ceph_pool_dirty{pool_id=~"$pool"}
- expr: ceph_pool_dirty{pool_id=~"$pool",application="ceph",release_group="$ceph_cluster"}
interval: "$interval"
intervalFactor: 1
legendFormat: Dirty Objects - {{ $pool_name }}
@ -5462,13 +5482,13 @@ conf:
stack: true
steppedLine: false
targets:
- expr: irate(ceph_pool_rd{pool_id=~"$pool"}[3m])
- expr: irate(ceph_pool_rd{pool_id=~"$pool",application="ceph",release_group="$ceph_cluster"}[3m])
interval: "$interval"
intervalFactor: 1
legendFormat: Read - {{ $pool_name}}
refId: B
step: 60
- expr: irate(ceph_pool_wr{pool_id=~"$pool"}[3m])
- expr: irate(ceph_pool_wr{pool_id=~"$pool",application="ceph",release_group="$ceph_cluster"}[3m])
interval: "$interval"
intervalFactor: 1
legendFormat: Write - {{ $pool_name }}
@ -5535,13 +5555,13 @@ conf:
stack: true
steppedLine: false
targets:
- expr: irate(ceph_pool_rd_bytes{pool_id="$pool"}[3m])
- expr: irate(ceph_pool_rd_bytes{pool_id="$pool",application="ceph",release_group="$ceph_cluster"}[3m])
interval: "$interval"
intervalFactor: 1
legendFormat: Read Bytes - {{ $pool_name }}
refId: A
step: 60
- expr: irate(ceph_pool_wr_bytes{pool_id="$pool"}[3m])
- expr: irate(ceph_pool_wr_bytes{pool_id="$pool",application="ceph",release_group="$ceph_cluster"}[3m])
interval: "$interval"
intervalFactor: 1
legendFormat: Written Bytes - {{ $pool_name }}
@ -5599,6 +5619,16 @@ conf:
- 30d
templating:
list:
- current: {}
hide: 0
label: Cluster
name: ceph_cluster
options: []
type: query
query: label_values(ceph_health_status, release_group)
refresh: 1
sort: 1
datasource: prometheus
- auto: true
auto_count: 10
auto_min: 1m
@ -5657,7 +5687,7 @@ conf:
multi: false
name: pool
options: []
query: label_values(ceph_pool_objects, pool_id)
query: label_values(ceph_pool_objects{release_group="$ceph_cluster"}, pool_id)
refresh: 1
regex: ''
type: query
@ -5669,7 +5699,7 @@ conf:
multi: false
name: pool_name
options: []
query: label_values(ceph_pool_metadata{pool_id="[[pool]]" }, name)
query: label_values(ceph_pool_metadata{release_group="$ceph_cluster",pool_id="[[pool]]" }, name)
refresh: 1
regex: ''
type: query