Add support for the cos_agent relation
This supports grafana dashboards and metrics scraping from the ceph
mgr prometheus module.
This charm must be built with charmcraft 2.6 for dependency handling.
Also remove the zed tests, as zed is EOL.
Change-Id: I8b2f132a4997d205119f7afe2a1ab6b2ae4c0134
cherry-picked from e35d908
Change-Id: Idd479cef04a24ea64af643bd6e142ac40906e86c
func-test-pr: https://github.com/openstack-charmers/zaza-openstack-tests/pull/1208
This commit is contained in:
parent
967559b4df
commit
6cd7be9036
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,348 @@
|
|||
{
|
||||
"__inputs": [ ],
|
||||
"__requires": [
|
||||
{
|
||||
"id": "grafana",
|
||||
"name": "Grafana",
|
||||
"type": "grafana",
|
||||
"version": "5.3.2"
|
||||
},
|
||||
{
|
||||
"id": "graph",
|
||||
"name": "Graph",
|
||||
"type": "panel",
|
||||
"version": "5.0.0"
|
||||
}
|
||||
],
|
||||
"annotations": {
|
||||
"list": [
|
||||
{
|
||||
"builtIn": 1,
|
||||
"datasource": "-- Grafana --",
|
||||
"enable": true,
|
||||
"hide": true,
|
||||
"iconColor": "rgba(0, 211, 255, 1)",
|
||||
"name": "Annotations & Alerts",
|
||||
"showIn": 0,
|
||||
"tags": [ ],
|
||||
"type": "dashboard"
|
||||
}
|
||||
]
|
||||
},
|
||||
"description": "",
|
||||
"editable": false,
|
||||
"gnetId": null,
|
||||
"graphTooltip": 0,
|
||||
"hideControls": false,
|
||||
"id": null,
|
||||
"links": [ ],
|
||||
"panels": [
|
||||
{
|
||||
"collapse": false,
|
||||
"collapsed": false,
|
||||
"gridPos": {
|
||||
"h": 1,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 0
|
||||
},
|
||||
"id": 2,
|
||||
"panels": [ ],
|
||||
"repeat": null,
|
||||
"repeatIteration": null,
|
||||
"repeatRowId": null,
|
||||
"showTitle": true,
|
||||
"title": "MDS Performance",
|
||||
"titleSize": "h6",
|
||||
"type": "row"
|
||||
},
|
||||
{
|
||||
"aliasColors": { },
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "${prometheusds}",
|
||||
"description": "",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
"h": 9,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 1
|
||||
},
|
||||
"id": 3,
|
||||
"legend": {
|
||||
"alignAsTable": false,
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"rightSide": false,
|
||||
"show": true,
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
"links": [ ],
|
||||
"nullPointMode": "null",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"repeat": null,
|
||||
"seriesOverrides": [
|
||||
{
|
||||
"alias": "/.*Reads/",
|
||||
"transform": "negative-Y"
|
||||
}
|
||||
],
|
||||
"spaceLength": 10,
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(rate(ceph_objecter_op_r{job=~\"$job\", ceph_daemon=~\"($mds_servers).*\"}[$__rate_interval]))",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "Read Ops",
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"expr": "sum(rate(ceph_objecter_op_w{job=~\"$job\", ceph_daemon=~\"($mds_servers).*\"}[$__rate_interval]))",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "Write Ops",
|
||||
"refId": "B"
|
||||
}
|
||||
],
|
||||
"thresholds": [ ],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "MDS Workload - $mds_servers",
|
||||
"tooltip": {
|
||||
"shared": true,
|
||||
"sort": 0,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": [ ]
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "none",
|
||||
"label": "Reads (-) / Writes (+)",
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"show": true
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"aliasColors": { },
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "${prometheusds}",
|
||||
"description": "",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
"h": 9,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 1
|
||||
},
|
||||
"id": 4,
|
||||
"legend": {
|
||||
"alignAsTable": false,
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"rightSide": false,
|
||||
"show": true,
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
"links": [ ],
|
||||
"nullPointMode": "null",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"repeat": null,
|
||||
"seriesOverrides": [ ],
|
||||
"spaceLength": 10,
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "ceph_mds_server_handle_client_request{job=~\"$job\", ceph_daemon=~\"($mds_servers).*\"}",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "{{ceph_daemon}}",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"thresholds": [ ],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "Client Request Load - $mds_servers",
|
||||
"tooltip": {
|
||||
"shared": true,
|
||||
"sort": 0,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": [ ]
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "none",
|
||||
"label": "Client Requests",
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"show": true
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"refresh": "30s",
|
||||
"rows": [ ],
|
||||
"schemaVersion": 16,
|
||||
"style": "dark",
|
||||
"tags": [
|
||||
"ceph-mixin"
|
||||
],
|
||||
"templating": {
|
||||
"list": [
|
||||
{
|
||||
"allValue": ".+",
|
||||
"current": { },
|
||||
"datasource": "${prometheusds}",
|
||||
"hide": 2,
|
||||
"includeAll": true,
|
||||
"label": "cluster",
|
||||
"multi": true,
|
||||
"name": "cluster",
|
||||
"options": [ ],
|
||||
"query": "label_values(ceph_osd_metadata, cluster)",
|
||||
"refresh": 1,
|
||||
"regex": "(.*)",
|
||||
"sort": 1,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [ ],
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
},
|
||||
{
|
||||
"allValue": ".+",
|
||||
"current": { },
|
||||
"datasource": "${prometheusds}",
|
||||
"hide": 0,
|
||||
"includeAll": true,
|
||||
"label": "job",
|
||||
"multi": true,
|
||||
"name": "job",
|
||||
"options": [ ],
|
||||
"query": "label_values(ceph_osd_metadata{}, job)",
|
||||
"refresh": 1,
|
||||
"regex": "(.*)",
|
||||
"sort": 1,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [ ],
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
},
|
||||
{
|
||||
"allValue": null,
|
||||
"current": { },
|
||||
"datasource": "${prometheusds}",
|
||||
"hide": 0,
|
||||
"includeAll": true,
|
||||
"label": "MDS Server",
|
||||
"multi": false,
|
||||
"name": "mds_servers",
|
||||
"options": [ ],
|
||||
"query": "label_values(ceph_mds_inodes{job=~\"$job\"}, ceph_daemon)",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"sort": 1,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [ ],
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
}
|
||||
]
|
||||
},
|
||||
"time": {
|
||||
"from": "now-1h",
|
||||
"to": "now"
|
||||
},
|
||||
"timepicker": {
|
||||
"refresh_intervals": [
|
||||
"5s",
|
||||
"10s",
|
||||
"30s",
|
||||
"1m",
|
||||
"5m",
|
||||
"15m",
|
||||
"30m",
|
||||
"1h",
|
||||
"2h",
|
||||
"1d"
|
||||
],
|
||||
"time_options": [
|
||||
"5m",
|
||||
"15m",
|
||||
"1h",
|
||||
"6h",
|
||||
"12h",
|
||||
"24h",
|
||||
"2d",
|
||||
"7d",
|
||||
"30d"
|
||||
]
|
||||
},
|
||||
"timezone": "",
|
||||
"title": "MDS Performance",
|
||||
"uid": "tbO9LAiZz",
|
||||
"version": 0
|
||||
}
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,880 @@
|
|||
{
|
||||
"__inputs": [ ],
|
||||
"__requires": [
|
||||
{
|
||||
"id": "grafana",
|
||||
"name": "Grafana",
|
||||
"type": "grafana",
|
||||
"version": "5.3.2"
|
||||
},
|
||||
{
|
||||
"id": "graph",
|
||||
"name": "Graph",
|
||||
"type": "panel",
|
||||
"version": "5.0.0"
|
||||
},
|
||||
{
|
||||
"id": "singlestat",
|
||||
"name": "Singlestat",
|
||||
"type": "panel",
|
||||
"version": "5.0.0"
|
||||
}
|
||||
],
|
||||
"annotations": {
|
||||
"list": [
|
||||
{
|
||||
"builtIn": 1,
|
||||
"datasource": "-- Grafana --",
|
||||
"enable": true,
|
||||
"hide": true,
|
||||
"iconColor": "rgba(0, 211, 255, 1)",
|
||||
"name": "Annotations & Alerts",
|
||||
"showIn": 0,
|
||||
"tags": [ ],
|
||||
"type": "dashboard"
|
||||
}
|
||||
]
|
||||
},
|
||||
"description": "",
|
||||
"editable": false,
|
||||
"gnetId": null,
|
||||
"graphTooltip": 0,
|
||||
"hideControls": false,
|
||||
"id": null,
|
||||
"links": [ ],
|
||||
"panels": [
|
||||
{
|
||||
"cacheTimeout": null,
|
||||
"colorBackground": false,
|
||||
"colorValue": false,
|
||||
"colors": [
|
||||
"#299c46",
|
||||
"rgba(237, 129, 40, 0.89)",
|
||||
"#d44a3a"
|
||||
],
|
||||
"datasource": "${prometheusds}",
|
||||
"format": "none",
|
||||
"gauge": {
|
||||
"maxValue": 100,
|
||||
"minValue": 0,
|
||||
"show": false,
|
||||
"thresholdLabels": false,
|
||||
"thresholdMarkers": true
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 5,
|
||||
"w": 4,
|
||||
"x": 0,
|
||||
"y": 0
|
||||
},
|
||||
"id": 2,
|
||||
"interval": null,
|
||||
"links": [ ],
|
||||
"mappingType": 1,
|
||||
"mappingTypes": [
|
||||
{
|
||||
"name": "value to text",
|
||||
"value": 1
|
||||
},
|
||||
{
|
||||
"name": "range to text",
|
||||
"value": 2
|
||||
}
|
||||
],
|
||||
"maxDataPoints": 100,
|
||||
"nullPointMode": "connected",
|
||||
"nullText": null,
|
||||
"postfix": "",
|
||||
"postfixFontSize": "50%",
|
||||
"prefix": "",
|
||||
"prefixFontSize": "50%",
|
||||
"rangeMaps": [
|
||||
{
|
||||
"from": "null",
|
||||
"text": "N/A",
|
||||
"to": "null"
|
||||
}
|
||||
],
|
||||
"sparkline": {
|
||||
"fillColor": "rgba(31, 118, 189, 0.18)",
|
||||
"full": false,
|
||||
"lineColor": "rgb(31, 120, 193)",
|
||||
"show": false
|
||||
},
|
||||
"tableColumn": "",
|
||||
"targets": [
|
||||
{
|
||||
"expr": "count(sum by (instance) (ceph_osd_metadata{job=~\"$job\"}))",
|
||||
"format": "time_series",
|
||||
"instant": true,
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"thresholds": "",
|
||||
"title": "OSD Hosts",
|
||||
"type": "singlestat",
|
||||
"valueFontSize": "80%",
|
||||
"valueMaps": [
|
||||
{
|
||||
"op": "=",
|
||||
"text": "N/A",
|
||||
"value": "null"
|
||||
}
|
||||
],
|
||||
"valueName": "current"
|
||||
},
|
||||
{
|
||||
"cacheTimeout": null,
|
||||
"colorBackground": false,
|
||||
"colorValue": false,
|
||||
"colors": [
|
||||
"#299c46",
|
||||
"rgba(237, 129, 40, 0.89)",
|
||||
"#d44a3a"
|
||||
],
|
||||
"datasource": "${prometheusds}",
|
||||
"description": "Average CPU busy across all hosts (OSD, RGW, MON etc) within the cluster",
|
||||
"format": "percentunit",
|
||||
"gauge": {
|
||||
"maxValue": 100,
|
||||
"minValue": 0,
|
||||
"show": false,
|
||||
"thresholdLabels": false,
|
||||
"thresholdMarkers": true
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 5,
|
||||
"w": 4,
|
||||
"x": 4,
|
||||
"y": 0
|
||||
},
|
||||
"id": 3,
|
||||
"interval": null,
|
||||
"links": [ ],
|
||||
"mappingType": 1,
|
||||
"mappingTypes": [
|
||||
{
|
||||
"name": "value to text",
|
||||
"value": 1
|
||||
},
|
||||
{
|
||||
"name": "range to text",
|
||||
"value": 2
|
||||
}
|
||||
],
|
||||
"maxDataPoints": 100,
|
||||
"nullPointMode": "connected",
|
||||
"nullText": null,
|
||||
"postfix": "",
|
||||
"postfixFontSize": "50%",
|
||||
"prefix": "",
|
||||
"prefixFontSize": "50%",
|
||||
"rangeMaps": [
|
||||
{
|
||||
"from": "null",
|
||||
"text": "N/A",
|
||||
"to": "null"
|
||||
}
|
||||
],
|
||||
"sparkline": {
|
||||
"fillColor": "rgba(31, 118, 189, 0.18)",
|
||||
"full": false,
|
||||
"lineColor": "rgb(31, 120, 193)",
|
||||
"show": false
|
||||
},
|
||||
"tableColumn": "",
|
||||
"targets": [
|
||||
{
|
||||
"expr": "avg(1 - (\n avg by(instance) (\n rate(node_cpu_seconds_total{mode='idle',instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}[$__rate_interval]) or\n rate(node_cpu{mode='idle',instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}[$__rate_interval])\n )\n))\n",
|
||||
"format": "time_series",
|
||||
"instant": true,
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"thresholds": "",
|
||||
"title": "AVG CPU Busy",
|
||||
"type": "singlestat",
|
||||
"valueFontSize": "80%",
|
||||
"valueMaps": [
|
||||
{
|
||||
"op": "=",
|
||||
"text": "N/A",
|
||||
"value": "null"
|
||||
}
|
||||
],
|
||||
"valueName": "current"
|
||||
},
|
||||
{
|
||||
"cacheTimeout": null,
|
||||
"colorBackground": false,
|
||||
"colorValue": false,
|
||||
"colors": [
|
||||
"#299c46",
|
||||
"rgba(237, 129, 40, 0.89)",
|
||||
"#d44a3a"
|
||||
],
|
||||
"datasource": "${prometheusds}",
|
||||
"description": "Average Memory Usage across all hosts in the cluster (excludes buffer/cache usage)",
|
||||
"format": "percentunit",
|
||||
"gauge": {
|
||||
"maxValue": 100,
|
||||
"minValue": 0,
|
||||
"show": false,
|
||||
"thresholdLabels": false,
|
||||
"thresholdMarkers": true
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 5,
|
||||
"w": 4,
|
||||
"x": 8,
|
||||
"y": 0
|
||||
},
|
||||
"id": 4,
|
||||
"interval": null,
|
||||
"links": [ ],
|
||||
"mappingType": 1,
|
||||
"mappingTypes": [
|
||||
{
|
||||
"name": "value to text",
|
||||
"value": 1
|
||||
},
|
||||
{
|
||||
"name": "range to text",
|
||||
"value": 2
|
||||
}
|
||||
],
|
||||
"maxDataPoints": 100,
|
||||
"nullPointMode": "connected",
|
||||
"nullText": null,
|
||||
"postfix": "",
|
||||
"postfixFontSize": "50%",
|
||||
"prefix": "",
|
||||
"prefixFontSize": "50%",
|
||||
"rangeMaps": [
|
||||
{
|
||||
"from": "null",
|
||||
"text": "N/A",
|
||||
"to": "null"
|
||||
}
|
||||
],
|
||||
"sparkline": {
|
||||
"fillColor": "rgba(31, 118, 189, 0.18)",
|
||||
"full": false,
|
||||
"lineColor": "rgb(31, 120, 193)",
|
||||
"show": false
|
||||
},
|
||||
"tableColumn": "",
|
||||
"targets": [
|
||||
{
|
||||
"expr": "avg ((\n (\n node_memory_MemTotal{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"} or\n node_memory_MemTotal_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}\n ) - ((\n node_memory_MemFree{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"} or\n node_memory_MemFree_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}) +\n (\n node_memory_Cached{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"} or\n node_memory_Cached_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}\n ) + (\n node_memory_Buffers{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"} or\n node_memory_Buffers_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}\n ) + (\n node_memory_Slab{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"} or\n node_memory_Slab_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}\n )\n )\n) / (\n node_memory_MemTotal{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"} or\n node_memory_MemTotal_bytes{instance=~\"($osd_hosts|$rgw_hosts|$mon_hosts|$mds_hosts).*\"}\n))\n",
|
||||
"format": "time_series",
|
||||
"instant": true,
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"thresholds": "",
|
||||
"title": "AVG RAM Utilization",
|
||||
"type": "singlestat",
|
||||
"valueFontSize": "80%",
|
||||
"valueMaps": [
|
||||
{
|
||||
"op": "=",
|
||||
"text": "N/A",
|
||||
"value": "null"
|
||||
}
|
||||
],
|
||||
"valueName": "current"
|
||||
},
|
||||
{
|
||||
"cacheTimeout": null,
|
||||
"colorBackground": false,
|
||||
"colorValue": false,
|
||||
"colors": [
|
||||
"#299c46",
|
||||
"rgba(237, 129, 40, 0.89)",
|
||||
"#d44a3a"
|
||||
],
|
||||
"datasource": "${prometheusds}",
|
||||
"description": "IOPS Load at the device as reported by the OS on all OSD hosts",
|
||||
"format": "none",
|
||||
"gauge": {
|
||||
"maxValue": 100,
|
||||
"minValue": 0,
|
||||
"show": false,
|
||||
"thresholdLabels": false,
|
||||
"thresholdMarkers": true
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 5,
|
||||
"w": 4,
|
||||
"x": 12,
|
||||
"y": 0
|
||||
},
|
||||
"id": 5,
|
||||
"interval": null,
|
||||
"links": [ ],
|
||||
"mappingType": 1,
|
||||
"mappingTypes": [
|
||||
{
|
||||
"name": "value to text",
|
||||
"value": 1
|
||||
},
|
||||
{
|
||||
"name": "range to text",
|
||||
"value": 2
|
||||
}
|
||||
],
|
||||
"maxDataPoints": 100,
|
||||
"nullPointMode": "connected",
|
||||
"nullText": null,
|
||||
"postfix": "",
|
||||
"postfixFontSize": "50%",
|
||||
"prefix": "",
|
||||
"prefixFontSize": "50%",
|
||||
"rangeMaps": [
|
||||
{
|
||||
"from": "null",
|
||||
"text": "N/A",
|
||||
"to": "null"
|
||||
}
|
||||
],
|
||||
"sparkline": {
|
||||
"fillColor": "rgba(31, 118, 189, 0.18)",
|
||||
"full": false,
|
||||
"lineColor": "rgb(31, 120, 193)",
|
||||
"show": false
|
||||
},
|
||||
"tableColumn": "",
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum ((\n rate(node_disk_reads_completed{instance=~\"($osd_hosts).*\"}[$__rate_interval]) or\n rate(node_disk_reads_completed_total{instance=~\"($osd_hosts).*\"}[$__rate_interval])\n) + (\n rate(node_disk_writes_completed{instance=~\"($osd_hosts).*\"}[$__rate_interval]) or\n rate(node_disk_writes_completed_total{instance=~\"($osd_hosts).*\"}[$__rate_interval])\n))\n",
|
||||
"format": "time_series",
|
||||
"instant": true,
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"thresholds": "",
|
||||
"title": "Physical IOPS",
|
||||
"type": "singlestat",
|
||||
"valueFontSize": "80%",
|
||||
"valueMaps": [
|
||||
{
|
||||
"op": "=",
|
||||
"text": "N/A",
|
||||
"value": "null"
|
||||
}
|
||||
],
|
||||
"valueName": "current"
|
||||
},
|
||||
{
|
||||
"cacheTimeout": null,
|
||||
"colorBackground": false,
|
||||
"colorValue": false,
|
||||
"colors": [
|
||||
"#299c46",
|
||||
"rgba(237, 129, 40, 0.89)",
|
||||
"#d44a3a"
|
||||
],
|
||||
"datasource": "${prometheusds}",
|
||||
"description": "Average Disk utilization for all OSD data devices (i.e. excludes journal/WAL)",
|
||||
"format": "percent",
|
||||
"gauge": {
|
||||
"maxValue": 100,
|
||||
"minValue": 0,
|
||||
"show": false,
|
||||
"thresholdLabels": false,
|
||||
"thresholdMarkers": true
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 5,
|
||||
"w": 4,
|
||||
"x": 16,
|
||||
"y": 0
|
||||
},
|
||||
"id": 6,
|
||||
"interval": null,
|
||||
"links": [ ],
|
||||
"mappingType": 1,
|
||||
"mappingTypes": [
|
||||
{
|
||||
"name": "value to text",
|
||||
"value": 1
|
||||
},
|
||||
{
|
||||
"name": "range to text",
|
||||
"value": 2
|
||||
}
|
||||
],
|
||||
"maxDataPoints": 100,
|
||||
"nullPointMode": "connected",
|
||||
"nullText": null,
|
||||
"postfix": "",
|
||||
"postfixFontSize": "50%",
|
||||
"prefix": "",
|
||||
"prefixFontSize": "50%",
|
||||
"rangeMaps": [
|
||||
{
|
||||
"from": "null",
|
||||
"text": "N/A",
|
||||
"to": "null"
|
||||
}
|
||||
],
|
||||
"sparkline": {
|
||||
"fillColor": "rgba(31, 118, 189, 0.18)",
|
||||
"full": false,
|
||||
"lineColor": "rgb(31, 120, 193)",
|
||||
"show": false
|
||||
},
|
||||
"tableColumn": "",
|
||||
"targets": [
|
||||
{
|
||||
"expr": "avg (\n label_replace(\n (rate(node_disk_io_time_ms[$__rate_interval]) / 10 ) or\n (rate(node_disk_io_time_seconds_total[$__rate_interval]) * 100),\n \"instance\", \"$1\", \"instance\", \"([^.:]*).*\"\n ) * on(instance, device) group_left(ceph_daemon) label_replace(\n label_replace(\n ceph_disk_occupation_human{job=~\"$job\", instance=~\"($osd_hosts).*\"},\n \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^.:]*).*\"\n )\n)\n",
|
||||
"format": "time_series",
|
||||
"instant": true,
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"thresholds": "",
|
||||
"title": "AVG Disk Utilization",
|
||||
"type": "singlestat",
|
||||
"valueFontSize": "80%",
|
||||
"valueMaps": [
|
||||
{
|
||||
"op": "=",
|
||||
"text": "N/A",
|
||||
"value": "null"
|
||||
}
|
||||
],
|
||||
"valueName": "current"
|
||||
},
|
||||
{
|
||||
"cacheTimeout": null,
|
||||
"colorBackground": false,
|
||||
"colorValue": false,
|
||||
"colors": [
|
||||
"#299c46",
|
||||
"rgba(237, 129, 40, 0.89)",
|
||||
"#d44a3a"
|
||||
],
|
||||
"datasource": "${prometheusds}",
|
||||
"description": "Total send/receive network load across all hosts in the ceph cluster",
|
||||
"format": "bytes",
|
||||
"gauge": {
|
||||
"maxValue": 100,
|
||||
"minValue": 0,
|
||||
"show": false,
|
||||
"thresholdLabels": false,
|
||||
"thresholdMarkers": true
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 5,
|
||||
"w": 4,
|
||||
"x": 20,
|
||||
"y": 0
|
||||
},
|
||||
"id": 7,
|
||||
"interval": null,
|
||||
"links": [ ],
|
||||
"mappingType": 1,
|
||||
"mappingTypes": [
|
||||
{
|
||||
"name": "value to text",
|
||||
"value": 1
|
||||
},
|
||||
{
|
||||
"name": "range to text",
|
||||
"value": 2
|
||||
}
|
||||
],
|
||||
"maxDataPoints": 100,
|
||||
"nullPointMode": "connected",
|
||||
"nullText": null,
|
||||
"postfix": "",
|
||||
"postfixFontSize": "50%",
|
||||
"prefix": "",
|
||||
"prefixFontSize": "50%",
|
||||
"rangeMaps": [
|
||||
{
|
||||
"from": "null",
|
||||
"text": "N/A",
|
||||
"to": "null"
|
||||
}
|
||||
],
|
||||
"sparkline": {
|
||||
"fillColor": "rgba(31, 118, 189, 0.18)",
|
||||
"full": false,
|
||||
"lineColor": "rgb(31, 120, 193)",
|
||||
"show": false
|
||||
},
|
||||
"tableColumn": "",
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum (\n (\n rate(node_network_receive_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\",device!=\"lo\"}[$__rate_interval]) or\n rate(node_network_receive_bytes_total{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\",device!=\"lo\"}[$__rate_interval])\n ) unless on (device, instance)\n label_replace((bonding_slaves > 0), \"device\", \"$1\", \"master\", \"(.+)\")\n) +\nsum (\n (\n rate(node_network_transmit_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\",device!=\"lo\"}[$__rate_interval]) or\n rate(node_network_transmit_bytes_total{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\",device!=\"lo\"}[$__rate_interval])\n ) unless on (device, instance)\n label_replace((bonding_slaves > 0), \"device\", \"$1\", \"master\", \"(.+)\")\n)\n",
|
||||
"format": "time_series",
|
||||
"instant": true,
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"thresholds": "",
|
||||
"title": "Network Load",
|
||||
"type": "singlestat",
|
||||
"valueFontSize": "80%",
|
||||
"valueMaps": [
|
||||
{
|
||||
"op": "=",
|
||||
"text": "N/A",
|
||||
"value": "null"
|
||||
}
|
||||
],
|
||||
"valueName": "current"
|
||||
},
|
||||
{
|
||||
"aliasColors": { },
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "${prometheusds}",
|
||||
"description": "Show the top 10 busiest hosts by cpu",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
"h": 9,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 5
|
||||
},
|
||||
"id": 8,
|
||||
"legend": {
|
||||
"alignAsTable": false,
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"rightSide": false,
|
||||
"show": true,
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
"links": [ ],
|
||||
"nullPointMode": "null",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"repeat": null,
|
||||
"seriesOverrides": [ ],
|
||||
"spaceLength": 10,
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "topk(10,\n 100 * (\n 1 - (\n avg by(instance) (\n rate(node_cpu_seconds_total{mode='idle',instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}[$__rate_interval]) or\n rate(node_cpu{mode='idle',instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}[$__rate_interval])\n )\n )\n )\n)\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "{{instance}}",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"thresholds": [ ],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "CPU Busy - Top 10 Hosts",
|
||||
"tooltip": {
|
||||
"shared": true,
|
||||
"sort": 0,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": [ ]
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "percent",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"show": true
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"aliasColors": { },
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "${prometheusds}",
|
||||
"description": "Top 10 hosts by network load",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
"h": 9,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 5
|
||||
},
|
||||
"id": 9,
|
||||
"legend": {
|
||||
"alignAsTable": false,
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"rightSide": false,
|
||||
"show": true,
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
"links": [ ],
|
||||
"nullPointMode": "null",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"repeat": null,
|
||||
"seriesOverrides": [ ],
|
||||
"spaceLength": 10,
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "topk(10, (sum by(instance) (\n(\n rate(node_network_receive_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\",device!=\"lo\"}[$__rate_interval]) or\n rate(node_network_receive_bytes_total{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\",device!=\"lo\"}[$__rate_interval])\n) +\n(\n rate(node_network_transmit_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\",device!=\"lo\"}[$__rate_interval]) or\n rate(node_network_transmit_bytes_total{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\",device!=\"lo\"}[$__rate_interval])\n) unless on (device, instance)\n label_replace((bonding_slaves > 0), \"device\", \"$1\", \"master\", \"(.+)\"))\n))\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "{{instance}}",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"thresholds": [ ],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "Network Load - Top 10 Hosts",
|
||||
"tooltip": {
|
||||
"shared": true,
|
||||
"sort": 0,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": [ ]
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "Bps",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"show": true
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"refresh": "30s",
|
||||
"rows": [ ],
|
||||
"schemaVersion": 16,
|
||||
"style": "dark",
|
||||
"tags": [
|
||||
"ceph-mixin"
|
||||
],
|
||||
"templating": {
|
||||
"list": [
|
||||
{
|
||||
"allValue": ".+",
|
||||
"current": { },
|
||||
"datasource": "${prometheusds}",
|
||||
"hide": 2,
|
||||
"includeAll": true,
|
||||
"label": "cluster",
|
||||
"multi": true,
|
||||
"name": "cluster",
|
||||
"options": [ ],
|
||||
"query": "label_values(ceph_osd_metadata, cluster)",
|
||||
"refresh": 1,
|
||||
"regex": "(.*)",
|
||||
"sort": 1,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [ ],
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
},
|
||||
{
|
||||
"allValue": ".+",
|
||||
"current": { },
|
||||
"datasource": "${prometheusds}",
|
||||
"hide": 0,
|
||||
"includeAll": true,
|
||||
"label": "job",
|
||||
"multi": true,
|
||||
"name": "job",
|
||||
"options": [ ],
|
||||
"query": "label_values(ceph_osd_metadata{}, job)",
|
||||
"refresh": 1,
|
||||
"regex": "(.*)",
|
||||
"sort": 1,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [ ],
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
},
|
||||
{
|
||||
"allValue": null,
|
||||
"current": { },
|
||||
"datasource": "${prometheusds}",
|
||||
"hide": 0,
|
||||
"includeAll": true,
|
||||
"label": null,
|
||||
"multi": false,
|
||||
"name": "osd_hosts",
|
||||
"options": [ ],
|
||||
"query": "label_values(ceph_disk_occupation{job=~\"$job\"}, exported_instance)",
|
||||
"refresh": 1,
|
||||
"regex": "([^.]*).*",
|
||||
"sort": 1,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [ ],
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
},
|
||||
{
|
||||
"allValue": null,
|
||||
"current": { },
|
||||
"datasource": "${prometheusds}",
|
||||
"hide": 0,
|
||||
"includeAll": true,
|
||||
"label": null,
|
||||
"multi": false,
|
||||
"name": "mon_hosts",
|
||||
"options": [ ],
|
||||
"query": "label_values(ceph_mon_metadata{job=~\"$job\"}, ceph_daemon)",
|
||||
"refresh": 1,
|
||||
"regex": "mon.(.*)",
|
||||
"sort": 1,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [ ],
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
},
|
||||
{
|
||||
"allValue": null,
|
||||
"current": { },
|
||||
"datasource": "${prometheusds}",
|
||||
"hide": 0,
|
||||
"includeAll": true,
|
||||
"label": null,
|
||||
"multi": false,
|
||||
"name": "mds_hosts",
|
||||
"options": [ ],
|
||||
"query": "label_values(ceph_mds_inodes{job=~\"$job\"}, ceph_daemon)",
|
||||
"refresh": 1,
|
||||
"regex": "mds.(.*)",
|
||||
"sort": 1,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [ ],
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
},
|
||||
{
|
||||
"allValue": null,
|
||||
"current": { },
|
||||
"datasource": "${prometheusds}",
|
||||
"hide": 0,
|
||||
"includeAll": true,
|
||||
"label": null,
|
||||
"multi": false,
|
||||
"name": "rgw_hosts",
|
||||
"options": [ ],
|
||||
"query": "label_values(ceph_rgw_metadata{job=~\"$job\"}, ceph_daemon)",
|
||||
"refresh": 1,
|
||||
"regex": "rgw.(.*)",
|
||||
"sort": 1,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [ ],
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
}
|
||||
]
|
||||
},
|
||||
"time": {
|
||||
"from": "now-1h",
|
||||
"to": "now"
|
||||
},
|
||||
"timepicker": {
|
||||
"refresh_intervals": [
|
||||
"5s",
|
||||
"10s",
|
||||
"30s",
|
||||
"1m",
|
||||
"5m",
|
||||
"15m",
|
||||
"30m",
|
||||
"1h",
|
||||
"2h",
|
||||
"1d"
|
||||
],
|
||||
"time_options": [
|
||||
"5m",
|
||||
"15m",
|
||||
"1h",
|
||||
"6h",
|
||||
"12h",
|
||||
"24h",
|
||||
"2d",
|
||||
"7d",
|
||||
"30d"
|
||||
]
|
||||
},
|
||||
"timezone": "",
|
||||
"title": "Ceph OSD Host Overview",
|
||||
"uid": "y0KGL0iZz",
|
||||
"version": 0
|
||||
}
|
|
@ -0,0 +1,857 @@
|
|||
{
|
||||
"__inputs": [ ],
|
||||
"__requires": [
|
||||
{
|
||||
"id": "grafana",
|
||||
"name": "Grafana",
|
||||
"type": "grafana",
|
||||
"version": "5.3.2"
|
||||
},
|
||||
{
|
||||
"id": "graph",
|
||||
"name": "Graph",
|
||||
"type": "panel",
|
||||
"version": "5.0.0"
|
||||
}
|
||||
],
|
||||
"annotations": {
|
||||
"list": [
|
||||
{
|
||||
"builtIn": 1,
|
||||
"datasource": "-- Grafana --",
|
||||
"enable": true,
|
||||
"hide": true,
|
||||
"iconColor": "rgba(0, 211, 255, 1)",
|
||||
"name": "Annotations & Alerts",
|
||||
"showIn": 0,
|
||||
"tags": [ ],
|
||||
"type": "dashboard"
|
||||
}
|
||||
]
|
||||
},
|
||||
"description": "",
|
||||
"editable": false,
|
||||
"gnetId": null,
|
||||
"graphTooltip": 0,
|
||||
"hideControls": false,
|
||||
"id": null,
|
||||
"links": [ ],
|
||||
"panels": [
|
||||
{
|
||||
"collapse": false,
|
||||
"collapsed": false,
|
||||
"gridPos": {
|
||||
"h": 1,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 0
|
||||
},
|
||||
"id": 2,
|
||||
"panels": [ ],
|
||||
"repeat": null,
|
||||
"repeatIteration": null,
|
||||
"repeatRowId": null,
|
||||
"showTitle": true,
|
||||
"title": "OSD Performance",
|
||||
"titleSize": "h6",
|
||||
"type": "row"
|
||||
},
|
||||
{
|
||||
"aliasColors": { },
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "${prometheusds}",
|
||||
"description": "",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
"h": 9,
|
||||
"w": 6,
|
||||
"x": 0,
|
||||
"y": 1
|
||||
},
|
||||
"id": 3,
|
||||
"legend": {
|
||||
"alignAsTable": false,
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"rightSide": false,
|
||||
"show": true,
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
"links": [ ],
|
||||
"nullPointMode": "null as zero",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"repeat": null,
|
||||
"seriesOverrides": [
|
||||
{
|
||||
"alias": "read",
|
||||
"transform": "negative-Y"
|
||||
}
|
||||
],
|
||||
"spaceLength": 10,
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "rate(ceph_osd_op_r_latency_sum{job=~\"$job\", ceph_daemon=~\"$osd\"}[$__rate_interval]) /\n on (ceph_daemon) rate(ceph_osd_op_r_latency_count{job=~\"$job\"}[$__rate_interval])\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "read",
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"expr": "rate(ceph_osd_op_w_latency_sum{job=~\"$job\", ceph_daemon=~\"$osd\"}[$__rate_interval]) /\n on (ceph_daemon) rate(ceph_osd_op_w_latency_count{job=~\"$job\"}[$__rate_interval])\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "write",
|
||||
"refId": "B"
|
||||
}
|
||||
],
|
||||
"thresholds": [ ],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "$osd Latency",
|
||||
"tooltip": {
|
||||
"shared": true,
|
||||
"sort": 0,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": [ ]
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "s",
|
||||
"label": "Read (-) / Write (+)",
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": true
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"aliasColors": { },
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "${prometheusds}",
|
||||
"description": "",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
"h": 9,
|
||||
"w": 6,
|
||||
"x": 6,
|
||||
"y": 1
|
||||
},
|
||||
"id": 4,
|
||||
"legend": {
|
||||
"alignAsTable": false,
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"rightSide": false,
|
||||
"show": true,
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
"links": [ ],
|
||||
"nullPointMode": "null as zero",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"repeat": null,
|
||||
"seriesOverrides": [
|
||||
{
|
||||
"alias": "Reads",
|
||||
"transform": "negative-Y"
|
||||
}
|
||||
],
|
||||
"spaceLength": 10,
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "rate(ceph_osd_op_r{job=~\"$job\", ceph_daemon=~\"$osd\"}[$__rate_interval])",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "Reads",
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"expr": "rate(ceph_osd_op_w{job=~\"$job\", ceph_daemon=~\"$osd\"}[$__rate_interval])",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "Writes",
|
||||
"refId": "B"
|
||||
}
|
||||
],
|
||||
"thresholds": [ ],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "$osd R/W IOPS",
|
||||
"tooltip": {
|
||||
"shared": true,
|
||||
"sort": 0,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": [ ]
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "short",
|
||||
"label": "Read (-) / Write (+)",
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": true
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"aliasColors": { },
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "${prometheusds}",
|
||||
"description": "",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
"h": 9,
|
||||
"w": 6,
|
||||
"x": 12,
|
||||
"y": 1
|
||||
},
|
||||
"id": 5,
|
||||
"legend": {
|
||||
"alignAsTable": false,
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"rightSide": false,
|
||||
"show": true,
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
"links": [ ],
|
||||
"nullPointMode": "null as zero",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"repeat": null,
|
||||
"seriesOverrides": [
|
||||
{
|
||||
"alias": "Read Bytes",
|
||||
"transform": "negative-Y"
|
||||
}
|
||||
],
|
||||
"spaceLength": 10,
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "rate(ceph_osd_op_r_out_bytes{job=~\"$job\", ceph_daemon=~\"$osd\"}[$__rate_interval])",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "Read Bytes",
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"expr": "rate(ceph_osd_op_w_in_bytes{job=~\"$job\", ceph_daemon=~\"$osd\"}[$__rate_interval])",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "Write Bytes",
|
||||
"refId": "B"
|
||||
}
|
||||
],
|
||||
"thresholds": [ ],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "$osd R/W Bytes",
|
||||
"tooltip": {
|
||||
"shared": true,
|
||||
"sort": 0,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": [ ]
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "bytes",
|
||||
"label": "Read (-) / Write (+)",
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": true
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"collapse": false,
|
||||
"collapsed": false,
|
||||
"gridPos": {
|
||||
"h": 1,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 10
|
||||
},
|
||||
"id": 6,
|
||||
"panels": [ ],
|
||||
"repeat": null,
|
||||
"repeatIteration": null,
|
||||
"repeatRowId": null,
|
||||
"showTitle": true,
|
||||
"title": "Physical Device Performance",
|
||||
"titleSize": "h6",
|
||||
"type": "row"
|
||||
},
|
||||
{
|
||||
"aliasColors": { },
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "${prometheusds}",
|
||||
"description": "",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
"h": 9,
|
||||
"w": 6,
|
||||
"x": 0,
|
||||
"y": 11
|
||||
},
|
||||
"id": 7,
|
||||
"legend": {
|
||||
"alignAsTable": false,
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"rightSide": false,
|
||||
"show": true,
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
"links": [ ],
|
||||
"nullPointMode": "null as zero",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"repeat": null,
|
||||
"seriesOverrides": [
|
||||
{
|
||||
"alias": "/.*Reads/",
|
||||
"transform": "negative-Y"
|
||||
}
|
||||
],
|
||||
"spaceLength": 10,
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "(\n label_replace(\n rate(node_disk_read_time_seconds_total{}[$__rate_interval]) /\n rate(node_disk_reads_completed_total{}[$__rate_interval]),\n \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n ) and on (instance, device) label_replace(\n label_replace(\n ceph_disk_occupation_human{job=~\"$job\", ceph_daemon=~\"$osd\"},\n \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n )\n)\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "{{instance}}/{{device}} Reads",
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"expr": "(\n label_replace(\n rate(node_disk_write_time_seconds_total{}[$__rate_interval]) /\n rate(node_disk_writes_completed_total{}[$__rate_interval]),\n \"instance\", \"$1\", \"instance\", \"([^:.]*).*\") and on (instance, device)\n label_replace(\n label_replace(\n ceph_disk_occupation_human{job=~\"$job\", ceph_daemon=~\"$osd\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n )\n )\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "{{instance}}/{{device}} Writes",
|
||||
"refId": "B"
|
||||
}
|
||||
],
|
||||
"thresholds": [ ],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "Physical Device Latency for $osd",
|
||||
"tooltip": {
|
||||
"shared": true,
|
||||
"sort": 0,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": [ ]
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "s",
|
||||
"label": "Read (-) / Write (+)",
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": true
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"aliasColors": { },
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "${prometheusds}",
|
||||
"description": "",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
"h": 9,
|
||||
"w": 6,
|
||||
"x": 6,
|
||||
"y": 11
|
||||
},
|
||||
"id": 8,
|
||||
"legend": {
|
||||
"alignAsTable": false,
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"rightSide": false,
|
||||
"show": true,
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
"links": [ ],
|
||||
"nullPointMode": "null as zero",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"repeat": null,
|
||||
"seriesOverrides": [
|
||||
{
|
||||
"alias": "/.*Reads/",
|
||||
"transform": "negative-Y"
|
||||
}
|
||||
],
|
||||
"spaceLength": 10,
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "label_replace(\n rate(node_disk_writes_completed_total{}[$__rate_interval]),\n \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n) and on (instance, device) label_replace(\n label_replace(\n ceph_disk_occupation_human{job=~\"$job\", ceph_daemon=~\"$osd\"},\n \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n)\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "{{device}} on {{instance}} Writes",
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"expr": "label_replace(\n rate(node_disk_reads_completed_total{}[$__rate_interval]),\n \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n) and on (instance, device) label_replace(\n label_replace(\n ceph_disk_occupation_human{job=~\"$job\", ceph_daemon=~\"$osd\"},\n \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n)\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "{{device}} on {{instance}} Reads",
|
||||
"refId": "B"
|
||||
}
|
||||
],
|
||||
"thresholds": [ ],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "Physical Device R/W IOPS for $osd",
|
||||
"tooltip": {
|
||||
"shared": true,
|
||||
"sort": 0,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": [ ]
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "short",
|
||||
"label": "Read (-) / Write (+)",
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": true
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"aliasColors": { },
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "${prometheusds}",
|
||||
"description": "",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
"h": 9,
|
||||
"w": 6,
|
||||
"x": 12,
|
||||
"y": 11
|
||||
},
|
||||
"id": 9,
|
||||
"legend": {
|
||||
"alignAsTable": false,
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"rightSide": false,
|
||||
"show": true,
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
"links": [ ],
|
||||
"nullPointMode": "null as zero",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"repeat": null,
|
||||
"seriesOverrides": [
|
||||
{
|
||||
"alias": "/.*Reads/",
|
||||
"transform": "negative-Y"
|
||||
}
|
||||
],
|
||||
"spaceLength": 10,
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "label_replace(\n rate(node_disk_read_bytes_total{}[$__rate_interval]), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n) and on (instance, device) label_replace(\n label_replace(\n ceph_disk_occupation_human{job=~\"$job\", ceph_daemon=~\"$osd\"},\n \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n)\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "{{instance}} {{device}} Reads",
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"expr": "label_replace(\n rate(node_disk_written_bytes_total{}[$__rate_interval]), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n) and on (instance, device) label_replace(\n label_replace(\n ceph_disk_occupation_human{job=~\"$job\", ceph_daemon=~\"$osd\"},\n \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n)\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "{{instance}} {{device}} Writes",
|
||||
"refId": "B"
|
||||
}
|
||||
],
|
||||
"thresholds": [ ],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "Physical Device R/W Bytes for $osd",
|
||||
"tooltip": {
|
||||
"shared": true,
|
||||
"sort": 0,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": [ ]
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "Bps",
|
||||
"label": "Read (-) / Write (+)",
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": true
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"aliasColors": { },
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "${prometheusds}",
|
||||
"description": "",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
"h": 9,
|
||||
"w": 6,
|
||||
"x": 18,
|
||||
"y": 11
|
||||
},
|
||||
"id": 10,
|
||||
"legend": {
|
||||
"alignAsTable": false,
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"rightSide": false,
|
||||
"show": true,
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
"links": [ ],
|
||||
"nullPointMode": "null",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"repeat": null,
|
||||
"seriesOverrides": [ ],
|
||||
"spaceLength": 10,
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "label_replace(\n rate(node_disk_io_time_seconds_total{}[$__rate_interval]),\n \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n) and on (instance, device) label_replace(\n label_replace(\n ceph_disk_occupation_human{job=~\"$job\", ceph_daemon=~\"$osd\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n)\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "{{device}} on {{instance}}",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"thresholds": [ ],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "Physical Device Util% for $osd",
|
||||
"tooltip": {
|
||||
"shared": true,
|
||||
"sort": 0,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": [ ]
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "percentunit",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": true
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"refresh": "30s",
|
||||
"rows": [ ],
|
||||
"schemaVersion": 16,
|
||||
"style": "dark",
|
||||
"tags": [
|
||||
"ceph-mixin"
|
||||
],
|
||||
"templating": {
|
||||
"list": [
|
||||
{
|
||||
"allValue": ".+",
|
||||
"current": { },
|
||||
"datasource": "${prometheusds}",
|
||||
"hide": 2,
|
||||
"includeAll": true,
|
||||
"label": "cluster",
|
||||
"multi": true,
|
||||
"name": "cluster",
|
||||
"options": [ ],
|
||||
"query": "label_values(ceph_osd_metadata, cluster)",
|
||||
"refresh": 1,
|
||||
"regex": "(.*)",
|
||||
"sort": 1,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [ ],
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
},
|
||||
{
|
||||
"allValue": ".+",
|
||||
"current": { },
|
||||
"datasource": "${prometheusds}",
|
||||
"hide": 0,
|
||||
"includeAll": true,
|
||||
"label": "job",
|
||||
"multi": true,
|
||||
"name": "job",
|
||||
"options": [ ],
|
||||
"query": "label_values(ceph_osd_metadata{}, job)",
|
||||
"refresh": 1,
|
||||
"regex": "(.*)",
|
||||
"sort": 1,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [ ],
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
},
|
||||
{
|
||||
"allValue": null,
|
||||
"current": { },
|
||||
"datasource": "${prometheusds}",
|
||||
"hide": 0,
|
||||
"includeAll": false,
|
||||
"label": "OSD",
|
||||
"multi": false,
|
||||
"name": "osd",
|
||||
"options": [ ],
|
||||
"query": "label_values(ceph_osd_metadata{job=~\"$job\"}, ceph_daemon)",
|
||||
"refresh": 1,
|
||||
"regex": "(.*)",
|
||||
"sort": 1,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [ ],
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
}
|
||||
]
|
||||
},
|
||||
"time": {
|
||||
"from": "now-3h",
|
||||
"to": "now"
|
||||
},
|
||||
"timepicker": {
|
||||
"refresh_intervals": [
|
||||
"5s",
|
||||
"10s",
|
||||
"30s",
|
||||
"1m",
|
||||
"5m",
|
||||
"15m",
|
||||
"30m",
|
||||
"1h",
|
||||
"2h",
|
||||
"1d"
|
||||
],
|
||||
"time_options": [
|
||||
"5m",
|
||||
"15m",
|
||||
"1h",
|
||||
"6h",
|
||||
"12h",
|
||||
"24h",
|
||||
"2d",
|
||||
"7d",
|
||||
"30d"
|
||||
]
|
||||
},
|
||||
"timezone": "",
|
||||
"title": "OSD device details",
|
||||
"uid": "CrAHE0iZz",
|
||||
"version": 0
|
||||
}
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,694 @@
|
|||
{
|
||||
"__inputs": [ ],
|
||||
"__requires": [
|
||||
{
|
||||
"id": "grafana",
|
||||
"name": "Grafana",
|
||||
"type": "grafana",
|
||||
"version": "5.3.2"
|
||||
},
|
||||
{
|
||||
"id": "graph",
|
||||
"name": "Graph",
|
||||
"type": "panel",
|
||||
"version": "5.0.0"
|
||||
},
|
||||
{
|
||||
"id": "singlestat",
|
||||
"name": "Singlestat",
|
||||
"type": "panel",
|
||||
"version": "5.0.0"
|
||||
}
|
||||
],
|
||||
"annotations": {
|
||||
"list": [
|
||||
{
|
||||
"builtIn": 1,
|
||||
"datasource": "-- Grafana --",
|
||||
"enable": true,
|
||||
"hide": true,
|
||||
"iconColor": "rgba(0, 211, 255, 1)",
|
||||
"name": "Annotations & Alerts",
|
||||
"showIn": 0,
|
||||
"tags": [ ],
|
||||
"type": "dashboard"
|
||||
}
|
||||
]
|
||||
},
|
||||
"description": "",
|
||||
"editable": false,
|
||||
"gnetId": null,
|
||||
"graphTooltip": 0,
|
||||
"hideControls": false,
|
||||
"id": null,
|
||||
"links": [ ],
|
||||
"panels": [
|
||||
{
|
||||
"cacheTimeout": null,
|
||||
"colorBackground": false,
|
||||
"colorValue": true,
|
||||
"colors": [
|
||||
"#299c46",
|
||||
"rgba(237, 129, 40, 0.89)",
|
||||
"#d44a3a"
|
||||
],
|
||||
"datasource": "${prometheusds}",
|
||||
"format": "percentunit",
|
||||
"gauge": {
|
||||
"maxValue": 1,
|
||||
"minValue": 0,
|
||||
"show": true,
|
||||
"thresholdLabels": false,
|
||||
"thresholdMarkers": true
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 7,
|
||||
"x": 0,
|
||||
"y": 0
|
||||
},
|
||||
"id": 2,
|
||||
"interval": null,
|
||||
"links": [ ],
|
||||
"mappingType": 1,
|
||||
"mappingTypes": [
|
||||
{
|
||||
"name": "value to text",
|
||||
"value": 1
|
||||
},
|
||||
{
|
||||
"name": "range to text",
|
||||
"value": 2
|
||||
}
|
||||
],
|
||||
"maxDataPoints": 100,
|
||||
"nullPointMode": "connected",
|
||||
"nullText": null,
|
||||
"postfix": "",
|
||||
"postfixFontSize": "50%",
|
||||
"prefix": "",
|
||||
"prefixFontSize": "50%",
|
||||
"rangeMaps": [
|
||||
{
|
||||
"from": "null",
|
||||
"text": "N/A",
|
||||
"to": "null"
|
||||
}
|
||||
],
|
||||
"sparkline": {
|
||||
"fillColor": "rgba(31, 118, 189, 0.18)",
|
||||
"full": false,
|
||||
"lineColor": "rgb(31, 120, 193)",
|
||||
"show": true
|
||||
},
|
||||
"tableColumn": "",
|
||||
"targets": [
|
||||
{
|
||||
"expr": "(ceph_pool_stored{job=~\"$job\"} / (ceph_pool_stored{job=~\"$job\"} + ceph_pool_max_avail{job=~\"$job\"})) *\n on(pool_id) group_left(instance, name) ceph_pool_metadata{job=~\"$job\", name=~\"$pool_name\"}\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"thresholds": ".7,.8",
|
||||
"title": "Capacity used",
|
||||
"type": "singlestat",
|
||||
"valueFontSize": "80%",
|
||||
"valueMaps": [
|
||||
{
|
||||
"op": "=",
|
||||
"text": "N/A",
|
||||
"value": "null"
|
||||
}
|
||||
],
|
||||
"valueName": "current"
|
||||
},
|
||||
{
|
||||
"cacheTimeout": null,
|
||||
"colorBackground": false,
|
||||
"colorValue": 100,
|
||||
"colors": [
|
||||
"#299c46",
|
||||
"rgba(237, 129, 40, 0.89)",
|
||||
"#d44a3a"
|
||||
],
|
||||
"datasource": "${prometheusds}",
|
||||
"description": "Time till pool is full assuming the average fill rate of the last 6 hours",
|
||||
"format": "s",
|
||||
"gauge": {
|
||||
"maxValue": false,
|
||||
"minValue": 0,
|
||||
"show": false,
|
||||
"thresholdLabels": false,
|
||||
"thresholdMarkers": true
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 5,
|
||||
"x": 7,
|
||||
"y": 0
|
||||
},
|
||||
"id": 3,
|
||||
"interval": null,
|
||||
"links": [ ],
|
||||
"mappingType": 1,
|
||||
"mappingTypes": [
|
||||
{
|
||||
"name": "value to text",
|
||||
"value": 1
|
||||
},
|
||||
{
|
||||
"name": "range to text",
|
||||
"value": 2
|
||||
}
|
||||
],
|
||||
"maxDataPoints": 100,
|
||||
"nullPointMode": "connected",
|
||||
"nullText": null,
|
||||
"postfix": "",
|
||||
"postfixFontSize": "50%",
|
||||
"prefix": "",
|
||||
"prefixFontSize": "50%",
|
||||
"rangeMaps": [
|
||||
{
|
||||
"from": "null",
|
||||
"text": "N/A",
|
||||
"to": "null"
|
||||
}
|
||||
],
|
||||
"sparkline": {
|
||||
"fillColor": "rgba(31, 118, 189, 0.18)",
|
||||
"full": false,
|
||||
"lineColor": "rgb(31, 120, 193)",
|
||||
"show": ""
|
||||
},
|
||||
"tableColumn": "",
|
||||
"targets": [
|
||||
{
|
||||
"expr": "(ceph_pool_max_avail{job=~\"$job\"} / deriv(ceph_pool_stored{job=~\"$job\"}[6h])) *\n on(pool_id) group_left(instance, name) ceph_pool_metadata{job=~\"$job\", name=~\"$pool_name\"} > 0\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"thresholds": "current",
|
||||
"title": "Time till full",
|
||||
"type": "singlestat",
|
||||
"valueFontSize": "80%",
|
||||
"valueMaps": [
|
||||
{
|
||||
"op": "=",
|
||||
"text": "N/A",
|
||||
"value": "null"
|
||||
}
|
||||
],
|
||||
"valueName": false
|
||||
},
|
||||
{
|
||||
"aliasColors": {
|
||||
"read_op_per_sec": "#3F6833",
|
||||
"write_op_per_sec": "#E5AC0E"
|
||||
},
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "${prometheusds}",
|
||||
"description": "",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 0
|
||||
},
|
||||
"id": 4,
|
||||
"legend": {
|
||||
"alignAsTable": false,
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"rightSide": false,
|
||||
"show": true,
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
"links": [ ],
|
||||
"nullPointMode": "null",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"repeat": null,
|
||||
"seriesOverrides": [ ],
|
||||
"spaceLength": 10,
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "deriv(ceph_pool_objects{job=~\"$job\"}[$__rate_interval]) *\n on(pool_id) group_left(instance, name) ceph_pool_metadata{job=~\"$job\", name=~\"$pool_name\"}\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "Objects per second",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"thresholds": [ ],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "$pool_name Object Ingress/Egress",
|
||||
"tooltip": {
|
||||
"shared": true,
|
||||
"sort": 0,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": [ ]
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "ops",
|
||||
"label": "Objects out(-) / in(+) ",
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": true
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"aliasColors": {
|
||||
"read_op_per_sec": "#3F6833",
|
||||
"write_op_per_sec": "#E5AC0E"
|
||||
},
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "${prometheusds}",
|
||||
"description": "",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 7
|
||||
},
|
||||
"id": 5,
|
||||
"legend": {
|
||||
"alignAsTable": false,
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"rightSide": false,
|
||||
"show": true,
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
"links": [ ],
|
||||
"nullPointMode": "null",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"repeat": null,
|
||||
"seriesOverrides": [
|
||||
{
|
||||
"alias": "reads",
|
||||
"transform": "negative-Y"
|
||||
}
|
||||
],
|
||||
"spaceLength": 10,
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "rate(ceph_pool_rd{job=~\"$job\"}[$__rate_interval]) *\n on(pool_id) group_left(instance,name) ceph_pool_metadata{job=~\"$job\", name=~\"$pool_name\"}\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "reads",
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"expr": "rate(ceph_pool_wr{job=~\"$job\"}[$__rate_interval]) *\n on(pool_id) group_left(instance, name) ceph_pool_metadata{job=~\"$job\", name=~\"$pool_name\"}\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "writes",
|
||||
"refId": "B"
|
||||
}
|
||||
],
|
||||
"thresholds": [ ],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "$pool_name Client IOPS",
|
||||
"tooltip": {
|
||||
"shared": true,
|
||||
"sort": 0,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": [ ]
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "iops",
|
||||
"label": "Read (-) / Write (+)",
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": true
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"aliasColors": {
|
||||
"read_op_per_sec": "#3F6833",
|
||||
"write_op_per_sec": "#E5AC0E"
|
||||
},
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "${prometheusds}",
|
||||
"description": "",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 7
|
||||
},
|
||||
"id": 6,
|
||||
"legend": {
|
||||
"alignAsTable": false,
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"rightSide": false,
|
||||
"show": true,
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
"links": [ ],
|
||||
"nullPointMode": "null",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"repeat": null,
|
||||
"seriesOverrides": [
|
||||
{
|
||||
"alias": "reads",
|
||||
"transform": "negative-Y"
|
||||
}
|
||||
],
|
||||
"spaceLength": 10,
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "rate(ceph_pool_rd_bytes{job=~\"$job\"}[$__rate_interval]) +\n on(pool_id) group_left(instance, name) ceph_pool_metadata{job=~\"$job\", name=~\"$pool_name\"}\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "reads",
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"expr": "rate(ceph_pool_wr_bytes{job=~\"$job\"}[$__rate_interval]) +\n on(pool_id) group_left(instance,name) ceph_pool_metadata{job=~\"$job\", name=~\"$pool_name\"}\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "writes",
|
||||
"refId": "B"
|
||||
}
|
||||
],
|
||||
"thresholds": [ ],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "$pool_name Client Throughput",
|
||||
"tooltip": {
|
||||
"shared": true,
|
||||
"sort": 0,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": [ ]
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "Bps",
|
||||
"label": "Read (-) / Write (+)",
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": true
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"aliasColors": {
|
||||
"read_op_per_sec": "#3F6833",
|
||||
"write_op_per_sec": "#E5AC0E"
|
||||
},
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "${prometheusds}",
|
||||
"description": "",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 14
|
||||
},
|
||||
"id": 7,
|
||||
"legend": {
|
||||
"alignAsTable": false,
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"rightSide": false,
|
||||
"show": true,
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
"links": [ ],
|
||||
"nullPointMode": "null",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"repeat": null,
|
||||
"seriesOverrides": [ ],
|
||||
"spaceLength": 10,
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "ceph_pool_objects{job=~\"$job\"} *\n on(pool_id) group_left(instance,name) ceph_pool_metadata{job=~\"$job\", name=~\"$pool_name\"}\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "Number of Objects",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"thresholds": [ ],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "$pool_name Objects",
|
||||
"tooltip": {
|
||||
"shared": true,
|
||||
"sort": 0,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": [ ]
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "short",
|
||||
"label": "Objects",
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": true
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"refresh": "30s",
|
||||
"rows": [ ],
|
||||
"schemaVersion": 22,
|
||||
"style": "dark",
|
||||
"tags": [
|
||||
"ceph-mixin"
|
||||
],
|
||||
"templating": {
|
||||
"list": [
|
||||
{
|
||||
"allValue": ".+",
|
||||
"current": { },
|
||||
"datasource": "${prometheusds}",
|
||||
"hide": 2,
|
||||
"includeAll": true,
|
||||
"label": "cluster",
|
||||
"multi": true,
|
||||
"name": "cluster",
|
||||
"options": [ ],
|
||||
"query": "label_values(ceph_osd_metadata, cluster)",
|
||||
"refresh": 1,
|
||||
"regex": "(.*)",
|
||||
"sort": 1,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [ ],
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
},
|
||||
{
|
||||
"allValue": ".+",
|
||||
"current": { },
|
||||
"datasource": "${prometheusds}",
|
||||
"hide": 0,
|
||||
"includeAll": true,
|
||||
"label": "job",
|
||||
"multi": true,
|
||||
"name": "job",
|
||||
"options": [ ],
|
||||
"query": "label_values(ceph_osd_metadata{}, job)",
|
||||
"refresh": 1,
|
||||
"regex": "(.*)",
|
||||
"sort": 1,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [ ],
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
},
|
||||
{
|
||||
"allValue": null,
|
||||
"current": { },
|
||||
"datasource": "${prometheusds}",
|
||||
"hide": 0,
|
||||
"includeAll": false,
|
||||
"label": "Pool Name",
|
||||
"multi": false,
|
||||
"name": "pool_name",
|
||||
"options": [ ],
|
||||
"query": "label_values(ceph_pool_metadata{job=~\"$job\"}, name)",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"sort": 1,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [ ],
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
}
|
||||
]
|
||||
},
|
||||
"time": {
|
||||
"from": "now-1h",
|
||||
"to": "now"
|
||||
},
|
||||
"timepicker": {
|
||||
"refresh_intervals": [
|
||||
"5s",
|
||||
"10s",
|
||||
"30s",
|
||||
"1m",
|
||||
"5m",
|
||||
"15m",
|
||||
"30m",
|
||||
"1h",
|
||||
"2h",
|
||||
"1d"
|
||||
],
|
||||
"time_options": [
|
||||
"5m",
|
||||
"15m",
|
||||
"1h",
|
||||
"6h",
|
||||
"12h",
|
||||
"24h",
|
||||
"2d",
|
||||
"7d",
|
||||
"30d"
|
||||
]
|
||||
},
|
||||
"timezone": "",
|
||||
"title": "Ceph Pool Details",
|
||||
"uid": "-xyV8KCiz",
|
||||
"version": 0
|
||||
}
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,522 @@
|
|||
{
|
||||
"__inputs": [ ],
|
||||
"__requires": [
|
||||
{
|
||||
"id": "grafana",
|
||||
"name": "Grafana",
|
||||
"type": "grafana",
|
||||
"version": "5.0.0"
|
||||
},
|
||||
{
|
||||
"id": "graph",
|
||||
"name": "Graph",
|
||||
"type": "panel",
|
||||
"version": "5.0.0"
|
||||
}
|
||||
],
|
||||
"annotations": {
|
||||
"list": [
|
||||
{
|
||||
"builtIn": 1,
|
||||
"datasource": "-- Grafana --",
|
||||
"enable": true,
|
||||
"hide": true,
|
||||
"iconColor": "rgba(0, 211, 255, 1)",
|
||||
"name": "Annotations & Alerts",
|
||||
"showIn": 0,
|
||||
"tags": [ ],
|
||||
"type": "dashboard"
|
||||
}
|
||||
]
|
||||
},
|
||||
"description": "",
|
||||
"editable": false,
|
||||
"gnetId": null,
|
||||
"graphTooltip": 0,
|
||||
"hideControls": false,
|
||||
"id": null,
|
||||
"links": [ ],
|
||||
"panels": [
|
||||
{
|
||||
"collapse": false,
|
||||
"collapsed": false,
|
||||
"gridPos": {
|
||||
"h": 1,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 0
|
||||
},
|
||||
"id": 2,
|
||||
"panels": [ ],
|
||||
"repeat": null,
|
||||
"repeatIteration": null,
|
||||
"repeatRowId": null,
|
||||
"showTitle": true,
|
||||
"title": "RGW Host Detail : $rgw_servers",
|
||||
"titleSize": "h6",
|
||||
"type": "row"
|
||||
},
|
||||
{
|
||||
"aliasColors": { },
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "${prometheusds}",
|
||||
"description": "",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 6,
|
||||
"x": 0,
|
||||
"y": 1
|
||||
},
|
||||
"id": 3,
|
||||
"legend": {
|
||||
"alignAsTable": false,
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"rightSide": false,
|
||||
"show": true,
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
"links": [ ],
|
||||
"nullPointMode": "null as zero",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"repeat": null,
|
||||
"seriesOverrides": [ ],
|
||||
"spaceLength": 10,
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum by (instance_id) (\n rate(ceph_rgw_get_initial_lat_sum{job=~\"$job\"}[$__rate_interval]) /\n rate(ceph_rgw_get_initial_lat_count{job=~\"$job\"}[$__rate_interval])\n) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\", ceph_daemon=~\"$rgw_servers\"}\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "GET {{ceph_daemon}}",
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"expr": "sum by (instance_id) (\n rate(ceph_rgw_put_initial_lat_sum{job=~\"$job\"}[$__rate_interval]) /\n rate(ceph_rgw_put_initial_lat_count{job=~\"$job\"}[$__rate_interval])\n) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\", ceph_daemon=~\"$rgw_servers\"}\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "PUT {{ceph_daemon}}",
|
||||
"refId": "B"
|
||||
}
|
||||
],
|
||||
"thresholds": [ ],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "$rgw_servers GET/PUT Latencies",
|
||||
"tooltip": {
|
||||
"shared": true,
|
||||
"sort": 0,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": [ ]
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "s",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"show": true
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"aliasColors": { },
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "${prometheusds}",
|
||||
"description": "",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 7,
|
||||
"x": 6,
|
||||
"y": 1
|
||||
},
|
||||
"id": 4,
|
||||
"legend": {
|
||||
"alignAsTable": false,
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"rightSide": false,
|
||||
"show": true,
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
"links": [ ],
|
||||
"nullPointMode": "null as zero",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"repeat": null,
|
||||
"seriesOverrides": [ ],
|
||||
"spaceLength": 10,
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "rate(ceph_rgw_get_b{job=~\"$job\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\", ceph_daemon=~\"$rgw_servers\"}\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "GETs {{ceph_daemon}}",
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"expr": "rate(ceph_rgw_put_b{job=~\"$job\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon)\n ceph_rgw_metadata{job=~\"$job\", ceph_daemon=~\"$rgw_servers\"}\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "PUTs {{ceph_daemon}}",
|
||||
"refId": "B"
|
||||
}
|
||||
],
|
||||
"thresholds": [ ],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "Bandwidth by HTTP Operation",
|
||||
"tooltip": {
|
||||
"shared": true,
|
||||
"sort": 0,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": [ ]
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "bytes",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"show": true
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"aliasColors": {
|
||||
"GETs": "#7eb26d",
|
||||
"Other": "#447ebc",
|
||||
"PUTs": "#eab839",
|
||||
"Requests": "#3f2b5b",
|
||||
"Requests Failed": "#bf1b00"
|
||||
},
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "${prometheusds}",
|
||||
"description": "",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 7,
|
||||
"x": 13,
|
||||
"y": 1
|
||||
},
|
||||
"id": 5,
|
||||
"legend": {
|
||||
"alignAsTable": false,
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"rightSide": false,
|
||||
"show": true,
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
"links": [ ],
|
||||
"nullPointMode": "null as zero",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"repeat": null,
|
||||
"seriesOverrides": [ ],
|
||||
"spaceLength": 10,
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "rate(ceph_rgw_failed_req{job=~\"$job\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\",ceph_daemon=~\"$rgw_servers\"}\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "Requests Failed {{ceph_daemon}}",
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"expr": "rate(ceph_rgw_get{job=~\"$job\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\", ceph_daemon=~\"$rgw_servers\"}\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "GETs {{ceph_daemon}}",
|
||||
"refId": "B"
|
||||
},
|
||||
{
|
||||
"expr": "rate(ceph_rgw_put{job=~\"$job\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\", ceph_daemon=~\"$rgw_servers\"}\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "PUTs {{ceph_daemon}}",
|
||||
"refId": "C"
|
||||
},
|
||||
{
|
||||
"expr": "(\n rate(ceph_rgw_req{job=~\"$job\"}[$__rate_interval]) -\n (\n rate(ceph_rgw_get{job=~\"$job\"}[$__rate_interval]) +\n rate(ceph_rgw_put{job=~\"$job\"}[$__rate_interval])\n )\n) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\", ceph_daemon=~\"$rgw_servers\"}\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "Other {{ceph_daemon}}",
|
||||
"refId": "D"
|
||||
}
|
||||
],
|
||||
"thresholds": [ ],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "HTTP Request Breakdown",
|
||||
"tooltip": {
|
||||
"shared": true,
|
||||
"sort": 0,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": [ ]
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"show": true
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"aliasColors": {
|
||||
"Failures": "#bf1b00",
|
||||
"GETs": "#7eb26d",
|
||||
"Other (HEAD,POST,DELETE)": "#447ebc",
|
||||
"PUTs": "#eab839",
|
||||
"Requests": "#3f2b5b"
|
||||
},
|
||||
"datasource": "${prometheusds}",
|
||||
"description": "",
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 4,
|
||||
"x": 20,
|
||||
"y": 1
|
||||
},
|
||||
"id": 6,
|
||||
"legend": {
|
||||
"percentage": true,
|
||||
"show": true,
|
||||
"values": true
|
||||
},
|
||||
"legendType": "Under graph",
|
||||
"pieType": "pie",
|
||||
"targets": [
|
||||
{
|
||||
"expr": "rate(ceph_rgw_failed_req{job=~\"$job\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\", ceph_daemon=~\"$rgw_servers\"}\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "Failures {{ceph_daemon}}",
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"expr": "rate(ceph_rgw_get{job=~\"$job\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\", ceph_daemon=~\"$rgw_servers\"}\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "GETs {{ceph_daemon}}",
|
||||
"refId": "B"
|
||||
},
|
||||
{
|
||||
"expr": "rate(ceph_rgw_put{job=~\"$job\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\", ceph_daemon=~\"$rgw_servers\"}\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "PUTs {{ceph_daemon}}",
|
||||
"refId": "C"
|
||||
},
|
||||
{
|
||||
"expr": "(\n rate(ceph_rgw_req{job=~\"$job\"}[$__rate_interval]) -\n (\n rate(ceph_rgw_get{job=~\"$job\"}[$__rate_interval]) +\n rate(ceph_rgw_put{job=~\"$job\"}[$__rate_interval])\n )\n) * on (instance_id) group_left (ceph_daemon)\n ceph_rgw_metadata{job=~\"$job\", ceph_daemon=~\"$rgw_servers\"}\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "Other (DELETE,LIST) {{ceph_daemon}}",
|
||||
"refId": "D"
|
||||
}
|
||||
],
|
||||
"title": "Workload Breakdown",
|
||||
"type": "piechart",
|
||||
"valueName": "current"
|
||||
}
|
||||
],
|
||||
"refresh": "30s",
|
||||
"rows": [ ],
|
||||
"schemaVersion": 16,
|
||||
"style": "dark",
|
||||
"tags": [
|
||||
"ceph-mixin",
|
||||
"overview"
|
||||
],
|
||||
"templating": {
|
||||
"list": [
|
||||
{
|
||||
"allValue": ".+",
|
||||
"current": { },
|
||||
"datasource": "${prometheusds}",
|
||||
"hide": 2,
|
||||
"includeAll": true,
|
||||
"label": "cluster",
|
||||
"multi": true,
|
||||
"name": "cluster",
|
||||
"options": [ ],
|
||||
"query": "label_values(ceph_osd_metadata, cluster)",
|
||||
"refresh": 1,
|
||||
"regex": "(.*)",
|
||||
"sort": 1,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [ ],
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
},
|
||||
{
|
||||
"allValue": ".+",
|
||||
"current": { },
|
||||
"datasource": "${prometheusds}",
|
||||
"hide": 0,
|
||||
"includeAll": true,
|
||||
"label": "job",
|
||||
"multi": true,
|
||||
"name": "job",
|
||||
"options": [ ],
|
||||
"query": "label_values(ceph_osd_metadata{}, job)",
|
||||
"refresh": 1,
|
||||
"regex": "(.*)",
|
||||
"sort": 1,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [ ],
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
},
|
||||
{
|
||||
"allValue": null,
|
||||
"current": { },
|
||||
"datasource": "${prometheusds}",
|
||||
"hide": 0,
|
||||
"includeAll": true,
|
||||
"label": "",
|
||||
"multi": false,
|
||||
"name": "rgw_servers",
|
||||
"options": [ ],
|
||||
"query": "label_values(ceph_rgw_metadata{job=~\"$job\"}, ceph_daemon)",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"sort": 1,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [ ],
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
}
|
||||
]
|
||||
},
|
||||
"time": {
|
||||
"from": "now-1h",
|
||||
"to": "now"
|
||||
},
|
||||
"timepicker": {
|
||||
"refresh_intervals": [
|
||||
"5s",
|
||||
"10s",
|
||||
"30s",
|
||||
"1m",
|
||||
"5m",
|
||||
"15m",
|
||||
"30m",
|
||||
"1h",
|
||||
"2h",
|
||||
"1d"
|
||||
],
|
||||
"time_options": [
|
||||
"5m",
|
||||
"15m",
|
||||
"1h",
|
||||
"6h",
|
||||
"12h",
|
||||
"24h",
|
||||
"2d",
|
||||
"7d",
|
||||
"30d"
|
||||
]
|
||||
},
|
||||
"timezone": "",
|
||||
"title": "RGW Instance Detail",
|
||||
"uid": "x5ARzZtmk",
|
||||
"version": 0
|
||||
}
|
|
@ -0,0 +1,695 @@
|
|||
{
|
||||
"__inputs": [ ],
|
||||
"__requires": [
|
||||
{
|
||||
"id": "grafana",
|
||||
"name": "Grafana",
|
||||
"type": "grafana",
|
||||
"version": "5.0.0"
|
||||
},
|
||||
{
|
||||
"id": "graph",
|
||||
"name": "Graph",
|
||||
"type": "panel",
|
||||
"version": "5.0.0"
|
||||
}
|
||||
],
|
||||
"annotations": {
|
||||
"list": [
|
||||
{
|
||||
"builtIn": 1,
|
||||
"datasource": "-- Grafana --",
|
||||
"enable": true,
|
||||
"hide": true,
|
||||
"iconColor": "rgba(0, 211, 255, 1)",
|
||||
"name": "Annotations & Alerts",
|
||||
"showIn": 0,
|
||||
"tags": [ ],
|
||||
"type": "dashboard"
|
||||
}
|
||||
]
|
||||
},
|
||||
"description": "",
|
||||
"editable": false,
|
||||
"gnetId": null,
|
||||
"graphTooltip": 0,
|
||||
"hideControls": false,
|
||||
"id": null,
|
||||
"links": [ ],
|
||||
"panels": [
|
||||
{
|
||||
"collapse": false,
|
||||
"collapsed": false,
|
||||
"gridPos": {
|
||||
"h": 1,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 0
|
||||
},
|
||||
"id": 2,
|
||||
"panels": [ ],
|
||||
"repeat": null,
|
||||
"repeatIteration": null,
|
||||
"repeatRowId": null,
|
||||
"showTitle": true,
|
||||
"title": "RGW Overview - All Gateways",
|
||||
"titleSize": "h6",
|
||||
"type": "row"
|
||||
},
|
||||
{
|
||||
"aliasColors": { },
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "${prometheusds}",
|
||||
"description": "",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 8,
|
||||
"x": 0,
|
||||
"y": 1
|
||||
},
|
||||
"id": 3,
|
||||
"legend": {
|
||||
"alignAsTable": false,
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"rightSide": false,
|
||||
"show": true,
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
"links": [ ],
|
||||
"nullPointMode": "null as zero",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"repeat": null,
|
||||
"seriesOverrides": [ ],
|
||||
"spaceLength": 10,
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "label_replace(\n rate(ceph_rgw_get_initial_lat_sum{job=~\"$job\"}[$__rate_interval]) /\n rate(ceph_rgw_get_initial_lat_count{job=~\"$job\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\"},\n \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"\n)\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "GET {{rgw_host}}",
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"expr": "label_replace(\n rate(ceph_rgw_put_initial_lat_sum{job=~\"$job\"}[$__rate_interval]) /\n rate(ceph_rgw_put_initial_lat_count{job=~\"$job\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\"},\n \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"\n)\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "PUT {{rgw_host}}",
|
||||
"refId": "B"
|
||||
}
|
||||
],
|
||||
"thresholds": [ ],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "Average GET/PUT Latencies by RGW Instance",
|
||||
"tooltip": {
|
||||
"shared": true,
|
||||
"sort": 0,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": [ ]
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "s",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"show": true
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"aliasColors": { },
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "${prometheusds}",
|
||||
"description": "",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 7,
|
||||
"x": 8,
|
||||
"y": 1
|
||||
},
|
||||
"id": 4,
|
||||
"legend": {
|
||||
"alignAsTable": false,
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"rightSide": false,
|
||||
"show": true,
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
"links": [ ],
|
||||
"nullPointMode": "null as zero",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"repeat": null,
|
||||
"seriesOverrides": [ ],
|
||||
"spaceLength": 10,
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum by (rgw_host) (\n label_replace(\n rate(ceph_rgw_req{job=~\"$job\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\"},\n \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"\n )\n)\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "{{rgw_host}}",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"thresholds": [ ],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "Total Requests/sec by RGW Instance",
|
||||
"tooltip": {
|
||||
"shared": true,
|
||||
"sort": 0,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": [ ]
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "none",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"show": true
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"aliasColors": { },
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "${prometheusds}",
|
||||
"description": "Latencies are shown stacked, without a yaxis to provide a visual indication of GET latency imbalance across RGW hosts",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 6,
|
||||
"x": 15,
|
||||
"y": 1
|
||||
},
|
||||
"id": 5,
|
||||
"legend": {
|
||||
"alignAsTable": false,
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"rightSide": false,
|
||||
"show": true,
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
"links": [ ],
|
||||
"nullPointMode": "null as zero",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"repeat": null,
|
||||
"seriesOverrides": [ ],
|
||||
"spaceLength": 10,
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "label_replace(\n rate(ceph_rgw_get_initial_lat_sum{job=~\"$job\"}[$__rate_interval]) /\n rate(ceph_rgw_get_initial_lat_count{job=~\"$job\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\"},\n \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"\n)\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "{{rgw_host}}",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"thresholds": [ ],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "GET Latencies by RGW Instance",
|
||||
"tooltip": {
|
||||
"shared": true,
|
||||
"sort": 0,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": [ ]
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "s",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"show": true
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"aliasColors": { },
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "${prometheusds}",
|
||||
"description": "Total bytes transferred in/out of all radosgw instances within the cluster",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
"h": 6,
|
||||
"w": 8,
|
||||
"x": 0,
|
||||
"y": 8
|
||||
},
|
||||
"id": 6,
|
||||
"legend": {
|
||||
"alignAsTable": false,
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"rightSide": false,
|
||||
"show": true,
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
"links": [ ],
|
||||
"nullPointMode": "null as zero",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"repeat": null,
|
||||
"seriesOverrides": [ ],
|
||||
"spaceLength": 10,
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(rate(ceph_rgw_get_b{job=~\"$job\"}[$__rate_interval]))",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "GETs",
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"expr": "sum(rate(ceph_rgw_put_b{job=~\"$job\"}[$__rate_interval]))",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "PUTs",
|
||||
"refId": "B"
|
||||
}
|
||||
],
|
||||
"thresholds": [ ],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "Bandwidth Consumed by Type",
|
||||
"tooltip": {
|
||||
"shared": true,
|
||||
"sort": 0,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": [ ]
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "bytes",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"show": true
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"aliasColors": { },
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "${prometheusds}",
|
||||
"description": "Total bytes transferred in/out through get/put operations, by radosgw instance",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
"h": 6,
|
||||
"w": 7,
|
||||
"x": 8,
|
||||
"y": 8
|
||||
},
|
||||
"id": 7,
|
||||
"legend": {
|
||||
"alignAsTable": false,
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"rightSide": false,
|
||||
"show": true,
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
"links": [ ],
|
||||
"nullPointMode": "null as zero",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"repeat": null,
|
||||
"seriesOverrides": [ ],
|
||||
"spaceLength": 10,
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "label_replace(sum by (instance_id) (\n rate(ceph_rgw_get_b{job=~\"$job\"}[$__rate_interval]) +\n rate(ceph_rgw_put_b{job=~\"$job\"}[$__rate_interval])) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\"},\n \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"\n)\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "{{rgw_host}}",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"thresholds": [ ],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "Bandwidth by RGW Instance",
|
||||
"tooltip": {
|
||||
"shared": true,
|
||||
"sort": 0,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": [ ]
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "bytes",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"show": true
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"aliasColors": { },
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "${prometheusds}",
|
||||
"description": "Latencies are shown stacked, without a yaxis to provide a visual indication of PUT latency imbalance across RGW hosts",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
"h": 6,
|
||||
"w": 6,
|
||||
"x": 15,
|
||||
"y": 8
|
||||
},
|
||||
"id": 8,
|
||||
"legend": {
|
||||
"alignAsTable": false,
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"rightSide": false,
|
||||
"show": true,
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
"links": [ ],
|
||||
"nullPointMode": "null as zero",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"repeat": null,
|
||||
"seriesOverrides": [ ],
|
||||
"spaceLength": 10,
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "label_replace(\n rate(ceph_rgw_put_initial_lat_sum{job=~\"$job\"}[$__rate_interval]) /\n rate(ceph_rgw_put_initial_lat_count{job=~\"$job\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\"},\n \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"\n)\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "{{rgw_host}}",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"thresholds": [ ],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "PUT Latencies by RGW Instance",
|
||||
"tooltip": {
|
||||
"shared": true,
|
||||
"sort": 0,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": [ ]
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "s",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"show": true
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"refresh": "30s",
|
||||
"rows": [ ],
|
||||
"schemaVersion": 16,
|
||||
"style": "dark",
|
||||
"tags": [
|
||||
"ceph-mixin",
|
||||
"overview"
|
||||
],
|
||||
"templating": {
|
||||
"list": [
|
||||
{
|
||||
"allValue": ".+",
|
||||
"current": { },
|
||||
"datasource": "${prometheusds}",
|
||||
"hide": 2,
|
||||
"includeAll": true,
|
||||
"label": "cluster",
|
||||
"multi": true,
|
||||
"name": "cluster",
|
||||
"options": [ ],
|
||||
"query": "label_values(ceph_osd_metadata, cluster)",
|
||||
"refresh": 1,
|
||||
"regex": "(.*)",
|
||||
"sort": 1,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [ ],
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
},
|
||||
{
|
||||
"allValue": ".+",
|
||||
"current": { },
|
||||
"datasource": "${prometheusds}",
|
||||
"hide": 0,
|
||||
"includeAll": true,
|
||||
"label": "job",
|
||||
"multi": true,
|
||||
"name": "job",
|
||||
"options": [ ],
|
||||
"query": "label_values(ceph_osd_metadata{}, job)",
|
||||
"refresh": 1,
|
||||
"regex": "(.*)",
|
||||
"sort": 1,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [ ],
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
},
|
||||
{
|
||||
"allValue": null,
|
||||
"current": { },
|
||||
"datasource": "${prometheusds}",
|
||||
"hide": 0,
|
||||
"includeAll": true,
|
||||
"label": "",
|
||||
"multi": false,
|
||||
"name": "rgw_servers",
|
||||
"options": [ ],
|
||||
"query": "label_values(ceph_rgw_metadata{job=~\"$job\"}, ceph_daemon)",
|
||||
"refresh": 1,
|
||||
"regex": "RGW Server",
|
||||
"sort": 1,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [ ],
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
}
|
||||
]
|
||||
},
|
||||
"time": {
|
||||
"from": "now-1h",
|
||||
"to": "now"
|
||||
},
|
||||
"timepicker": {
|
||||
"refresh_intervals": [
|
||||
"5s",
|
||||
"10s",
|
||||
"30s",
|
||||
"1m",
|
||||
"5m",
|
||||
"15m",
|
||||
"30m",
|
||||
"1h",
|
||||
"2h",
|
||||
"1d"
|
||||
],
|
||||
"time_options": [
|
||||
"5m",
|
||||
"15m",
|
||||
"1h",
|
||||
"6h",
|
||||
"12h",
|
||||
"24h",
|
||||
"2d",
|
||||
"7d",
|
||||
"30d"
|
||||
]
|
||||
},
|
||||
"timezone": "",
|
||||
"title": "RGW Overview",
|
||||
"uid": "WAkugZpiz",
|
||||
"version": 0
|
||||
}
|
|
@ -0,0 +1,490 @@
|
|||
{
|
||||
"__inputs": [ ],
|
||||
"__requires": [
|
||||
{
|
||||
"id": "grafana",
|
||||
"name": "Grafana",
|
||||
"type": "grafana",
|
||||
"version": "5.0.0"
|
||||
},
|
||||
{
|
||||
"id": "graph",
|
||||
"name": "Graph",
|
||||
"type": "panel",
|
||||
"version": "5.0.0"
|
||||
}
|
||||
],
|
||||
"annotations": {
|
||||
"list": [
|
||||
{
|
||||
"builtIn": 1,
|
||||
"datasource": "-- Grafana --",
|
||||
"enable": true,
|
||||
"hide": true,
|
||||
"iconColor": "rgba(0, 211, 255, 1)",
|
||||
"name": "Annotations & Alerts",
|
||||
"showIn": 0,
|
||||
"tags": [ ],
|
||||
"type": "dashboard"
|
||||
}
|
||||
]
|
||||
},
|
||||
"description": "",
|
||||
"editable": false,
|
||||
"gnetId": null,
|
||||
"graphTooltip": 0,
|
||||
"hideControls": false,
|
||||
"id": null,
|
||||
"links": [ ],
|
||||
"panels": [
|
||||
{
|
||||
"aliasColors": { },
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "${prometheusds}",
|
||||
"description": "",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 8,
|
||||
"x": 0,
|
||||
"y": 0
|
||||
},
|
||||
"id": 2,
|
||||
"legend": {
|
||||
"alignAsTable": false,
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"rightSide": false,
|
||||
"show": true,
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
"links": [ ],
|
||||
"nullPointMode": "null as zero",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"repeat": null,
|
||||
"seriesOverrides": [ ],
|
||||
"spaceLength": 10,
|
||||
"stack": true,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum by (source_zone) (rate(ceph_data_sync_from_zone_fetch_bytes_sum{job=~\"$job\"}[$__rate_interval]))",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "{{source_zone}}",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"thresholds": [ ],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "Replication (throughput) from Source Zone",
|
||||
"tooltip": {
|
||||
"shared": true,
|
||||
"sort": 0,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": [ ]
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "Bps",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"show": true
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"aliasColors": { },
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "${prometheusds}",
|
||||
"description": "",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 8,
|
||||
"x": 8,
|
||||
"y": 0
|
||||
},
|
||||
"id": 3,
|
||||
"legend": {
|
||||
"alignAsTable": false,
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"rightSide": false,
|
||||
"show": true,
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
"links": [ ],
|
||||
"nullPointMode": "null as zero",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"repeat": null,
|
||||
"seriesOverrides": [ ],
|
||||
"spaceLength": 10,
|
||||
"stack": true,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum by (source_zone) (rate(ceph_data_sync_from_zone_fetch_bytes_count{job=~\"$job\"}[$__rate_interval]))",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "{{source_zone}}",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"thresholds": [ ],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "Replication (objects) from Source Zone",
|
||||
"tooltip": {
|
||||
"shared": true,
|
||||
"sort": 0,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": [ ]
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "short",
|
||||
"label": "Objects/s",
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"show": true
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"aliasColors": { },
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "${prometheusds}",
|
||||
"description": "",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 8,
|
||||
"x": 16,
|
||||
"y": 0
|
||||
},
|
||||
"id": 4,
|
||||
"legend": {
|
||||
"alignAsTable": false,
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"rightSide": false,
|
||||
"show": true,
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
"links": [ ],
|
||||
"nullPointMode": "null as zero",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"repeat": null,
|
||||
"seriesOverrides": [ ],
|
||||
"spaceLength": 10,
|
||||
"stack": true,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum by (source_zone) (rate(ceph_data_sync_from_zone_poll_latency_sum{job=~\"$job\"}[$__rate_interval]))",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "{{source_zone}}",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"thresholds": [ ],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "Polling Request Latency from Source Zone",
|
||||
"tooltip": {
|
||||
"shared": true,
|
||||
"sort": 0,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": [ ]
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "ms",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"show": true
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"aliasColors": { },
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "${prometheusds}",
|
||||
"description": "",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 8,
|
||||
"x": 0,
|
||||
"y": 7
|
||||
},
|
||||
"id": 5,
|
||||
"legend": {
|
||||
"alignAsTable": false,
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"rightSide": false,
|
||||
"show": true,
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
"links": [ ],
|
||||
"nullPointMode": "null as zero",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"repeat": null,
|
||||
"seriesOverrides": [ ],
|
||||
"spaceLength": 10,
|
||||
"stack": true,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum by (source_zone) (rate(ceph_data_sync_from_zone_fetch_errors{job=~\"$job\"}[$__rate_interval]))",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "{{source_zone}}",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"thresholds": [ ],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "Unsuccessful Object Replications from Source Zone",
|
||||
"tooltip": {
|
||||
"shared": true,
|
||||
"sort": 0,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": [ ]
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "short",
|
||||
"label": "Count/s",
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"show": true
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"refresh": "30s",
|
||||
"rows": [ ],
|
||||
"schemaVersion": 16,
|
||||
"style": "dark",
|
||||
"tags": [
|
||||
"ceph-mixin",
|
||||
"overview"
|
||||
],
|
||||
"templating": {
|
||||
"list": [
|
||||
{
|
||||
"allValue": ".+",
|
||||
"current": { },
|
||||
"datasource": "${prometheusds}",
|
||||
"hide": 2,
|
||||
"includeAll": true,
|
||||
"label": "cluster",
|
||||
"multi": true,
|
||||
"name": "cluster",
|
||||
"options": [ ],
|
||||
"query": "label_values(ceph_osd_metadata, cluster)",
|
||||
"refresh": 1,
|
||||
"regex": "(.*)",
|
||||
"sort": 1,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [ ],
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
},
|
||||
{
|
||||
"allValue": ".+",
|
||||
"current": { },
|
||||
"datasource": "${prometheusds}",
|
||||
"hide": 0,
|
||||
"includeAll": true,
|
||||
"label": "job",
|
||||
"multi": true,
|
||||
"name": "job",
|
||||
"options": [ ],
|
||||
"query": "label_values(ceph_osd_metadata{}, job)",
|
||||
"refresh": 1,
|
||||
"regex": "(.*)",
|
||||
"sort": 1,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [ ],
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
},
|
||||
{
|
||||
"allValue": null,
|
||||
"current": { },
|
||||
"datasource": "${prometheusds}",
|
||||
"hide": 0,
|
||||
"includeAll": true,
|
||||
"label": "",
|
||||
"multi": false,
|
||||
"name": "rgw_servers",
|
||||
"options": [ ],
|
||||
"query": "label_values(ceph_rgw_metadata{job=~\"$job\"}, ceph_daemon)",
|
||||
"refresh": 1,
|
||||
"regex": "RGW Server",
|
||||
"sort": 1,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [ ],
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
}
|
||||
]
|
||||
},
|
||||
"time": {
|
||||
"from": "now-1h",
|
||||
"to": "now"
|
||||
},
|
||||
"timepicker": {
|
||||
"refresh_intervals": [
|
||||
"5s",
|
||||
"10s",
|
||||
"30s",
|
||||
"1m",
|
||||
"5m",
|
||||
"15m",
|
||||
"30m",
|
||||
"1h",
|
||||
"2h",
|
||||
"1d"
|
||||
],
|
||||
"time_options": [
|
||||
"5m",
|
||||
"15m",
|
||||
"1h",
|
||||
"6h",
|
||||
"12h",
|
||||
"24h",
|
||||
"2d",
|
||||
"7d",
|
||||
"30d"
|
||||
]
|
||||
},
|
||||
"timezone": "",
|
||||
"title": "RGW Sync Overview",
|
||||
"uid": "rgw-sync-overview",
|
||||
"version": 0
|
||||
}
|
|
@ -0,0 +1,444 @@
|
|||
{
|
||||
"__inputs": [ ],
|
||||
"__requires": [
|
||||
{
|
||||
"id": "grafana",
|
||||
"name": "Grafana",
|
||||
"type": "grafana",
|
||||
"version": "5.3.3"
|
||||
},
|
||||
{
|
||||
"id": "graph",
|
||||
"name": "Graph",
|
||||
"type": "panel",
|
||||
"version": "5.0.0"
|
||||
}
|
||||
],
|
||||
"annotations": {
|
||||
"list": [
|
||||
{
|
||||
"builtIn": 1,
|
||||
"datasource": "-- Grafana --",
|
||||
"enable": true,
|
||||
"hide": true,
|
||||
"iconColor": "rgba(0, 211, 255, 1)",
|
||||
"name": "Annotations & Alerts",
|
||||
"showIn": 0,
|
||||
"tags": [ ],
|
||||
"type": "dashboard"
|
||||
}
|
||||
]
|
||||
},
|
||||
"description": "Detailed Performance of RBD Images (IOPS/Throughput/Latency)",
|
||||
"editable": false,
|
||||
"gnetId": null,
|
||||
"graphTooltip": 0,
|
||||
"hideControls": false,
|
||||
"id": null,
|
||||
"links": [ ],
|
||||
"panels": [
|
||||
{
|
||||
"aliasColors": { },
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "${prometheusds}",
|
||||
"description": "RBD per-image IO statistics are disabled by default.\n\nPlease refer to https://docs.ceph.com/en/latest/mgr/prometheus/#rbd-io-statistics for information about how to enable those optionally.",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
"h": 9,
|
||||
"w": 8,
|
||||
"x": 0,
|
||||
"y": 0
|
||||
},
|
||||
"id": 2,
|
||||
"legend": {
|
||||
"alignAsTable": false,
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"rightSide": false,
|
||||
"show": true,
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
"links": [ ],
|
||||
"nullPointMode": "null as zero",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"repeat": null,
|
||||
"seriesOverrides": [ ],
|
||||
"spaceLength": 10,
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "rate(ceph_rbd_write_ops{job=~\"$job\", pool=\"$pool\", image=\"$image\"}[$__rate_interval])",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "{{pool}} Write",
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"expr": "rate(ceph_rbd_read_ops{job=~\"$job\", pool=\"$pool\", image=\"$image\"}[$__rate_interval])",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "{{pool}} Read",
|
||||
"refId": "B"
|
||||
}
|
||||
],
|
||||
"thresholds": [ ],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "IOPS",
|
||||
"tooltip": {
|
||||
"shared": true,
|
||||
"sort": 0,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": [ ]
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "iops",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "iops",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"show": true
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"aliasColors": { },
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "${prometheusds}",
|
||||
"description": "RBD per-image IO statistics are disabled by default.\n\nPlease refer to https://docs.ceph.com/en/latest/mgr/prometheus/#rbd-io-statistics for information about how to enable those optionally.",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
"h": 9,
|
||||
"w": 8,
|
||||
"x": 8,
|
||||
"y": 0
|
||||
},
|
||||
"id": 3,
|
||||
"legend": {
|
||||
"alignAsTable": false,
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"rightSide": false,
|
||||
"show": true,
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
"links": [ ],
|
||||
"nullPointMode": "null as zero",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"repeat": null,
|
||||
"seriesOverrides": [ ],
|
||||
"spaceLength": 10,
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "rate(ceph_rbd_write_bytes{job=~\"$job\", pool=\"$pool\", image=\"$image\"}[$__rate_interval])",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "{{pool}} Write",
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"expr": "rate(ceph_rbd_read_bytes{job=~\"$job\", pool=\"$pool\", image=\"$image\"}[$__rate_interval])",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "{{pool}} Read",
|
||||
"refId": "B"
|
||||
}
|
||||
],
|
||||
"thresholds": [ ],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "Throughput",
|
||||
"tooltip": {
|
||||
"shared": true,
|
||||
"sort": 0,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": [ ]
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "Bps",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "Bps",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"show": true
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"aliasColors": { },
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "${prometheusds}",
|
||||
"description": "RBD per-image IO statistics are disabled by default.\n\nPlease refer to https://docs.ceph.com/en/latest/mgr/prometheus/#rbd-io-statistics for information about how to enable those optionally.",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
"h": 9,
|
||||
"w": 8,
|
||||
"x": 16,
|
||||
"y": 0
|
||||
},
|
||||
"id": 4,
|
||||
"legend": {
|
||||
"alignAsTable": false,
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"rightSide": false,
|
||||
"show": true,
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
"links": [ ],
|
||||
"nullPointMode": "null as zero",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"repeat": null,
|
||||
"seriesOverrides": [ ],
|
||||
"spaceLength": 10,
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "rate(ceph_rbd_write_latency_sum{job=~\"$job\", pool=\"$pool\", image=\"$image\"}[$__rate_interval]) /\n rate(ceph_rbd_write_latency_count{job=~\"$job\", pool=\"$pool\", image=\"$image\"}[$__rate_interval])\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "{{pool}} Write",
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"expr": "rate(ceph_rbd_read_latency_sum{job=~\"$job\", pool=\"$pool\", image=\"$image\"}[$__rate_interval]) /\n rate(ceph_rbd_read_latency_count{job=~\"$job\", pool=\"$pool\", image=\"$image\"}[$__rate_interval])\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "{{pool}} Read",
|
||||
"refId": "B"
|
||||
}
|
||||
],
|
||||
"thresholds": [ ],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "Average Latency",
|
||||
"tooltip": {
|
||||
"shared": true,
|
||||
"sort": 0,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": [ ]
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "ns",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "ns",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"show": true
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"refresh": "30s",
|
||||
"rows": [ ],
|
||||
"schemaVersion": 16,
|
||||
"style": "dark",
|
||||
"tags": [
|
||||
"ceph-mixin"
|
||||
],
|
||||
"templating": {
|
||||
"list": [
|
||||
{
|
||||
"allValue": ".+",
|
||||
"current": { },
|
||||
"datasource": "${prometheusds}",
|
||||
"hide": 2,
|
||||
"includeAll": true,
|
||||
"label": "cluster",
|
||||
"multi": true,
|
||||
"name": "cluster",
|
||||
"options": [ ],
|
||||
"query": "label_values(ceph_osd_metadata, cluster)",
|
||||
"refresh": 1,
|
||||
"regex": "(.*)",
|
||||
"sort": 1,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [ ],
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
},
|
||||
{
|
||||
"allValue": ".+",
|
||||
"current": { },
|
||||
"datasource": "${prometheusds}",
|
||||
"hide": 0,
|
||||
"includeAll": true,
|
||||
"label": "job",
|
||||
"multi": true,
|
||||
"name": "job",
|
||||
"options": [ ],
|
||||
"query": "label_values(ceph_osd_metadata{}, job)",
|
||||
"refresh": 1,
|
||||
"regex": "(.*)",
|
||||
"sort": 1,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [ ],
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
},
|
||||
{
|
||||
"allValue": null,
|
||||
"current": { },
|
||||
"datasource": "${prometheusds}",
|
||||
"hide": 0,
|
||||
"includeAll": false,
|
||||
"label": "",
|
||||
"multi": false,
|
||||
"name": "pool",
|
||||
"options": [ ],
|
||||
"query": "label_values(pool)",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"sort": 0,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [ ],
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
},
|
||||
{
|
||||
"allValue": null,
|
||||
"current": { },
|
||||
"datasource": "${prometheusds}",
|
||||
"hide": 0,
|
||||
"includeAll": false,
|
||||
"label": "",
|
||||
"multi": false,
|
||||
"name": "image",
|
||||
"options": [ ],
|
||||
"query": "label_values(image)",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"sort": 0,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [ ],
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
}
|
||||
]
|
||||
},
|
||||
"time": {
|
||||
"from": "now-1h",
|
||||
"to": "now"
|
||||
},
|
||||
"timepicker": {
|
||||
"refresh_intervals": [
|
||||
"5s",
|
||||
"10s",
|
||||
"30s",
|
||||
"1m",
|
||||
"5m",
|
||||
"15m",
|
||||
"30m",
|
||||
"1h",
|
||||
"2h",
|
||||
"1d"
|
||||
],
|
||||
"time_options": [
|
||||
"5m",
|
||||
"15m",
|
||||
"1h",
|
||||
"6h",
|
||||
"12h",
|
||||
"24h",
|
||||
"2d",
|
||||
"7d",
|
||||
"30d"
|
||||
]
|
||||
},
|
||||
"timezone": "",
|
||||
"title": "RBD Details",
|
||||
"uid": "YhCYGcuZz",
|
||||
"version": 0
|
||||
}
|
|
@ -0,0 +1,723 @@
|
|||
{
|
||||
"__inputs": [ ],
|
||||
"__requires": [
|
||||
{
|
||||
"id": "grafana",
|
||||
"name": "Grafana",
|
||||
"type": "grafana",
|
||||
"version": "5.4.2"
|
||||
},
|
||||
{
|
||||
"id": "graph",
|
||||
"name": "Graph",
|
||||
"type": "panel",
|
||||
"version": "5.0.0"
|
||||
},
|
||||
{
|
||||
"id": "prometheus",
|
||||
"name": "Prometheus",
|
||||
"type": "datasource",
|
||||
"version": "5.0.0"
|
||||
},
|
||||
{
|
||||
"id": "table",
|
||||
"name": "Table",
|
||||
"type": "panel",
|
||||
"version": "5.0.0"
|
||||
}
|
||||
],
|
||||
"annotations": {
|
||||
"list": [
|
||||
{
|
||||
"builtIn": 1,
|
||||
"datasource": "-- Grafana --",
|
||||
"enable": true,
|
||||
"hide": true,
|
||||
"iconColor": "rgba(0, 211, 255, 1)",
|
||||
"name": "Annotations & Alerts",
|
||||
"showIn": 0,
|
||||
"tags": [ ],
|
||||
"type": "dashboard"
|
||||
}
|
||||
]
|
||||
},
|
||||
"description": "",
|
||||
"editable": false,
|
||||
"gnetId": null,
|
||||
"graphTooltip": 0,
|
||||
"hideControls": false,
|
||||
"id": null,
|
||||
"links": [ ],
|
||||
"panels": [
|
||||
{
|
||||
"aliasColors": { },
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "${prometheusds}",
|
||||
"description": "RBD per-image IO statistics are disabled by default.\n\nPlease refer to https://docs.ceph.com/en/latest/mgr/prometheus/#rbd-io-statistics for information about how to enable those optionally.",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 8,
|
||||
"x": 0,
|
||||
"y": 0
|
||||
},
|
||||
"id": 2,
|
||||
"legend": {
|
||||
"alignAsTable": false,
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"rightSide": false,
|
||||
"show": true,
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
"links": [ ],
|
||||
"nullPointMode": "null as zero",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"repeat": null,
|
||||
"seriesOverrides": [ ],
|
||||
"spaceLength": 10,
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "round(sum(rate(ceph_rbd_write_ops{job=~\"$job\"}[$__rate_interval])))",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "Writes",
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"expr": "round(sum(rate(ceph_rbd_read_ops{job=~\"$job\"}[$__rate_interval])))",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "Reads",
|
||||
"refId": "B"
|
||||
}
|
||||
],
|
||||
"thresholds": [ ],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "IOPS",
|
||||
"tooltip": {
|
||||
"shared": true,
|
||||
"sort": 0,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": [ ]
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"show": true
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"aliasColors": { },
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "${prometheusds}",
|
||||
"description": "RBD per-image IO statistics are disabled by default.\n\nPlease refer to https://docs.ceph.com/en/latest/mgr/prometheus/#rbd-io-statistics for information about how to enable those optionally.",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 8,
|
||||
"x": 8,
|
||||
"y": 0
|
||||
},
|
||||
"id": 3,
|
||||
"legend": {
|
||||
"alignAsTable": false,
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"rightSide": false,
|
||||
"show": true,
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
"links": [ ],
|
||||
"nullPointMode": "null as zero",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"repeat": null,
|
||||
"seriesOverrides": [ ],
|
||||
"spaceLength": 10,
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "round(sum(rate(ceph_rbd_write_bytes{job=~\"$job\"}[$__rate_interval])))",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "Write",
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"expr": "round(sum(rate(ceph_rbd_read_bytes{job=~\"$job\"}[$__rate_interval])))",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "Read",
|
||||
"refId": "B"
|
||||
}
|
||||
],
|
||||
"thresholds": [ ],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "Throughput",
|
||||
"tooltip": {
|
||||
"shared": true,
|
||||
"sort": 0,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": [ ]
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "Bps",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"show": true
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"aliasColors": { },
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "${prometheusds}",
|
||||
"description": "RBD per-image IO statistics are disabled by default.\n\nPlease refer to https://docs.ceph.com/en/latest/mgr/prometheus/#rbd-io-statistics for information about how to enable those optionally.",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 8,
|
||||
"x": 16,
|
||||
"y": 0
|
||||
},
|
||||
"id": 4,
|
||||
"legend": {
|
||||
"alignAsTable": false,
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"rightSide": false,
|
||||
"show": true,
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
"links": [ ],
|
||||
"nullPointMode": "null as zero",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"repeat": null,
|
||||
"seriesOverrides": [ ],
|
||||
"spaceLength": 10,
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "round(\n sum(rate(ceph_rbd_write_latency_sum{job=~\"$job\"}[$__rate_interval])) /\n sum(rate(ceph_rbd_write_latency_count{job=~\"$job\"}[$__rate_interval]))\n)\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "Write",
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"expr": "round(\n sum(rate(ceph_rbd_read_latency_sum{job=~\"$job\"}[$__rate_interval])) /\n sum(rate(ceph_rbd_read_latency_count{job=~\"$job\"}[$__rate_interval]))\n)\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "Read",
|
||||
"refId": "B"
|
||||
}
|
||||
],
|
||||
"thresholds": [ ],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "Average Latency",
|
||||
"tooltip": {
|
||||
"shared": true,
|
||||
"sort": 0,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": [ ]
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "ns",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"show": true
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"columns": [ ],
|
||||
"datasource": "${prometheusds}",
|
||||
"description": "RBD per-image IO statistics are disabled by default.\n\nPlease refer to https://docs.ceph.com/en/latest/mgr/prometheus/#rbd-io-statistics for information about how to enable those optionally.",
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 8,
|
||||
"x": 0,
|
||||
"y": 7
|
||||
},
|
||||
"id": 5,
|
||||
"links": [ ],
|
||||
"sort": {
|
||||
"col": 3,
|
||||
"desc": true
|
||||
},
|
||||
"styles": [
|
||||
{
|
||||
"alias": "Pool",
|
||||
"colorMode": null,
|
||||
"colors": [
|
||||
"rgba(245, 54, 54, 0.9)",
|
||||
"rgba(237, 129, 40, 0.89)",
|
||||
"rgba(50, 172, 45, 0.97)"
|
||||
],
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"decimals": 2,
|
||||
"mappingType": 1,
|
||||
"pattern": "pool",
|
||||
"thresholds": [ ],
|
||||
"type": "string",
|
||||
"unit": "short",
|
||||
"valueMaps": [ ]
|
||||
},
|
||||
{
|
||||
"alias": "Image",
|
||||
"colorMode": null,
|
||||
"colors": [
|
||||
"rgba(245, 54, 54, 0.9)",
|
||||
"rgba(237, 129, 40, 0.89)",
|
||||
"rgba(50, 172, 45, 0.97)"
|
||||
],
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"decimals": 2,
|
||||
"mappingType": 1,
|
||||
"pattern": "image",
|
||||
"thresholds": [ ],
|
||||
"type": "string",
|
||||
"unit": "short",
|
||||
"valueMaps": [ ]
|
||||
},
|
||||
{
|
||||
"alias": "IOPS",
|
||||
"colorMode": null,
|
||||
"colors": [
|
||||
"rgba(245, 54, 54, 0.9)",
|
||||
"rgba(237, 129, 40, 0.89)",
|
||||
"rgba(50, 172, 45, 0.97)"
|
||||
],
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"decimals": 2,
|
||||
"mappingType": 1,
|
||||
"pattern": "Value",
|
||||
"thresholds": [ ],
|
||||
"type": "number",
|
||||
"unit": "iops",
|
||||
"valueMaps": [ ]
|
||||
},
|
||||
{
|
||||
"alias": "",
|
||||
"colorMode": null,
|
||||
"colors": [
|
||||
"rgba(245, 54, 54, 0.9)",
|
||||
"rgba(237, 129, 40, 0.89)",
|
||||
"rgba(50, 172, 45, 0.97)"
|
||||
],
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"decimals": 2,
|
||||
"mappingType": 1,
|
||||
"pattern": "/.*/",
|
||||
"thresholds": [ ],
|
||||
"type": "hidden",
|
||||
"unit": "short",
|
||||
"valueMaps": [ ]
|
||||
}
|
||||
],
|
||||
"targets": [
|
||||
{
|
||||
"expr": "topk(10,\n (\n sort((\n rate(ceph_rbd_write_ops{job=~\"$job\"}[$__rate_interval]) +\n on (image, pool, namespace) rate(ceph_rbd_read_ops{job=~\"$job\"}[$__rate_interval])\n ))\n )\n)\n",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "Highest IOPS",
|
||||
"transform": "table",
|
||||
"type": "table"
|
||||
},
|
||||
{
|
||||
"columns": [ ],
|
||||
"datasource": "${prometheusds}",
|
||||
"description": "RBD per-image IO statistics are disabled by default.\n\nPlease refer to https://docs.ceph.com/en/latest/mgr/prometheus/#rbd-io-statistics for information about how to enable those optionally.",
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 8,
|
||||
"x": 8,
|
||||
"y": 7
|
||||
},
|
||||
"id": 6,
|
||||
"links": [ ],
|
||||
"sort": {
|
||||
"col": 3,
|
||||
"desc": true
|
||||
},
|
||||
"styles": [
|
||||
{
|
||||
"alias": "Pool",
|
||||
"colorMode": null,
|
||||
"colors": [
|
||||
"rgba(245, 54, 54, 0.9)",
|
||||
"rgba(237, 129, 40, 0.89)",
|
||||
"rgba(50, 172, 45, 0.97)"
|
||||
],
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"decimals": 2,
|
||||
"mappingType": 1,
|
||||
"pattern": "pool",
|
||||
"thresholds": [ ],
|
||||
"type": "string",
|
||||
"unit": "short",
|
||||
"valueMaps": [ ]
|
||||
},
|
||||
{
|
||||
"alias": "Image",
|
||||
"colorMode": null,
|
||||
"colors": [
|
||||
"rgba(245, 54, 54, 0.9)",
|
||||
"rgba(237, 129, 40, 0.89)",
|
||||
"rgba(50, 172, 45, 0.97)"
|
||||
],
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"decimals": 2,
|
||||
"mappingType": 1,
|
||||
"pattern": "image",
|
||||
"thresholds": [ ],
|
||||
"type": "string",
|
||||
"unit": "short",
|
||||
"valueMaps": [ ]
|
||||
},
|
||||
{
|
||||
"alias": "Throughput",
|
||||
"colorMode": null,
|
||||
"colors": [
|
||||
"rgba(245, 54, 54, 0.9)",
|
||||
"rgba(237, 129, 40, 0.89)",
|
||||
"rgba(50, 172, 45, 0.97)"
|
||||
],
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"decimals": 2,
|
||||
"mappingType": 1,
|
||||
"pattern": "Value",
|
||||
"thresholds": [ ],
|
||||
"type": "number",
|
||||
"unit": "Bps",
|
||||
"valueMaps": [ ]
|
||||
},
|
||||
{
|
||||
"alias": "",
|
||||
"colorMode": null,
|
||||
"colors": [
|
||||
"rgba(245, 54, 54, 0.9)",
|
||||
"rgba(237, 129, 40, 0.89)",
|
||||
"rgba(50, 172, 45, 0.97)"
|
||||
],
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"decimals": 2,
|
||||
"mappingType": 1,
|
||||
"pattern": "/.*/",
|
||||
"thresholds": [ ],
|
||||
"type": "hidden",
|
||||
"unit": "short",
|
||||
"valueMaps": [ ]
|
||||
}
|
||||
],
|
||||
"targets": [
|
||||
{
|
||||
"expr": "topk(10,\n sort(\n sum(\n rate(ceph_rbd_read_bytes{job=~\"$job\"}[$__rate_interval]) +\n rate(ceph_rbd_write_bytes{job=~\"$job\"}[$__rate_interval])\n ) by (pool, image, namespace)\n )\n)\n",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "Highest Throughput",
|
||||
"transform": "table",
|
||||
"type": "table"
|
||||
},
|
||||
{
|
||||
"columns": [ ],
|
||||
"datasource": "${prometheusds}",
|
||||
"description": "RBD per-image IO statistics are disabled by default.\n\nPlease refer to https://docs.ceph.com/en/latest/mgr/prometheus/#rbd-io-statistics for information about how to enable those optionally.",
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 8,
|
||||
"x": 16,
|
||||
"y": 7
|
||||
},
|
||||
"id": 7,
|
||||
"links": [ ],
|
||||
"sort": {
|
||||
"col": 3,
|
||||
"desc": true
|
||||
},
|
||||
"styles": [
|
||||
{
|
||||
"alias": "Pool",
|
||||
"colorMode": null,
|
||||
"colors": [
|
||||
"rgba(245, 54, 54, 0.9)",
|
||||
"rgba(237, 129, 40, 0.89)",
|
||||
"rgba(50, 172, 45, 0.97)"
|
||||
],
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"decimals": 2,
|
||||
"mappingType": 1,
|
||||
"pattern": "pool",
|
||||
"thresholds": [ ],
|
||||
"type": "string",
|
||||
"unit": "short",
|
||||
"valueMaps": [ ]
|
||||
},
|
||||
{
|
||||
"alias": "Image",
|
||||
"colorMode": null,
|
||||
"colors": [
|
||||
"rgba(245, 54, 54, 0.9)",
|
||||
"rgba(237, 129, 40, 0.89)",
|
||||
"rgba(50, 172, 45, 0.97)"
|
||||
],
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"decimals": 2,
|
||||
"mappingType": 1,
|
||||
"pattern": "image",
|
||||
"thresholds": [ ],
|
||||
"type": "string",
|
||||
"unit": "short",
|
||||
"valueMaps": [ ]
|
||||
},
|
||||
{
|
||||
"alias": "Latency",
|
||||
"colorMode": null,
|
||||
"colors": [
|
||||
"rgba(245, 54, 54, 0.9)",
|
||||
"rgba(237, 129, 40, 0.89)",
|
||||
"rgba(50, 172, 45, 0.97)"
|
||||
],
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"decimals": 2,
|
||||
"mappingType": 1,
|
||||
"pattern": "Value",
|
||||
"thresholds": [ ],
|
||||
"type": "number",
|
||||
"unit": "ns",
|
||||
"valueMaps": [ ]
|
||||
},
|
||||
{
|
||||
"alias": "",
|
||||
"colorMode": null,
|
||||
"colors": [
|
||||
"rgba(245, 54, 54, 0.9)",
|
||||
"rgba(237, 129, 40, 0.89)",
|
||||
"rgba(50, 172, 45, 0.97)"
|
||||
],
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"decimals": 2,
|
||||
"mappingType": 1,
|
||||
"pattern": "/.*/",
|
||||
"thresholds": [ ],
|
||||
"type": "hidden",
|
||||
"unit": "short",
|
||||
"valueMaps": [ ]
|
||||
}
|
||||
],
|
||||
"targets": [
|
||||
{
|
||||
"expr": "topk(10,\n sum(\n rate(ceph_rbd_write_latency_sum{job=~\"$job\"}[$__rate_interval]) /\n clamp_min(rate(ceph_rbd_write_latency_count{job=~\"$job\"}[$__rate_interval]), 1) +\n rate(ceph_rbd_read_latency_sum{job=~\"$job\"}[$__rate_interval]) /\n clamp_min(rate(ceph_rbd_read_latency_count{job=~\"$job\"}[$__rate_interval]), 1)\n ) by (pool, image, namespace)\n)\n",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "Highest Latency",
|
||||
"transform": "table",
|
||||
"type": "table"
|
||||
}
|
||||
],
|
||||
"refresh": "30s",
|
||||
"rows": [ ],
|
||||
"schemaVersion": 16,
|
||||
"style": "dark",
|
||||
"tags": [
|
||||
"ceph-mixin",
|
||||
"overview"
|
||||
],
|
||||
"templating": {
|
||||
"list": [
|
||||
{
|
||||
"allValue": ".+",
|
||||
"current": { },
|
||||
"datasource": "${prometheusds}",
|
||||
"hide": 2,
|
||||
"includeAll": true,
|
||||
"label": "cluster",
|
||||
"multi": true,
|
||||
"name": "cluster",
|
||||
"options": [ ],
|
||||
"query": "label_values(ceph_osd_metadata, cluster)",
|
||||
"refresh": 1,
|
||||
"regex": "(.*)",
|
||||
"sort": 1,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [ ],
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
},
|
||||
{
|
||||
"allValue": ".+",
|
||||
"current": { },
|
||||
"datasource": "${prometheusds}",
|
||||
"hide": 0,
|
||||
"includeAll": true,
|
||||
"label": "job",
|
||||
"multi": true,
|
||||
"name": "job",
|
||||
"options": [ ],
|
||||
"query": "label_values(ceph_osd_metadata{}, job)",
|
||||
"refresh": 1,
|
||||
"regex": "(.*)",
|
||||
"sort": 1,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [ ],
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
}
|
||||
]
|
||||
},
|
||||
"time": {
|
||||
"from": "now-1h",
|
||||
"to": "now"
|
||||
},
|
||||
"timepicker": {
|
||||
"refresh_intervals": [
|
||||
"5s",
|
||||
"10s",
|
||||
"30s",
|
||||
"1m",
|
||||
"5m",
|
||||
"15m",
|
||||
"30m",
|
||||
"1h",
|
||||
"2h",
|
||||
"1d"
|
||||
],
|
||||
"time_options": [
|
||||
"5m",
|
||||
"15m",
|
||||
"1h",
|
||||
"6h",
|
||||
"12h",
|
||||
"24h",
|
||||
"2d",
|
||||
"7d",
|
||||
"30d"
|
||||
]
|
||||
},
|
||||
"timezone": "",
|
||||
"title": "RBD Overview",
|
||||
"uid": "41FrpeUiz",
|
||||
"version": 0
|
||||
}
|
|
@ -0,0 +1,842 @@
|
|||
# Copyright 2023 Canonical Ltd.
|
||||
# See LICENSE file for licensing details.
|
||||
|
||||
r"""## Overview.
|
||||
|
||||
This library can be used to manage the cos_agent relation interface:
|
||||
|
||||
- `COSAgentProvider`: Use in machine charms that need to have a workload's metrics
|
||||
or logs scraped, or forward rule files or dashboards to Prometheus, Loki or Grafana through
|
||||
the Grafana Agent machine charm.
|
||||
|
||||
- `COSAgentRequirer`: Used in the Grafana Agent machine charm to manage the requirer side of
|
||||
the `cos_agent` interface.
|
||||
|
||||
|
||||
## COSAgentProvider Library Usage
|
||||
|
||||
Grafana Agent machine Charmed Operator interacts with its clients using the cos_agent library.
|
||||
Charms seeking to send telemetry must do so using the `COSAgentProvider` object from
|
||||
this charm library.
|
||||
|
||||
Using the `COSAgentProvider` object only requires instantiating it,
|
||||
typically in the `__init__` method of your charm (the one which sends telemetry).
|
||||
|
||||
The constructor of `COSAgentProvider` has only one required and nine optional parameters:
|
||||
|
||||
```python
|
||||
def __init__(
|
||||
self,
|
||||
charm: CharmType,
|
||||
relation_name: str = DEFAULT_RELATION_NAME,
|
||||
metrics_endpoints: Optional[List[_MetricsEndpointDict]] = None,
|
||||
metrics_rules_dir: str = "./src/prometheus_alert_rules",
|
||||
logs_rules_dir: str = "./src/loki_alert_rules",
|
||||
recurse_rules_dirs: bool = False,
|
||||
log_slots: Optional[List[str]] = None,
|
||||
dashboard_dirs: Optional[List[str]] = None,
|
||||
refresh_events: Optional[List] = None,
|
||||
scrape_configs: Optional[Union[List[Dict], Callable]] = None,
|
||||
):
|
||||
```
|
||||
|
||||
### Parameters
|
||||
|
||||
- `charm`: The instance of the charm that instantiates `COSAgentProvider`, typically `self`.
|
||||
|
||||
- `relation_name`: If your charmed operator uses a relation name other than `cos-agent` to use
|
||||
the `cos_agent` interface, this is where you have to specify that.
|
||||
|
||||
- `metrics_endpoints`: In this parameter you can specify the metrics endpoints that Grafana Agent
|
||||
machine Charmed Operator will scrape. The configs of this list will be merged with the configs
|
||||
from `scrape_configs`.
|
||||
|
||||
- `metrics_rules_dir`: The directory in which the Charmed Operator stores its metrics alert rules
|
||||
files.
|
||||
|
||||
- `logs_rules_dir`: The directory in which the Charmed Operator stores its logs alert rules files.
|
||||
|
||||
- `recurse_rules_dirs`: This parameter sets whether Grafana Agent machine Charmed Operator has to
|
||||
search alert rules files recursively in the previous two directories or not.
|
||||
|
||||
- `log_slots`: Snap slots to connect to for scraping logs in the form ["snap-name:slot", ...].
|
||||
|
||||
- `dashboard_dirs`: List of directories where the dashboards are stored in the Charmed Operator.
|
||||
|
||||
- `refresh_events`: List of events on which to refresh relation data.
|
||||
|
||||
- `scrape_configs`: List of standard scrape_configs dicts or a callable that returns the list in
|
||||
case the configs need to be generated dynamically. The contents of this list will be merged
|
||||
with the configs from `metrics_endpoints`.
|
||||
|
||||
|
||||
### Example 1 - Minimal instrumentation:
|
||||
|
||||
In order to use this object the following should be in the `charm.py` file.
|
||||
|
||||
```python
|
||||
from charms.grafana_agent.v0.cos_agent import COSAgentProvider
|
||||
...
|
||||
class TelemetryProviderCharm(CharmBase):
|
||||
def __init__(self, *args):
|
||||
...
|
||||
self._grafana_agent = COSAgentProvider(self)
|
||||
```
|
||||
|
||||
### Example 2 - Full instrumentation:
|
||||
|
||||
In order to use this object the following should be in the `charm.py` file.
|
||||
|
||||
```python
|
||||
from charms.grafana_agent.v0.cos_agent import COSAgentProvider
|
||||
...
|
||||
class TelemetryProviderCharm(CharmBase):
|
||||
def __init__(self, *args):
|
||||
...
|
||||
self._grafana_agent = COSAgentProvider(
|
||||
self,
|
||||
relation_name="custom-cos-agent",
|
||||
metrics_endpoints=[
|
||||
# specify "path" and "port" to scrape from localhost
|
||||
{"path": "/metrics", "port": 9000},
|
||||
{"path": "/metrics", "port": 9001},
|
||||
{"path": "/metrics", "port": 9002},
|
||||
],
|
||||
metrics_rules_dir="./src/alert_rules/prometheus",
|
||||
logs_rules_dir="./src/alert_rules/loki",
|
||||
recurse_rules_dirs=True,
|
||||
log_slots=["my-app:slot"],
|
||||
dashboard_dirs=["./src/dashboards_1", "./src/dashboards_2"],
|
||||
refresh_events=["update-status", "upgrade-charm"],
|
||||
scrape_configs=[
|
||||
{
|
||||
"job_name": "custom_job",
|
||||
"metrics_path": "/metrics",
|
||||
"authorization": {"credentials": "bearer-token"},
|
||||
"static_configs": [
|
||||
{
|
||||
"targets": ["localhost:9003"],
|
||||
"labels": {"key": "value"},
|
||||
},
|
||||
],
|
||||
},
|
||||
]
|
||||
)
|
||||
```
|
||||
|
||||
### Example 3 - Dynamic scrape configs generation:
|
||||
|
||||
Pass a function to the `scrape_configs` to decouple the generation of the configs
|
||||
from the instantiation of the COSAgentProvider object.
|
||||
|
||||
```python
|
||||
from charms.grafana_agent.v0.cos_agent import COSAgentProvider
|
||||
...
|
||||
|
||||
class TelemetryProviderCharm(CharmBase):
|
||||
def generate_scrape_configs(self):
|
||||
return [
|
||||
{
|
||||
"job_name": "custom",
|
||||
"metrics_path": "/metrics",
|
||||
"static_configs": [{"targets": ["localhost:9000"]}],
|
||||
},
|
||||
]
|
||||
|
||||
def __init__(self, *args):
|
||||
...
|
||||
self._grafana_agent = COSAgentProvider(
|
||||
self,
|
||||
scrape_configs=self.generate_scrape_configs,
|
||||
)
|
||||
```
|
||||
|
||||
## COSAgentRequirer Library Usage
|
||||
|
||||
This object may be used by any Charmed Operator which gathers telemetry data by
|
||||
implementing the consumer side of the `cos_agent` interface.
|
||||
For instance Grafana Agent machine Charmed Operator.
|
||||
|
||||
For this purpose the charm needs to instantiate the `COSAgentRequirer` object with one mandatory
|
||||
and two optional arguments.
|
||||
|
||||
### Parameters
|
||||
|
||||
- `charm`: A reference to the parent (Grafana Agent machine) charm.
|
||||
|
||||
- `relation_name`: The name of the relation that the charm uses to interact
|
||||
with its clients that provide telemetry data using the `COSAgentProvider` object.
|
||||
|
||||
If provided, this relation name must match a provided relation in metadata.yaml with the
|
||||
`cos_agent` interface.
|
||||
The default value of this argument is "cos-agent".
|
||||
|
||||
- `refresh_events`: List of events on which to refresh relation data.
|
||||
|
||||
|
||||
### Example 1 - Minimal instrumentation:
|
||||
|
||||
In order to use this object the following should be in the `charm.py` file.
|
||||
|
||||
```python
|
||||
from charms.grafana_agent.v0.cos_agent import COSAgentRequirer
|
||||
...
|
||||
class GrafanaAgentMachineCharm(GrafanaAgentCharm):
|
||||
def __init__(self, *args):
|
||||
...
|
||||
self._cos = COSAgentRequirer(self)
|
||||
```
|
||||
|
||||
|
||||
### Example 2 - Full instrumentation:
|
||||
|
||||
In order to use this object the following should be in the `charm.py` file.
|
||||
|
||||
```python
|
||||
from charms.grafana_agent.v0.cos_agent import COSAgentRequirer
|
||||
...
|
||||
class GrafanaAgentMachineCharm(GrafanaAgentCharm):
|
||||
def __init__(self, *args):
|
||||
...
|
||||
self._cos = COSAgentRequirer(
|
||||
self,
|
||||
relation_name="cos-agent-consumer",
|
||||
refresh_events=["update-status", "upgrade-charm"],
|
||||
)
|
||||
```
|
||||
"""
|
||||
|
||||
import base64
|
||||
import json
|
||||
import logging
|
||||
import lzma
|
||||
from collections import namedtuple
|
||||
from itertools import chain
|
||||
from pathlib import Path
|
||||
from typing import TYPE_CHECKING, Any, Callable, ClassVar, Dict, List, Optional, Set, Union
|
||||
|
||||
import pydantic
|
||||
from cosl import JujuTopology
|
||||
from cosl.rules import AlertRules
|
||||
from ops.charm import RelationChangedEvent
|
||||
from ops.framework import EventBase, EventSource, Object, ObjectEvents
|
||||
from ops.model import Relation, Unit
|
||||
from ops.testing import CharmType
|
||||
|
||||
if TYPE_CHECKING:
    # This branch is only taken when `typing.TYPE_CHECKING` is true; the name it
    # defines is referenced elsewhere in the file as a quoted annotation.
    try:
        from typing import TypedDict

        class _MetricsEndpointDict(TypedDict):
            """Shape of one `metrics_endpoints` entry: `{"path": ..., "port": ...}`."""

            path: str
            port: int

    except ImportError:
        # A missing *name* in an existing module raises plain ImportError, not
        # ModuleNotFoundError, so the broader class is required for this fallback
        # to ever trigger. ModuleNotFoundError is a subclass and is still caught.
        _MetricsEndpointDict = Dict  # pyright: ignore
|
||||
|
||||
# Library identity and version markers.
# NOTE(review): presumably read by charm library tooling — confirm before editing by hand.
LIBID = "dc15fa84cef84ce58155fb84f6c6213a"
LIBAPI = 0
LIBPATCH = 6

# Python package requirements for this library; pydantic is constrained below v2.
PYDEPS = ["cosl", "pydantic < 2"]

# Default relation names.
DEFAULT_RELATION_NAME = "cos-agent"
DEFAULT_PEER_RELATION_NAME = "peers"
# Scrape config literal: a single `localhost:80` target at `/metrics`.
DEFAULT_SCRAPE_CONFIG = {
    "static_configs": [{"targets": ["localhost:80"]}],
    "metrics_path": "/metrics",
}

logger = logging.getLogger(__name__)
# Two-field record type: `owner` and `name`.
SnapEndpoint = namedtuple("SnapEndpoint", "owner, name")
|
||||
|
||||
|
||||
class GrafanaDashboard(str):
    """Grafana dashboard JSON, LZMA-compressed and base64-encoded into a `str`."""

    # TODO Replace this with a custom type when pydantic v2 released (end of 2023 Q1?)
    # https://github.com/pydantic/pydantic/issues/4887
    @staticmethod
    def _serialize(raw_json: Union[str, bytes]) -> "GrafanaDashboard":
        """Encode raw dashboard JSON (text or bytes) into a `GrafanaDashboard`.

        Text input is UTF-8 encoded first; the bytes are then LZMA-compressed and
        base64-encoded.
        """
        payload = raw_json if isinstance(raw_json, bytes) else raw_json.encode("utf-8")
        compressed = lzma.compress(payload)
        return GrafanaDashboard(base64.b64encode(compressed).decode("utf-8"))

    def _deserialize(self) -> Dict:
        """Decode this value back into the parsed dashboard.

        Returns:
            The parsed JSON document, or an empty dict (after logging an error)
            when the decompressed payload is not valid JSON.
        """
        try:
            compressed = base64.b64decode(self.encode("utf-8"))
            document = lzma.decompress(compressed).decode()
            return json.loads(document)
        except json.decoder.JSONDecodeError as e:
            logger.error("Invalid Dashboard format: %s", e)
            return {}

    def __repr__(self):
        """Return a fixed placeholder rather than the encoded payload."""
        return "<GrafanaDashboard>"
|
||||
|
||||
|
||||
class CosAgentProviderUnitData(pydantic.BaseModel):
    """Unit databag model for `cos-agent` relation.

    The fields fall into two groups: entries shared by every unit of the same
    principal, and entries that may differ per unit.
    """

    # The following entries are the same for all units of the same principal.
    # Note that the same grafana agent subordinate may be related to several apps.
    # This data needs to make its way to the grafana agent leader.
    metrics_alert_rules: dict
    log_alert_rules: dict
    # Dashboards in their encoded `GrafanaDashboard` string form.
    dashboards: List[GrafanaDashboard]
    # NOTE(review): presumably flags whether the principal is itself a subordinate — confirm.
    subordinate: Optional[bool]

    # The following entries may vary across units of the same principal app.
    # This data does not need to be forwarded to the grafana agent leader.
    metrics_scrape_jobs: List[Dict]
    # Per the module docstring, entries have the form "snap-name:slot".
    log_slots: List[str]

    # When this whole data structure is dumped into a databag, it will be nested under this key.
    # While not strictly necessary (we could have it 'flattened out' into the databag),
    # this simplifies working with the model.
    KEY: ClassVar[str] = "config"
|
||||
|
||||
|
||||
class CosAgentPeersUnitData(pydantic.BaseModel):
    """Unit databag model for the `peers` relation of the cos-agent machine charm."""

    # Identity of the principal this data came from. The grafana agent leader is a
    # subordinate and can only see its own principal, so the unit name and relation
    # metadata travel with the peer data to let the leader render identifiers
    # (e.g. topology).
    principal_unit_name: str
    principal_relation_id: str
    principal_relation_name: str

    # Only data destined for the app databags of the outgoing o11y relations is
    # forwarded to the leader.
    metrics_alert_rules: Optional[dict]
    log_alert_rules: Optional[dict]
    dashboards: Optional[List[GrafanaDashboard]]

    # Key under which the whole model is nested when dumped into a databag.
    # Flattening it into the databag would also work; nesting keeps the model
    # simpler to handle.
    KEY: ClassVar[str] = "config"

    @property
    def app_name(self) -> str:
        """Return the part of `principal_unit_name` before the first "/".

        TODO: Switch to using `model_post_init` when pydantic v2 is released?
        https://github.com/pydantic/pydantic/issues/1729#issuecomment-1300576214
        """
        application, _, _ = self.principal_unit_name.partition("/")
        return application
|
||||
|
||||
|
||||
class COSAgentProvider(Object):
|
||||
"""Integration endpoint wrapper for the provider side of the cos_agent interface."""
|
||||
|
||||
def __init__(
    self,
    charm: CharmType,
    relation_name: str = DEFAULT_RELATION_NAME,
    metrics_endpoints: Optional[List["_MetricsEndpointDict"]] = None,
    metrics_rules_dir: str = "./src/prometheus_alert_rules",
    logs_rules_dir: str = "./src/loki_alert_rules",
    recurse_rules_dirs: bool = False,
    log_slots: Optional[List[str]] = None,
    dashboard_dirs: Optional[List[str]] = None,
    refresh_events: Optional[List] = None,
    *,
    scrape_configs: Optional[Union[List[dict], Callable]] = None,
):
    """Set up the provider side of the cos_agent relation.

    Args:
        charm: The `CharmBase` instance that is instantiating this object.
        relation_name: The name of the relation to communicate over.
        metrics_endpoints: List of endpoints in the form [{"path": path, "port": port}, ...].
            A simplified form of `scrape_configs`; both are merged into one job list.
        metrics_rules_dir: Directory where the metrics rules are stored.
        logs_rules_dir: Directory where the logs rules are stored.
        recurse_rules_dirs: Whether to recurse into rule paths.
        log_slots: Snap slots to connect to for scraping logs,
            in the form ["snap-name:slot", ...].
        dashboard_dirs: Directories where the dashboards are stored.
            Falls back to ["./src/grafana_dashboards"] when empty or omitted.
        refresh_events: Events on which to refresh relation data.
            Falls back to the charm's `config_changed` event when empty or omitted.
        scrape_configs: List of standard scrape_configs dicts, or a callable returning
            such a list when the configs need to be generated dynamically.
            Merged with the entries derived from `metrics_endpoints`.
    """
    super().__init__(charm, relation_name)

    self._charm = charm
    self._relation_name = relation_name

    # What to scrape.
    self._metrics_endpoints = metrics_endpoints or []
    self._scrape_configs = scrape_configs or []

    # Where alert rules live, and how to look them up.
    self._metrics_rules = metrics_rules_dir
    self._logs_rules = logs_rules_dir
    self._recursive = recurse_rules_dirs

    # Logs and dashboards.
    self._log_slots = log_slots or []
    self._dashboard_dirs = dashboard_dirs or ["./src/grafana_dashboards"]

    self._refresh_events = refresh_events or [self._charm.on.config_changed]

    # Relation lifecycle events first, then the configurable refresh events:
    # all of them funnel into the same handler.
    relation_events = self._charm.on[relation_name]
    triggers = [
        relation_events.relation_joined,
        relation_events.relation_changed,
        *self._refresh_events,
    ]
    for trigger in triggers:
        self.framework.observe(trigger, self._on_refresh)
|
||||
|
||||
def _on_refresh(self, event):
|
||||
"""Trigger the class to update relation data."""
|
||||
relations = self._charm.model.relations[self._relation_name]
|
||||
|
||||
for relation in relations:
|
||||
# Before a principal is related to the grafana-agent subordinate, we'd get
|
||||
# ModelError: ERROR cannot read relation settings: unit "zk/2": settings not found
|
||||
# Add a guard to make sure it doesn't happen.
|
||||
if relation.data and self._charm.unit in relation.data:
|
||||
# Subordinate relations can communicate only over unit data.
|
||||
try:
|
||||
data = CosAgentProviderUnitData(
|
||||
metrics_alert_rules=self._metrics_alert_rules,
|
||||
log_alert_rules=self._log_alert_rules,
|
||||
dashboards=self._dashboards,
|
||||
metrics_scrape_jobs=self._scrape_jobs,
|
||||
log_slots=self._log_slots,
|
||||
subordinate=self._charm.meta.subordinate,
|
||||
)
|
||||
relation.data[self._charm.unit][data.KEY] = data.json()
|
||||
except (
|
||||
pydantic.ValidationError,
|
||||
json.decoder.JSONDecodeError,
|
||||
) as e:
|
||||
logger.error("Invalid relation data provided: %s", e)
|
||||
|
||||
@property
|
||||
def _scrape_jobs(self) -> List[Dict]:
|
||||
"""Return a prometheus_scrape-like data structure for jobs.
|
||||
|
||||
https://prometheus.io/docs/prometheus/latest/configuration/configuration/#scrape_config
|
||||
"""
|
||||
if callable(self._scrape_configs):
|
||||
scrape_configs = self._scrape_configs()
|
||||
else:
|
||||
# Create a copy of the user scrape_configs, since we will mutate this object
|
||||
scrape_configs = self._scrape_configs.copy()
|
||||
|
||||
# Convert "metrics_endpoints" to standard scrape_configs, and add them in
|
||||
for endpoint in self._metrics_endpoints:
|
||||
scrape_configs.append(
|
||||
{
|
||||
"metrics_path": endpoint["path"],
|
||||
"static_configs": [{"targets": [f"localhost:{endpoint['port']}"]}],
|
||||
}
|
||||
)
|
||||
|
||||
scrape_configs = scrape_configs or [DEFAULT_SCRAPE_CONFIG]
|
||||
|
||||
# Augment job name to include the app name and a unique id (index)
|
||||
for idx, scrape_config in enumerate(scrape_configs):
|
||||
scrape_config["job_name"] = "_".join(
|
||||
[self._charm.app.name, str(idx), scrape_config.get("job_name", "default")]
|
||||
)
|
||||
|
||||
return scrape_configs
|
||||
|
||||
@property
def _metrics_alert_rules(self) -> Dict:
    """Load the promql alert rules found under the configured metrics rules directory.

    Uses (for now) the prometheus_scrape `AlertRules` helper, with the topology taken
    from this charm, and returns its dict representation.
    """
    topology = JujuTopology.from_charm(self._charm)
    rules = AlertRules(query_type="promql", topology=topology)
    rules.add_path(self._metrics_rules, recursive=self._recursive)
    return rules.as_dict()
|
||||
|
||||
@property
def _log_alert_rules(self) -> Dict:
    """Load the logql alert rules found under the configured logs rules directory.

    Uses (for now) the loki_push_api `AlertRules` helper, with the topology taken
    from this charm, and returns its dict representation.
    """
    topology = JujuTopology.from_charm(self._charm)
    rules = AlertRules(query_type="logql", topology=topology)
    rules.add_path(self._logs_rules, recursive=self._recursive)
    return rules.as_dict()
|
||||
|
||||
@property
|
||||
def _dashboards(self) -> List[GrafanaDashboard]:
|
||||
dashboards: List[GrafanaDashboard] = []
|
||||
for d in self._dashboard_dirs:
|
||||
for path in Path(d).glob("*"):
|
||||
dashboard = GrafanaDashboard._serialize(path.read_bytes())
|
||||
dashboards.append(dashboard)
|
||||
return dashboards
|
||||
|
||||
|
||||
class COSAgentDataChanged(EventBase):
    """Signal from `COSAgentRequirer` that the relation data it tracks has changed."""
|
||||
|
||||
|
||||
class COSAgentValidationError(EventBase):
    """Signal from `COSAgentRequirer` that received relation data failed validation.

    The human-readable reason is carried in `message` and survives snapshot/restore.
    """

    def __init__(self, handle, message: str = ""):
        super().__init__(handle)
        self.message = message

    def snapshot(self) -> Dict:
        """Return the event state to persist: the error message."""
        state = {"message": self.message}
        return state

    def restore(self, snapshot):
        """Re-populate the error message from a previously taken snapshot."""
        self.message = snapshot["message"]
|
||||
|
||||
|
||||
class COSAgentRequirerEvents(ObjectEvents):
    """Custom events exposed by `COSAgentRequirer` through its `on` attribute."""

    # Emitted when relation (or peer) data changed.
    data_changed = EventSource(COSAgentDataChanged)
    # Emitted when data received from a provider could not be validated.
    validation_error = EventSource(COSAgentValidationError)
|
||||
|
||||
|
||||
class MultiplePrincipalsError(Exception):
    """Raised when more than one related application claims to be the principal."""
|
||||
|
||||
|
||||
class COSAgentRequirer(Object):
|
||||
"""Integration endpoint wrapper for the Requirer side of the cos_agent interface."""
|
||||
|
||||
on = COSAgentRequirerEvents() # pyright: ignore
|
||||
|
||||
def __init__(
    self,
    charm: CharmType,
    *,
    relation_name: str = DEFAULT_RELATION_NAME,
    peer_relation_name: str = DEFAULT_PEER_RELATION_NAME,
    refresh_events: Optional[List[str]] = None,
):
    """Set up the requirer side of the cos_agent relation.

    Args:
        charm: The `CharmBase` instance that is instantiating this object.
        relation_name: The name of the relation to communicate over.
        peer_relation_name: The name of the peer relation to communicate over.
        refresh_events: Events on which to refresh relation data.
            Falls back to the charm's `config_changed` event when empty or omitted.
    """
    super().__init__(charm, relation_name)
    self._charm = charm
    self._relation_name = relation_name
    self._peer_relation_name = peer_relation_name
    self._refresh_events = refresh_events or [self._charm.on.config_changed]

    observe = self.framework.observe

    relation_events = self._charm.on[relation_name]
    # TODO: do we need this?
    observe(relation_events.relation_joined, self._on_relation_data_changed)
    observe(relation_events.relation_changed, self._on_relation_data_changed)
    for refresh_event in self._refresh_events:
        observe(refresh_event, self.trigger_refresh)  # pyright: ignore

    # Peer relation events
    # A peer relation is needed as it is the only mechanism for exchanging data across
    # subordinate units.
    # self.framework.observe(
    #     self.on[self._peer_relation_name].relation_joined, self._on_peer_relation_joined
    # )
    peer_events = self._charm.on[peer_relation_name]
    observe(peer_events.relation_changed, self._on_peer_relation_changed)
|
||||
|
||||
@property
def peer_relation(self) -> Optional["Relation"]:
    """Look up the peer relation object by its configured name.

    Returns: the peer relation object
             (NOTE: would return None if called too early, e.g. during install).
    """
    peers = self.model.get_relation(self._peer_relation_name)
    return peers
|
||||
|
||||
def _on_peer_relation_changed(self, _):
|
||||
# Peer data is used for forwarding data from principal units to the grafana agent
|
||||
# subordinate leader, for updating the app data of the outgoing o11y relations.
|
||||
if self._charm.unit.is_leader():
|
||||
self.on.data_changed.emit() # pyright: ignore
|
||||
|
||||
def _on_relation_data_changed(self, event: RelationChangedEvent):
|
||||
# Peer data is the only means of communication between subordinate units.
|
||||
if not self.peer_relation:
|
||||
event.defer()
|
||||
return
|
||||
|
||||
cos_agent_relation = event.relation
|
||||
if not event.unit or not cos_agent_relation.data.get(event.unit):
|
||||
return
|
||||
principal_unit = event.unit
|
||||
|
||||
# Coherence check
|
||||
units = cos_agent_relation.units
|
||||
if len(units) > 1:
|
||||
# should never happen
|
||||
raise ValueError(
|
||||
f"unexpected error: subordinate relation {cos_agent_relation} "
|
||||
f"should have exactly one unit"
|
||||
)
|
||||
|
||||
if not (raw := cos_agent_relation.data[principal_unit].get(CosAgentProviderUnitData.KEY)):
|
||||
return
|
||||
|
||||
if not (provider_data := self._validated_provider_data(raw)):
|
||||
return
|
||||
|
||||
# Copy data from the principal relation to the peer relation, so the leader could
|
||||
# follow up.
|
||||
# Save the originating unit name, so it could be used for topology later on by the leader.
|
||||
data = CosAgentPeersUnitData( # peer relation databag model
|
||||
principal_unit_name=event.unit.name,
|
||||
principal_relation_id=str(event.relation.id),
|
||||
principal_relation_name=event.relation.name,
|
||||
metrics_alert_rules=provider_data.metrics_alert_rules,
|
||||
log_alert_rules=provider_data.log_alert_rules,
|
||||
dashboards=provider_data.dashboards,
|
||||
)
|
||||
self.peer_relation.data[self._charm.unit][
|
||||
f"{CosAgentPeersUnitData.KEY}-{event.unit.name}"
|
||||
] = data.json()
|
||||
|
||||
# We can't easily tell if the data that was changed is limited to only the data
|
||||
# that goes into peer relation (in which case, if this is not a leader unit, we wouldn't
|
||||
# need to emit `on.data_changed`), so we're emitting `on.data_changed` either way.
|
||||
self.on.data_changed.emit() # pyright: ignore
|
||||
|
||||
def _validated_provider_data(self, raw) -> Optional[CosAgentProviderUnitData]:
    """Parse `raw` JSON into a `CosAgentProviderUnitData`.

    Returns:
        The parsed model, or None when the JSON cannot be decoded or the model fails
        validation; in that case a `validation_error` event carrying the error text
        is emitted.
    """
    try:
        parsed = json.loads(raw)
        provider_data = CosAgentProviderUnitData(**parsed)
    except (pydantic.ValidationError, json.decoder.JSONDecodeError) as err:
        self.on.validation_error.emit(message=str(err))  # pyright: ignore
        return None
    return provider_data
|
||||
|
||||
def trigger_refresh(self, _):
    """Trigger a refresh of relation data by emitting `data_changed`."""
    # FIXME: Figure out what we should do here
    data_changed = self.on.data_changed
    data_changed.emit()  # pyright: ignore
|
||||
|
||||
@property
|
||||
def _principal_unit(self) -> Optional[Unit]:
|
||||
"""Return the principal unit for a relation.
|
||||
|
||||
Assumes that the relation is of type subordinate.
|
||||
Relies on the fact that, for subordinate relations, the only remote unit visible to
|
||||
*this unit* is the principal unit that this unit is attached to.
|
||||
"""
|
||||
if relations := self._principal_relations:
|
||||
# Technically it's a list, but for subordinates there can only be one relation
|
||||
principal_relation = next(iter(relations))
|
||||
if units := principal_relation.units:
|
||||
# Technically it's a list, but for subordinates there can only be one
|
||||
return next(iter(units))
|
||||
|
||||
return None
|
||||
|
||||
@property
|
||||
def _principal_relations(self):
|
||||
relations = []
|
||||
for relation in self._charm.model.relations[self._relation_name]:
|
||||
if not json.loads(relation.data[next(iter(relation.units))]["config"]).get(
|
||||
["subordinate"], False
|
||||
):
|
||||
relations.append(relation)
|
||||
if len(relations) > 1:
|
||||
logger.error(
|
||||
"Multiple applications claiming to be principal. Update the cos-agent library in the client application charms."
|
||||
)
|
||||
raise MultiplePrincipalsError("Multiple principal applications.")
|
||||
return relations
|
||||
|
||||
@property
|
||||
def _remote_data(self) -> List[CosAgentProviderUnitData]:
|
||||
"""Return a list of remote data from each of the related units.
|
||||
|
||||
Assumes that the relation is of type subordinate.
|
||||
Relies on the fact that, for subordinate relations, the only remote unit visible to
|
||||
*this unit* is the principal unit that this unit is attached to.
|
||||
"""
|
||||
all_data = []
|
||||
|
||||
for relation in self._charm.model.relations[self._relation_name]:
|
||||
if not relation.units:
|
||||
continue
|
||||
unit = next(iter(relation.units))
|
||||
if not (raw := relation.data[unit].get(CosAgentProviderUnitData.KEY)):
|
||||
continue
|
||||
if not (provider_data := self._validated_provider_data(raw)):
|
||||
continue
|
||||
all_data.append(provider_data)
|
||||
|
||||
return all_data
|
||||
|
||||
def _gather_peer_data(self) -> List[CosAgentPeersUnitData]:
|
||||
"""Collect data from the peers.
|
||||
|
||||
Returns a trimmed-down list of CosAgentPeersUnitData.
|
||||
"""
|
||||
relation = self.peer_relation
|
||||
|
||||
# Ensure that whatever context we're running this in, we take the necessary precautions:
|
||||
if not relation or not relation.data or not relation.app:
|
||||
return []
|
||||
|
||||
# Iterate over all peer unit data and only collect every principal once.
|
||||
peer_data: List[CosAgentPeersUnitData] = []
|
||||
app_names: Set[str] = set()
|
||||
|
||||
for unit in chain((self._charm.unit,), relation.units):
|
||||
if not relation.data.get(unit):
|
||||
continue
|
||||
|
||||
for unit_name in relation.data.get(unit): # pyright: ignore
|
||||
if not unit_name.startswith(CosAgentPeersUnitData.KEY):
|
||||
continue
|
||||
raw = relation.data[unit].get(unit_name)
|
||||
if raw is None:
|
||||
continue
|
||||
data = CosAgentPeersUnitData(**json.loads(raw))
|
||||
# Have we already seen this principal app?
|
||||
if (app_name := data.app_name) in app_names:
|
||||
continue
|
||||
peer_data.append(data)
|
||||
app_names.add(app_name)
|
||||
|
||||
return peer_data
|
||||
|
||||
@property
def metrics_alerts(self) -> Dict[str, Any]:
    """Fetch metrics alerts.

    Returns:
        A mapping from a topology identifier (one per principal application) to the
        metrics alert rules forwarded for that application through peer data.
    """
    alert_rules = {}

    seen_apps: List[str] = []
    for data in self._gather_peer_data():
        rules = data.metrics_alert_rules
        if not rules:
            continue

        app_name = data.app_name
        if app_name in seen_apps:
            continue  # dedup!
        seen_apps.append(app_name)

        # This is only used for naming the file, so be as specific as we can be
        topology = JujuTopology(
            model=self._charm.model.name,
            model_uuid=self._charm.model.uuid,
            application=app_name,
            # For the topology unit, we could use `data.principal_unit_name`, but that unit
            # name may not be very stable: `_gather_peer_data` de-duplicates by app name so
            # the exact unit name that turns up first in the iterator may vary from time to
            # time. So using the grafana-agent unit name instead.
            unit=self._charm.unit.name,
        )
        alert_rules[topology.identifier] = rules

    return alert_rules
|
||||
|
||||
@property
def metrics_jobs(self) -> List[Dict]:
    """Parse the relation data contents and extract the metrics jobs.

    Jobs in the legacy simplified form ({"job_name", "path", "port"}) are converted to
    standard scrape configs targeting localhost; all other jobs are passed through
    unchanged.
    """
    # In #220, relation schema changed from a simplified dict to the standard
    # `scrape_configs`.
    # Jobs carrying all of these keys come from Providers older than v0.5.
    legacy_keys = ("path", "port", "job_name")

    scrape_jobs = []
    for data in self._remote_data:
        for job in data.metrics_scrape_jobs:
            if all(key in job for key in legacy_keys):
                job = {
                    "job_name": job["job_name"],
                    "metrics_path": job["path"],
                    "static_configs": [{"targets": [f"localhost:{job['port']}"]}],
                }
            scrape_jobs.append(job)

    return scrape_jobs
|
||||
|
||||
@property
def snap_log_endpoints(self) -> List[SnapEndpoint]:
    """Fetch logging endpoints exposed by related snaps.

    Slots listed more than once are kept once (a warning is logged for each repeat);
    entries that do not contain ":" are logged as errors and dropped.
    """
    # First pass: gather the distinct slot definitions, keeping first-seen order.
    plugs = []
    for data in self._remote_data:
        for target in data.log_slots or ():
            if target not in plugs:
                plugs.append(target)
                continue
            logger.warning(
                f"plug {target} already listed. "
                "The same snap is being passed from multiple "
                "endpoints; this should not happen."
            )

    # Second pass: turn every well-formed "snap-name:slot" entry into an endpoint.
    endpoints = []
    for plug in plugs:
        if ":" in plug:
            endpoints.append(SnapEndpoint(*plug.split(":")))
        else:
            logger.error(f"invalid plug definition received: {plug}. Ignoring...")
    return endpoints
|
||||
|
||||
@property
def logs_alerts(self) -> Dict[str, Any]:
    """Fetch log alerts.

    Returns:
        A mapping from a topology identifier (one per principal application) to the
        log alert rules forwarded for that application through peer data.
    """
    alert_rules = {}
    seen_apps: List[str] = []

    for data in self._gather_peer_data():
        rules = data.log_alert_rules
        if not rules:
            continue

        app_name = data.app_name
        if app_name in seen_apps:
            continue  # dedup!
        seen_apps.append(app_name)

        # This is only used for naming the file, so be as specific as we can be
        topology = JujuTopology(
            model=self._charm.model.name,
            model_uuid=self._charm.model.uuid,
            application=app_name,
            # For the topology unit, we could use `data.principal_unit_name`, but that unit
            # name may not be very stable: `_gather_peer_data` de-duplicates by app name so
            # the exact unit name that turns up first in the iterator may vary from time to
            # time. So using the grafana-agent unit name instead.
            unit=self._charm.unit.name,
        )
        alert_rules[topology.identifier] = rules

    return alert_rules
|
||||
|
||||
@property
def dashboards(self) -> List[Dict[str, str]]:
    """Fetch dashboards as decoded content, one entry per dashboard.

    Dashboards are assumed not to vary across units of the same principal, so only the
    first payload seen for each application is used.
    """
    collected: List[Dict[str, Any]] = []

    seen_apps: List[str] = []
    for data in self._gather_peer_data():
        app_name = data.app_name
        if app_name in seen_apps:
            continue  # dedup!
        seen_apps.append(app_name)

        for encoded_dashboard in data.dashboards or ():
            content = GrafanaDashboard(encoded_dashboard)._deserialize()
            entry = {
                "relation_id": data.principal_relation_id,
                # We have the remote charm name - use it for the identifier
                "charm": f"{data.principal_relation_name}-{app_name}",
                "content": content,
                "title": content.get("title", "no_title"),
            }
            collected.append(entry)

    return collected
|
|
@ -42,6 +42,9 @@ provides:
|
|||
interface: prometheus_scrape
|
||||
dashboard:
|
||||
interface: ceph-dashboard
|
||||
cos-agent:
|
||||
interface: cos_agent
|
||||
|
||||
requires:
|
||||
bootstrap-source:
|
||||
interface: ceph-bootstrap
|
||||
|
|
|
@ -3,12 +3,11 @@
|
|||
- charm-unit-jobs-py38
|
||||
- charm-unit-jobs-py310
|
||||
- charm-yoga-functional-jobs
|
||||
- charm-zed-functional-jobs
|
||||
vars:
|
||||
needs_charm_build: true
|
||||
charm_build_name: ceph-mon
|
||||
build_type: charmcraft
|
||||
charmcraft_channel: 2.0/stable
|
||||
charmcraft_channel: 2.x/stable
|
||||
check:
|
||||
jobs:
|
||||
- new-install-focal-yoga
|
||||
|
|
|
@ -9,6 +9,8 @@ import json
|
|||
import logging
|
||||
import os.path
|
||||
import pathlib
|
||||
import socket
|
||||
|
||||
from typing import Optional, Union, List, TYPE_CHECKING
|
||||
|
||||
import ops.model
|
||||
|
@ -17,6 +19,7 @@ if TYPE_CHECKING:
|
|||
import charm
|
||||
|
||||
from charms.prometheus_k8s.v0 import prometheus_scrape
|
||||
from charms.grafana_agent.v0 import cos_agent
|
||||
from charms_ceph import utils as ceph_utils
|
||||
from ops.framework import BoundEvent
|
||||
from utils import mgr_config_set_rbd_stats_pools
|
||||
|
@ -28,6 +31,10 @@ DEFAULT_CEPH_JOB = {
|
|||
"metrics_path": "/metrics",
|
||||
"static_configs": [{"targets": ["*:9283"]}],
|
||||
}
|
||||
# Metrics endpoint in the simplified {"path", "port"} form; same path and port as the
# scrape job defined above.
DEFAULT_CEPH_METRICS_ENDPOINT = {"path": "/metrics", "port": 9283}

# Location of the bundled prometheus alert rules, relative to the charm directory.
DEFAULT_ALERT_RULES_RELATIVE_PATH = "files/prometheus_alert_rules"
|
||||
|
||||
|
||||
|
@ -144,3 +151,77 @@ class CephMetricsEndpointProvider(prometheus_scrape.MetricsEndpointProvider):
|
|||
self._charm._stored.alert_rule_errors = msg
|
||||
return
|
||||
self._set_alert_rules(alert_rules_as_dict)
|
||||
|
||||
|
||||
class CephCOSAgentProvider(cos_agent.COSAgentProvider):
    """cos_agent provider for ceph-mon.

    Publishes the charm's alert rules, dashboards and a customised scrape job, and
    toggles the ceph-mgr "prometheus" module together with the relation.
    """

    def __init__(self, charm):
        super().__init__(
            charm,
            metrics_rules_dir="./files/prometheus_alert_rules",
            dashboard_dirs=["./files/grafana_dashboards"],
            scrape_configs=self._custom_scrape_configs,
        )
        relation_events = self._charm.on[cos_agent.DEFAULT_RELATION_NAME]
        self.framework.observe(
            relation_events.relation_departed, self._on_relation_departed
        )

    def _on_refresh(self, event):
        """Enable prometheus on relation change"""
        if self._charm.unit.is_leader() and ceph_utils.is_bootstrapped():
            logger.debug("refreshing cos_agent relation")
            mgr_config_set_rbd_stats_pools()
            ceph_utils.mgr_enable_module("prometheus")
        # NOTE(review): the library refresh is assumed to run on every unit, not only
        # on the bootstrapped leader - confirm against the original indentation.
        super()._on_refresh(event)

    def _on_relation_departed(self, event):
        """Disable prometheus on depart of relation"""
        if not (self._charm.unit.is_leader() and ceph_utils.is_bootstrapped()):
            return
        logger.debug(
            "is_leader and is_bootstrapped, running rel departed: %s",
            event,
        )
        ceph_utils.mgr_disable_module("prometheus")
        logger.debug("module_disabled")

    def _custom_scrape_configs(self):
        """Build the scrape job for localhost:9283 with rewritten instance labels."""
        fqdn = socket.getfqdn()
        # Everything after the first dot; the whole name when there is no dot at all.
        _, dot, remainder = fqdn.partition('.')
        domain = remainder if dot else fqdn

        # localhost:9283 is the generic default instance label
        # added by grafana-agent which is kinda useless.
        # Replace it with a somewhat more meaningful label
        generic_instance = {
            "source_labels": ["instance"],
            "regex": "^localhost:9283$",
            "target_label": "instance",
            "action": "replace",
            "replacement": "ceph_cluster",
        }
        # if we have a non-empty hostname label, use it as the
        # instance label
        hostname_as_instance = {
            "source_labels": ["hostname"],
            "regex": "(.+)",
            "target_label": "instance",
            "action": "replace",
            "replacement": "${1}",
        }
        # tack on the domain to the instance label to make it
        # conform to grafana-agent's node-exporter expectations
        append_domain = {
            "source_labels": ["instance"],
            "regex": "(.*)",
            "target_label": "instance",
            "action": "replace",
            "replacement": "${1}." + domain,
        }

        return [
            {
                "metrics_path": "/metrics",
                "static_configs": [{"targets": ["localhost:9283"]}],
                "honor_labels": True,
                "metric_relabel_configs": [
                    generic_instance,
                    hostname_as_instance,
                    append_domain,
                ],
            },
        ]
|
||||
|
|
|
@ -23,7 +23,6 @@ import ceph_metrics
|
|||
|
||||
import ops_actions
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
|
@ -222,6 +221,7 @@ class CephMonCharm(ops_openstack.core.OSBaseCharm):
|
|||
|
||||
self.clients = ceph_client.CephClientProvides(self)
|
||||
self.metrics_endpoint = ceph_metrics.CephMetricsEndpointProvider(self)
|
||||
self.cos_agent = ceph_metrics.CephCOSAgentProvider(self)
|
||||
self.ceph_status = ceph_status.StatusAssessor(self)
|
||||
|
||||
self._observe_action(self.on.change_osd_weight_action,
|
||||
|
|
|
@ -99,13 +99,45 @@ def _handle_rgw_key_rotation(entity, event, model):
|
|||
event.fail("Entity %s not found" % entity)
|
||||
|
||||
|
||||
def _find_mds_unit(relations, mds_name):
|
||||
for relation in relations:
|
||||
for unit in relation.units:
|
||||
try:
|
||||
if mds_name == relation.data[unit]['mds-name']:
|
||||
return relation.data
|
||||
except KeyError:
|
||||
logger.exception('mds name not found in relation data bag')
|
||||
|
||||
|
||||
def _handle_mds_key_rotation(entity, event, model):
|
||||
mds_name = entity[4:]
|
||||
relations = model.relations.get('mds')
|
||||
if not relations:
|
||||
event.fail('No mds relations found')
|
||||
return
|
||||
|
||||
bag = _find_mds_unit(relations, mds_name)
|
||||
if bag is None:
|
||||
event.fail('No unit found for entity: %s' % entity)
|
||||
return
|
||||
|
||||
pending_key = _create_key(entity, event)
|
||||
bag[model.unit][mds_name + "_mds_key"] = pending_key
|
||||
event.set_results({'message': 'success'})
|
||||
|
||||
|
||||
def _get_osd_tree():
    """Return the "osds" entries of `ceph osd dump` (an empty tuple if absent)."""
    command = ["sudo", "ceph", "osd", "dump", "--format=json"]
    raw = subprocess.check_output(command)
    dump = json.loads(raw.decode("utf8"))
    return dump.get("osds", ())
|
||||
|
||||
|
||||
def _get_osd_addr(osd_id, tree=None):
|
||||
def _clean_address(addr):
|
||||
ix = addr.find(":")
|
||||
return addr if ix < 0 else addr[0:ix]
|
||||
|
||||
|
||||
def _get_osd_addrs(osd_id, tree=None):
|
||||
if tree is None:
|
||||
tree = _get_osd_tree()
|
||||
|
||||
|
@ -113,9 +145,9 @@ def _get_osd_addr(osd_id, tree=None):
|
|||
if osd.get("osd") != osd_id:
|
||||
continue
|
||||
|
||||
addr = osd["public_addr"]
|
||||
ix = addr.find(":")
|
||||
return addr if ix < 0 else addr[0:ix]
|
||||
return [_clean_address(osd[x])
|
||||
for x in ("public_addr", "cluster_addr")
|
||||
if x in osd]
|
||||
|
||||
|
||||
def _get_unit_addr(unit, rel_id):
|
||||
|
@ -125,13 +157,13 @@ def _get_unit_addr(unit, rel_id):
|
|||
|
||||
|
||||
def _find_osd_unit(relations, model, osd_id, tree):
|
||||
addr = _get_osd_addr(osd_id, tree)
|
||||
if not addr:
|
||||
addrs = _get_osd_addrs(osd_id, tree)
|
||||
if not addrs:
|
||||
return None
|
||||
|
||||
for relation in relations:
|
||||
for unit in relation.units:
|
||||
if _get_unit_addr(unit.name, relation.id) == addr:
|
||||
if _get_unit_addr(unit.name, relation.id) in addrs:
|
||||
return relation.data[model.unit]
|
||||
|
||||
|
||||
|
@ -225,6 +257,8 @@ def rotate_key(event, model=None) -> None:
|
|||
event.set_results({"message": "success"})
|
||||
elif entity.startswith("client.rgw."):
|
||||
_handle_rgw_key_rotation(entity, event, model)
|
||||
elif entity.startswith('mds.'):
|
||||
_handle_mds_key_rotation(entity, event, model)
|
||||
elif entity == "osd":
|
||||
_rotate_all_osds(event, model)
|
||||
elif entity.startswith("osd."):
|
||||
|
|
|
@ -57,3 +57,6 @@ pyopenssl<=22.0.0
|
|||
|
||||
# newer jsonschema needs rustc and cargo
|
||||
jsonschema<4.18.0
|
||||
|
||||
pydantic < 2
|
||||
cosl
|
||||
|
|
|
@ -1,259 +0,0 @@
|
|||
variables:
|
||||
openstack-origin: &openstack-origin cloud:jammy-zed
|
||||
|
||||
series: jammy
|
||||
|
||||
comment:
|
||||
- 'machines section to decide order of deployment. database sooner = faster'
|
||||
machines:
|
||||
'0':
|
||||
constraints: mem=3072M
|
||||
'1':
|
||||
constraints: mem=3072M
|
||||
'2':
|
||||
constraints: mem=3072M
|
||||
'3':
|
||||
'4':
|
||||
'5':
|
||||
'6':
|
||||
'7':
|
||||
'8':
|
||||
'9':
|
||||
'10':
|
||||
'11':
|
||||
'12':
|
||||
'13':
|
||||
'14':
|
||||
'15':
|
||||
'16':
|
||||
'17':
|
||||
|
||||
|
||||
applications:
|
||||
|
||||
keystone-mysql-router:
|
||||
charm: ch:mysql-router
|
||||
channel: 8.0/edge
|
||||
glance-mysql-router:
|
||||
charm: ch:mysql-router
|
||||
channel: 8.0/edge
|
||||
cinder-mysql-router:
|
||||
charm: ch:mysql-router
|
||||
channel: 8.0/edge
|
||||
nova-cloud-controller-mysql-router:
|
||||
charm: ch:mysql-router
|
||||
channel: 8.0/edge
|
||||
placement-mysql-router:
|
||||
charm: ch:mysql-router
|
||||
channel: 8.0/edge
|
||||
|
||||
mysql-innodb-cluster:
|
||||
charm: ch:mysql-innodb-cluster
|
||||
num_units: 3
|
||||
to:
|
||||
- '0'
|
||||
- '1'
|
||||
- '2'
|
||||
channel: 8.0/edge
|
||||
|
||||
ceph-osd:
|
||||
charm: ch:ceph-osd
|
||||
num_units: 3
|
||||
storage:
|
||||
osd-devices: '10G'
|
||||
options:
|
||||
source: *openstack-origin
|
||||
osd-devices: '/dev/test-non-existent'
|
||||
to:
|
||||
- '3'
|
||||
- '4'
|
||||
- '5'
|
||||
channel: quincy/edge
|
||||
|
||||
ceph-mon:
|
||||
charm: ch:ceph-mon
|
||||
channel: quincy/edge
|
||||
num_units: 3
|
||||
options:
|
||||
source: *openstack-origin
|
||||
monitor-count: '3'
|
||||
to:
|
||||
- '6'
|
||||
- '7'
|
||||
- '8'
|
||||
|
||||
ceph-fs:
|
||||
charm: ch:ceph-fs
|
||||
num_units: 1
|
||||
options:
|
||||
source: *openstack-origin
|
||||
channel: quincy/edge
|
||||
to:
|
||||
- '17'
|
||||
|
||||
rabbitmq-server:
|
||||
charm: ch:rabbitmq-server
|
||||
num_units: 1
|
||||
to:
|
||||
- '9'
|
||||
channel: 3.9/edge
|
||||
|
||||
keystone:
|
||||
expose: True
|
||||
charm: ch:keystone
|
||||
num_units: 1
|
||||
options:
|
||||
openstack-origin: *openstack-origin
|
||||
to:
|
||||
- '10'
|
||||
channel: zed/edge
|
||||
|
||||
nova-compute:
|
||||
charm: ch:nova-compute
|
||||
num_units: 1
|
||||
options:
|
||||
openstack-origin: *openstack-origin
|
||||
libvirt-image-backend: rbd
|
||||
to:
|
||||
- '11'
|
||||
channel: zed/edge
|
||||
|
||||
glance:
|
||||
expose: True
|
||||
charm: ch:glance
|
||||
num_units: 1
|
||||
options:
|
||||
openstack-origin: *openstack-origin
|
||||
to:
|
||||
- '12'
|
||||
channel: zed/edge
|
||||
|
||||
cinder:
|
||||
expose: True
|
||||
charm: ch:cinder
|
||||
num_units: 1
|
||||
options:
|
||||
block-device: 'None'
|
||||
glance-api-version: '2'
|
||||
openstack-origin: *openstack-origin
|
||||
to:
|
||||
- '13'
|
||||
channel: zed/edge
|
||||
|
||||
cinder-ceph:
|
||||
charm: ch:cinder-ceph
|
||||
channel: zed/edge
|
||||
|
||||
nova-cloud-controller:
|
||||
expose: True
|
||||
charm: ch:nova-cloud-controller
|
||||
num_units: 1
|
||||
options:
|
||||
openstack-origin: *openstack-origin
|
||||
to:
|
||||
- '14'
|
||||
channel: zed/edge
|
||||
|
||||
placement:
|
||||
charm: ch:placement
|
||||
num_units: 1
|
||||
options:
|
||||
openstack-origin: *openstack-origin
|
||||
to:
|
||||
- '15'
|
||||
channel: zed/edge
|
||||
|
||||
prometheus2:
|
||||
charm: ch:prometheus2
|
||||
num_units: 1
|
||||
to:
|
||||
- '16'
|
||||
|
||||
relations:
|
||||
- - 'nova-compute:amqp'
|
||||
- 'rabbitmq-server:amqp'
|
||||
|
||||
- - 'nova-compute:image-service'
|
||||
- 'glance:image-service'
|
||||
|
||||
- - 'nova-compute:ceph'
|
||||
- 'ceph-mon:client'
|
||||
|
||||
- - nova-compute:ceph-access
|
||||
- cinder-ceph:ceph-access
|
||||
|
||||
- - 'keystone:shared-db'
|
||||
- 'keystone-mysql-router:shared-db'
|
||||
- - 'keystone-mysql-router:db-router'
|
||||
- 'mysql-innodb-cluster:db-router'
|
||||
|
||||
- - 'glance:shared-db'
|
||||
- 'glance-mysql-router:shared-db'
|
||||
- - 'glance-mysql-router:db-router'
|
||||
- 'mysql-innodb-cluster:db-router'
|
||||
|
||||
- - 'glance:identity-service'
|
||||
- 'keystone:identity-service'
|
||||
|
||||
- - 'glance:amqp'
|
||||
- 'rabbitmq-server:amqp'
|
||||
|
||||
- - 'glance:ceph'
|
||||
- 'ceph-mon:client'
|
||||
|
||||
- - 'cinder:shared-db'
|
||||
- 'cinder-mysql-router:shared-db'
|
||||
- - 'cinder-mysql-router:db-router'
|
||||
- 'mysql-innodb-cluster:db-router'
|
||||
|
||||
- - 'cinder:identity-service'
|
||||
- 'keystone:identity-service'
|
||||
|
||||
- - 'cinder:amqp'
|
||||
- 'rabbitmq-server:amqp'
|
||||
|
||||
- - 'cinder:image-service'
|
||||
- 'glance:image-service'
|
||||
|
||||
- - 'cinder-ceph:storage-backend'
|
||||
- 'cinder:storage-backend'
|
||||
|
||||
- - 'cinder-ceph:ceph'
|
||||
- 'ceph-mon:client'
|
||||
|
||||
- - 'ceph-osd:mon'
|
||||
- 'ceph-mon:osd'
|
||||
|
||||
- - 'ceph-mon:mds'
|
||||
- 'ceph-fs:ceph-mds'
|
||||
|
||||
- - 'nova-cloud-controller:shared-db'
|
||||
- 'nova-cloud-controller-mysql-router:shared-db'
|
||||
- - 'nova-cloud-controller-mysql-router:db-router'
|
||||
- 'mysql-innodb-cluster:db-router'
|
||||
|
||||
- - 'nova-cloud-controller:identity-service'
|
||||
- 'keystone:identity-service'
|
||||
|
||||
- - 'nova-cloud-controller:amqp'
|
||||
- 'rabbitmq-server:amqp'
|
||||
|
||||
- - 'nova-cloud-controller:cloud-compute'
|
||||
- 'nova-compute:cloud-compute'
|
||||
|
||||
- - 'nova-cloud-controller:image-service'
|
||||
- 'glance:image-service'
|
||||
|
||||
- - 'placement:shared-db'
|
||||
- 'placement-mysql-router:shared-db'
|
||||
- - 'placement-mysql-router:db-router'
|
||||
- 'mysql-innodb-cluster:db-router'
|
||||
|
||||
- - 'placement'
|
||||
- 'keystone'
|
||||
|
||||
- - 'placement'
|
||||
- 'nova-cloud-controller'
|
||||
|
||||
- - 'ceph-mon:prometheus'
|
||||
- 'prometheus2:target'
|
|
@ -1,265 +0,0 @@
|
|||
variables:
|
||||
openstack-origin: &openstack-origin distro
|
||||
|
||||
series: jammy
|
||||
|
||||
comment:
|
||||
- 'machines section to decide order of deployment. database sooner = faster'
|
||||
machines:
|
||||
'0':
|
||||
constraints: mem=3072M
|
||||
'1':
|
||||
constraints: mem=3072M
|
||||
'2':
|
||||
constraints: mem=3072M
|
||||
'3':
|
||||
'4':
|
||||
'5':
|
||||
'6':
|
||||
'7':
|
||||
'8':
|
||||
'9':
|
||||
'10':
|
||||
'11':
|
||||
'12':
|
||||
'13':
|
||||
'14':
|
||||
'15':
|
||||
'16':
|
||||
series: focal
|
||||
'17':
|
||||
|
||||
|
||||
applications:
|
||||
|
||||
keystone-mysql-router:
|
||||
charm: ch:mysql-router
|
||||
channel: 8.0.19/edge
|
||||
glance-mysql-router:
|
||||
charm: ch:mysql-router
|
||||
channel: 8.0.19/edge
|
||||
cinder-mysql-router:
|
||||
charm: ch:mysql-router
|
||||
channel: 8.0.19/edge
|
||||
nova-cloud-controller-mysql-router:
|
||||
charm: ch:mysql-router
|
||||
channel: 8.0.19/edge
|
||||
placement-mysql-router:
|
||||
charm: ch:mysql-router
|
||||
channel: 8.0.19/edge
|
||||
|
||||
mysql-innodb-cluster:
|
||||
charm: ch:mysql-innodb-cluster
|
||||
num_units: 3
|
||||
options:
|
||||
source: *openstack-origin
|
||||
to:
|
||||
- '0'
|
||||
- '1'
|
||||
- '2'
|
||||
channel: 8.0.19/edge
|
||||
|
||||
ceph-osd:
|
||||
charm: ch:ceph-osd
|
||||
num_units: 3
|
||||
storage:
|
||||
osd-devices: '10G'
|
||||
options:
|
||||
source: *openstack-origin
|
||||
osd-devices: '/dev/test-non-existent'
|
||||
to:
|
||||
- '3'
|
||||
- '4'
|
||||
- '5'
|
||||
channel: quincy/edge
|
||||
|
||||
ceph-mon:
|
||||
charm: ch:ceph-mon
|
||||
channel: quincy/edge
|
||||
num_units: 3
|
||||
options:
|
||||
source: *openstack-origin
|
||||
monitor-count: '3'
|
||||
to:
|
||||
- '6'
|
||||
- '7'
|
||||
- '8'
|
||||
|
||||
ceph-fs:
|
||||
charm: ch:ceph-fs
|
||||
num_units: 1
|
||||
options:
|
||||
source: *openstack-origin
|
||||
channel: quincy/edge
|
||||
to:
|
||||
- '17'
|
||||
|
||||
rabbitmq-server:
|
||||
charm: ch:rabbitmq-server
|
||||
num_units: 1
|
||||
options:
|
||||
source: *openstack-origin
|
||||
to:
|
||||
- '9'
|
||||
channel: 3.9/edge
|
||||
|
||||
keystone:
|
||||
expose: True
|
||||
charm: ch:keystone
|
||||
num_units: 1
|
||||
options:
|
||||
openstack-origin: *openstack-origin
|
||||
to:
|
||||
- '10'
|
||||
channel: yoga/edge
|
||||
|
||||
nova-compute:
|
||||
charm: ch:nova-compute
|
||||
num_units: 1
|
||||
options:
|
||||
openstack-origin: *openstack-origin
|
||||
libvirt-image-backend: rbd
|
||||
to:
|
||||
- '11'
|
||||
channel: yoga/edge
|
||||
|
||||
glance:
|
||||
expose: True
|
||||
charm: ch:glance
|
||||
num_units: 1
|
||||
options:
|
||||
openstack-origin: *openstack-origin
|
||||
to:
|
||||
- '12'
|
||||
channel: yoga/edge
|
||||
|
||||
cinder:
|
||||
expose: True
|
||||
charm: ch:cinder
|
||||
num_units: 1
|
||||
options:
|
||||
block-device: 'None'
|
||||
glance-api-version: '2'
|
||||
openstack-origin: *openstack-origin
|
||||
to:
|
||||
- '13'
|
||||
channel: yoga/edge
|
||||
|
||||
cinder-ceph:
|
||||
charm: ch:cinder-ceph
|
||||
channel: yoga/edge
|
||||
|
||||
nova-cloud-controller:
|
||||
expose: True
|
||||
charm: ch:nova-cloud-controller
|
||||
num_units: 1
|
||||
options:
|
||||
openstack-origin: *openstack-origin
|
||||
to:
|
||||
- '14'
|
||||
channel: yoga/edge
|
||||
|
||||
placement:
|
||||
charm: ch:placement
|
||||
num_units: 1
|
||||
options:
|
||||
openstack-origin: *openstack-origin
|
||||
to:
|
||||
- '15'
|
||||
channel: yoga/edge
|
||||
|
||||
prometheus2:
|
||||
charm: ch:prometheus2
|
||||
num_units: 1
|
||||
series: focal
|
||||
to:
|
||||
- '16'
|
||||
|
||||
relations:
|
||||
- - 'nova-compute:amqp'
|
||||
- 'rabbitmq-server:amqp'
|
||||
|
||||
- - 'nova-compute:image-service'
|
||||
- 'glance:image-service'
|
||||
|
||||
- - 'nova-compute:ceph'
|
||||
- 'ceph-mon:client'
|
||||
|
||||
- - nova-compute:ceph-access
|
||||
- cinder-ceph:ceph-access
|
||||
|
||||
- - 'keystone:shared-db'
|
||||
- 'keystone-mysql-router:shared-db'
|
||||
- - 'keystone-mysql-router:db-router'
|
||||
- 'mysql-innodb-cluster:db-router'
|
||||
|
||||
- - 'glance:shared-db'
|
||||
- 'glance-mysql-router:shared-db'
|
||||
- - 'glance-mysql-router:db-router'
|
||||
- 'mysql-innodb-cluster:db-router'
|
||||
|
||||
- - 'glance:identity-service'
|
||||
- 'keystone:identity-service'
|
||||
|
||||
- - 'glance:amqp'
|
||||
- 'rabbitmq-server:amqp'
|
||||
|
||||
- - 'glance:ceph'
|
||||
- 'ceph-mon:client'
|
||||
|
||||
- - 'cinder:shared-db'
|
||||
- 'cinder-mysql-router:shared-db'
|
||||
- - 'cinder-mysql-router:db-router'
|
||||
- 'mysql-innodb-cluster:db-router'
|
||||
|
||||
- - 'cinder:identity-service'
|
||||
- 'keystone:identity-service'
|
||||
|
||||
- - 'cinder:amqp'
|
||||
- 'rabbitmq-server:amqp'
|
||||
|
||||
- - 'cinder:image-service'
|
||||
- 'glance:image-service'
|
||||
|
||||
- - 'cinder-ceph:storage-backend'
|
||||
- 'cinder:storage-backend'
|
||||
|
||||
- - 'cinder-ceph:ceph'
|
||||
- 'ceph-mon:client'
|
||||
|
||||
- - 'ceph-osd:mon'
|
||||
- 'ceph-mon:osd'
|
||||
|
||||
- - 'ceph-mon:mds'
|
||||
- 'ceph-fs:ceph-mds'
|
||||
|
||||
- - 'nova-cloud-controller:shared-db'
|
||||
- 'nova-cloud-controller-mysql-router:shared-db'
|
||||
- - 'nova-cloud-controller-mysql-router:db-router'
|
||||
- 'mysql-innodb-cluster:db-router'
|
||||
|
||||
- - 'nova-cloud-controller:identity-service'
|
||||
- 'keystone:identity-service'
|
||||
|
||||
- - 'nova-cloud-controller:amqp'
|
||||
- 'rabbitmq-server:amqp'
|
||||
|
||||
- - 'nova-cloud-controller:cloud-compute'
|
||||
- 'nova-compute:cloud-compute'
|
||||
|
||||
- - 'nova-cloud-controller:image-service'
|
||||
- 'glance:image-service'
|
||||
|
||||
- - 'placement:shared-db'
|
||||
- 'placement-mysql-router:shared-db'
|
||||
- - 'placement-mysql-router:db-router'
|
||||
- 'mysql-innodb-cluster:db-router'
|
||||
|
||||
- - 'placement'
|
||||
- 'keystone'
|
||||
|
||||
- - 'placement'
|
||||
- 'nova-cloud-controller'
|
||||
|
||||
- - 'ceph-mon:prometheus'
|
||||
- 'prometheus2:target'
|
|
@ -17,8 +17,7 @@ import charm
|
|||
import helpers
|
||||
|
||||
|
||||
@helpers.patch_network_get()
|
||||
class TestCephMetrics(unittest.TestCase):
|
||||
class CephMetricsTestBase(unittest.TestCase):
|
||||
@classmethod
|
||||
def setUpClass(cls):
|
||||
"""Run once before tests begin."""
|
||||
|
@ -33,11 +32,17 @@ class TestCephMetrics(unittest.TestCase):
|
|||
rules: []
|
||||
"""
|
||||
)
|
||||
rules_file = cls.rules_dir / "alert-rules.yaml"
|
||||
with rules_file.open("w") as f:
|
||||
f.write(cls.rules)
|
||||
|
||||
@classmethod
def tearDownClass(cls):
    """Clean up ``cls.tempdir`` once the tests of the class have run."""
    scratch_dir = cls.tempdir
    scratch_dir.cleanup()
|
||||
|
||||
|
||||
@helpers.patch_network_get()
|
||||
class TestCephMetrics(CephMetricsTestBase):
|
||||
def setUp(self):
|
||||
super().setUp()
|
||||
self.harness = Harness(charm.CephMonCharm)
|
||||
|
@ -134,3 +139,60 @@ class TestCephMetrics(unittest.TestCase):
|
|||
self.harness.charm.metrics_endpoint.update_alert_rules()
|
||||
alert_rules = self.get_alert_rules(rel_id)
|
||||
self.assertTrue(alert_rules.get("groups"))
|
||||
|
||||
|
||||
class TestCephCOSAgentProvider(CephMetricsTestBase):
    """Tests for the ``cos_agent`` attribute of ``CephMonCharm``."""

    def setUp(self):
        """Start a leader-unit harness and point cos_agent at test rules."""
        super().setUp()
        self.harness = Harness(charm.CephMonCharm)
        self.addCleanup(self.harness.cleanup)
        self.harness.begin()
        self.harness.set_leader(True)
        # NOTE(review): rules_dir is presumably created by the base class's
        # setUpClass -- confirm against CephMetricsTestBase.
        self.harness.charm.cos_agent._metrics_rules = self.rules_dir

    def test_init(self):
        """The provider is bound to the ``cos-agent`` relation name."""
        self.assertEqual(
            self.harness.charm.cos_agent._relation_name,
            "cos-agent",
        )

    # Decorators are applied bottom-up, so the mock arguments arrive in
    # reverse decorator order.  The top-most patch supplies an explicit
    # replacement (a lambda) and therefore injects no argument.
    @patch("ceph_metrics.mgr_config_set_rbd_stats_pools", lambda: None)
    @patch("ceph_metrics.ceph_utils.is_bootstrapped", return_value=True)
    @patch("ceph_metrics.ceph_utils.is_mgr_module_enabled", return_value=False)
    @patch("ceph_metrics.ceph_utils.mgr_enable_module")
    @patch("ceph_metrics.ceph_utils.mgr_disable_module")
    def test_add_remove_rel(
        self,
        mgr_disable_module,
        mgr_enable_module,
        _is_mgr_module_enable,
        _is_bootstrapped,
    ):
        """Joining publishes scrape config; removal disables the module."""
        rel_id = self.harness.add_relation("cos-agent", "grafana-agent")
        self.harness.add_relation_unit(rel_id, "grafana-agent/0")

        unit_rel_data = self.harness.get_relation_data(
            rel_id, self.harness.model.unit
        )
        data = json.loads(unit_rel_data["config"])
        # assertIn reports the missing key and the container on failure,
        # unlike assertTrue(key in container).
        self.assertIn("metrics_scrape_jobs", data)
        self.assertEqual(
            data["metrics_scrape_jobs"][0]["metrics_path"], "/metrics"
        )
        self.assertIn("metrics_alert_rules", data)
        self.assertIn("groups", data["metrics_alert_rules"])
        mgr_enable_module.assert_called_once()

        self.harness.remove_relation(rel_id)
        mgr_disable_module.assert_called_once()

    @patch("socket.getfqdn", return_value="node1.ceph.example.com")
    def test_custom_scrape_configs(self, _mock_getfqdn):
        """Custom scrape config targets localhost:9283 and relabels."""
        configs = self.harness.charm.cos_agent._custom_scrape_configs()
        self.assertEqual(
            configs[0]["static_configs"][0]["targets"], ["localhost:9283"]
        )
        self.assertEqual(
            configs[0]["metric_relabel_configs"][0]["replacement"],
            "ceph_cluster",
        )
|
||||
|
|
Loading…
Reference in New Issue