Add support for the cos_agent relation

This supports grafana dashboards and metrics scraping from the ceph
mgr prometheus module.

Have to build with charmcraft 2.6 for dependency handling

Also remove zed tests as it's EOL

Change-Id: I8b2f132a4997d205119f7afe2a1ab6b2ae4c0134
cherry-picked from e35d908

Change-Id: Idd479cef04a24ea64af643bd6e142ac40906e86c
func-test-pr: https://github.com/openstack-charmers/zaza-openstack-tests/pull/1208
This commit is contained in:
Peter Sabaini 2023-11-22 14:53:54 +01:00 committed by Luciano Lo Giudice
parent 967559b4df
commit 6cd7be9036
25 changed files with 15803 additions and 536 deletions

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,348 @@
{
"__inputs": [ ],
"__requires": [
{
"id": "grafana",
"name": "Grafana",
"type": "grafana",
"version": "5.3.2"
},
{
"id": "graph",
"name": "Graph",
"type": "panel",
"version": "5.0.0"
}
],
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": "-- Grafana --",
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"showIn": 0,
"tags": [ ],
"type": "dashboard"
}
]
},
"description": "",
"editable": false,
"gnetId": null,
"graphTooltip": 0,
"hideControls": false,
"id": null,
"links": [ ],
"panels": [
{
"collapse": false,
"collapsed": false,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 0
},
"id": 2,
"panels": [ ],
"repeat": null,
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
"title": "MDS Performance",
"titleSize": "h6",
"type": "row"
},
{
"aliasColors": { },
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "${prometheusds}",
"description": "",
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 9,
"w": 12,
"x": 0,
"y": 1
},
"id": 3,
"legend": {
"alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": false,
"show": true,
"sideWidth": null,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [ ],
"nullPointMode": "null",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"repeat": null,
"seriesOverrides": [
{
"alias": "/.*Reads/",
"transform": "negative-Y"
}
],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "sum(rate(ceph_objecter_op_r{job=~\"$job\", ceph_daemon=~\"($mds_servers).*\"}[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "Read Ops",
"refId": "A"
},
{
"expr": "sum(rate(ceph_objecter_op_w{job=~\"$job\", ceph_daemon=~\"($mds_servers).*\"}[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "Write Ops",
"refId": "B"
}
],
"thresholds": [ ],
"timeFrom": null,
"timeShift": null,
"title": "MDS Workload - $mds_servers",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [ ]
},
"yaxes": [
{
"format": "none",
"label": "Reads(-) / Writes (+)",
"logBase": 1,
"max": null,
"min": 0,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
}
]
},
{
"aliasColors": { },
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "${prometheusds}",
"description": "",
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 9,
"w": 12,
"x": 12,
"y": 1
},
"id": 4,
"legend": {
"alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": false,
"show": true,
"sideWidth": null,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [ ],
"nullPointMode": "null",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"repeat": null,
"seriesOverrides": [ ],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "ceph_mds_server_handle_client_request{job=~\"$job\", ceph_daemon=~\"($mds_servers).*\"}",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{ceph_daemon}}",
"refId": "A"
}
],
"thresholds": [ ],
"timeFrom": null,
"timeShift": null,
"title": "Client Request Load - $mds_servers",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [ ]
},
"yaxes": [
{
"format": "none",
"label": "Client Requests",
"logBase": 1,
"max": null,
"min": 0,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
}
]
}
],
"refresh": "30s",
"rows": [ ],
"schemaVersion": 16,
"style": "dark",
"tags": [
"ceph-mixin"
],
"templating": {
"list": [
{
"allValue": ".+",
"current": { },
"datasource": "${prometheusds}",
"hide": 2,
"includeAll": true,
"label": "cluster",
"multi": true,
"name": "cluster",
"options": [ ],
"query": "label_values(ceph_osd_metadata, cluster)",
"refresh": 1,
"regex": "(.*)",
"sort": 1,
"tagValuesQuery": "",
"tags": [ ],
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
"allValue": ".+",
"current": { },
"datasource": "${prometheusds}",
"hide": 0,
"includeAll": true,
"label": "job",
"multi": true,
"name": "job",
"options": [ ],
"query": "label_values(ceph_osd_metadata{}, job)",
"refresh": 1,
"regex": "(.*)",
"sort": 1,
"tagValuesQuery": "",
"tags": [ ],
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
"allValue": null,
"current": { },
"datasource": "${prometheusds}",
"hide": 0,
"includeAll": true,
"label": "MDS Server",
"multi": false,
"name": "mds_servers",
"options": [ ],
"query": "label_values(ceph_mds_inodes{job=~\"$job\"}, ceph_daemon)",
"refresh": 1,
"regex": "",
"sort": 1,
"tagValuesQuery": "",
"tags": [ ],
"tagsQuery": "",
"type": "query",
"useTags": false
}
]
},
"time": {
"from": "now-1h",
"to": "now"
},
"timepicker": {
"refresh_intervals": [
"5s",
"10s",
"30s",
"1m",
"5m",
"15m",
"30m",
"1h",
"2h",
"1d"
],
"time_options": [
"5m",
"15m",
"1h",
"6h",
"12h",
"24h",
"2d",
"7d",
"30d"
]
},
"timezone": "",
"title": "MDS Performance",
"uid": "tbO9LAiZz",
"version": 0
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,880 @@
{
"__inputs": [ ],
"__requires": [
{
"id": "grafana",
"name": "Grafana",
"type": "grafana",
"version": "5.3.2"
},
{
"id": "graph",
"name": "Graph",
"type": "panel",
"version": "5.0.0"
},
{
"id": "singlestat",
"name": "Singlestat",
"type": "panel",
"version": "5.0.0"
}
],
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": "-- Grafana --",
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"showIn": 0,
"tags": [ ],
"type": "dashboard"
}
]
},
"description": "",
"editable": false,
"gnetId": null,
"graphTooltip": 0,
"hideControls": false,
"id": null,
"links": [ ],
"panels": [
{
"cacheTimeout": null,
"colorBackground": false,
"colorValue": false,
"colors": [
"#299c46",
"rgba(237, 129, 40, 0.89)",
"#d44a3a"
],
"datasource": "${prometheusds}",
"format": "none",
"gauge": {
"maxValue": 100,
"minValue": 0,
"show": false,
"thresholdLabels": false,
"thresholdMarkers": true
},
"gridPos": {
"h": 5,
"w": 4,
"x": 0,
"y": 0
},
"id": 2,
"interval": null,
"links": [ ],
"mappingType": 1,
"mappingTypes": [
{
"name": "value to text",
"value": 1
},
{
"name": "range to text",
"value": 2
}
],
"maxDataPoints": 100,
"nullPointMode": "connected",
"nullText": null,
"postfix": "",
"postfixFontSize": "50%",
"prefix": "",
"prefixFontSize": "50%",
"rangeMaps": [
{
"from": "null",
"text": "N/A",
"to": "null"
}
],
"sparkline": {
"fillColor": "rgba(31, 118, 189, 0.18)",
"full": false,
"lineColor": "rgb(31, 120, 193)",
"show": false
},
"tableColumn": "",
"targets": [
{
"expr": "count(sum by (instance) (ceph_osd_metadata{job=~\"$job\"}))",
"format": "time_series",
"instant": true,
"intervalFactor": 1,
"legendFormat": "",
"refId": "A"
}
],
"thresholds": "",
"title": "OSD Hosts",
"type": "singlestat",
"valueFontSize": "80%",
"valueMaps": [
{
"op": "=",
"text": "N/A",
"value": "null"
}
],
"valueName": "current"
},
{
"cacheTimeout": null,
"colorBackground": false,
"colorValue": false,
"colors": [
"#299c46",
"rgba(237, 129, 40, 0.89)",
"#d44a3a"
],
"datasource": "${prometheusds}",
"description": "Average CPU busy across all hosts (OSD, RGW, MON etc) within the cluster",
"format": "percentunit",
"gauge": {
"maxValue": 100,
"minValue": 0,
"show": false,
"thresholdLabels": false,
"thresholdMarkers": true
},
"gridPos": {
"h": 5,
"w": 4,
"x": 4,
"y": 0
},
"id": 3,
"interval": null,
"links": [ ],
"mappingType": 1,
"mappingTypes": [
{
"name": "value to text",
"value": 1
},
{
"name": "range to text",
"value": 2
}
],
"maxDataPoints": 100,
"nullPointMode": "connected",
"nullText": null,
"postfix": "",
"postfixFontSize": "50%",
"prefix": "",
"prefixFontSize": "50%",
"rangeMaps": [
{
"from": "null",
"text": "N/A",
"to": "null"
}
],
"sparkline": {
"fillColor": "rgba(31, 118, 189, 0.18)",
"full": false,
"lineColor": "rgb(31, 120, 193)",
"show": false
},
"tableColumn": "",
"targets": [
{
"expr": "avg(1 - (\n avg by(instance) (\n rate(node_cpu_seconds_total{mode='idle',instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}[$__rate_interval]) or\n rate(node_cpu{mode='idle',instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}[$__rate_interval])\n )\n))\n",
"format": "time_series",
"instant": true,
"intervalFactor": 1,
"legendFormat": "",
"refId": "A"
}
],
"thresholds": "",
"title": "AVG CPU Busy",
"type": "singlestat",
"valueFontSize": "80%",
"valueMaps": [
{
"op": "=",
"text": "N/A",
"value": "null"
}
],
"valueName": "current"
},
{
"cacheTimeout": null,
"colorBackground": false,
"colorValue": false,
"colors": [
"#299c46",
"rgba(237, 129, 40, 0.89)",
"#d44a3a"
],
"datasource": "${prometheusds}",
"description": "Average Memory Usage across all hosts in the cluster (excludes buffer/cache usage)",
"format": "percentunit",
"gauge": {
"maxValue": 100,
"minValue": 0,
"show": false,
"thresholdLabels": false,
"thresholdMarkers": true
},
"gridPos": {
"h": 5,
"w": 4,
"x": 8,
"y": 0
},
"id": 4,
"interval": null,
"links": [ ],
"mappingType": 1,
"mappingTypes": [
{
"name": "value to text",
"value": 1
},
{
"name": "range to text",
"value": 2
}
],
"maxDataPoints": 100,
"nullPointMode": "connected",
"nullText": null,
"postfix": "",
"postfixFontSize": "50%",
"prefix": "",
"prefixFontSize": "50%",
"rangeMaps": [
{
"from": "null",
"text": "N/A",
"to": "null"
}
],
"sparkline": {
"fillColor": "rgba(31, 118, 189, 0.18)",
"full": false,
"lineColor": "rgb(31, 120, 193)",
"show": false
},
"tableColumn": "",
"targets": [
{
"expr": "avg ((\n (\n node_memory_MemTotal{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"} or\n node_memory_MemTotal_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}\n ) - ((\n node_memory_MemFree{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"} or\n node_memory_MemFree_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}) +\n (\n node_memory_Cached{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"} or\n node_memory_Cached_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}\n ) + (\n node_memory_Buffers{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"} or\n node_memory_Buffers_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}\n ) + (\n node_memory_Slab{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"} or\n node_memory_Slab_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}\n )\n )\n) / (\n node_memory_MemTotal{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"} or\n node_memory_MemTotal_bytes{instance=~\"($osd_hosts|$rgw_hosts|$mon_hosts|$mds_hosts).*\"}\n))\n",
"format": "time_series",
"instant": true,
"intervalFactor": 1,
"legendFormat": "",
"refId": "A"
}
],
"thresholds": "",
"title": "AVG RAM Utilization",
"type": "singlestat",
"valueFontSize": "80%",
"valueMaps": [
{
"op": "=",
"text": "N/A",
"value": "null"
}
],
"valueName": "current"
},
{
"cacheTimeout": null,
"colorBackground": false,
"colorValue": false,
"colors": [
"#299c46",
"rgba(237, 129, 40, 0.89)",
"#d44a3a"
],
"datasource": "${prometheusds}",
"description": "IOPS Load at the device as reported by the OS on all OSD hosts",
"format": "none",
"gauge": {
"maxValue": 100,
"minValue": 0,
"show": false,
"thresholdLabels": false,
"thresholdMarkers": true
},
"gridPos": {
"h": 5,
"w": 4,
"x": 12,
"y": 0
},
"id": 5,
"interval": null,
"links": [ ],
"mappingType": 1,
"mappingTypes": [
{
"name": "value to text",
"value": 1
},
{
"name": "range to text",
"value": 2
}
],
"maxDataPoints": 100,
"nullPointMode": "connected",
"nullText": null,
"postfix": "",
"postfixFontSize": "50%",
"prefix": "",
"prefixFontSize": "50%",
"rangeMaps": [
{
"from": "null",
"text": "N/A",
"to": "null"
}
],
"sparkline": {
"fillColor": "rgba(31, 118, 189, 0.18)",
"full": false,
"lineColor": "rgb(31, 120, 193)",
"show": false
},
"tableColumn": "",
"targets": [
{
"expr": "sum ((\n rate(node_disk_reads_completed{instance=~\"($osd_hosts).*\"}[$__rate_interval]) or\n rate(node_disk_reads_completed_total{instance=~\"($osd_hosts).*\"}[$__rate_interval])\n) + (\n rate(node_disk_writes_completed{instance=~\"($osd_hosts).*\"}[$__rate_interval]) or\n rate(node_disk_writes_completed_total{instance=~\"($osd_hosts).*\"}[$__rate_interval])\n))\n",
"format": "time_series",
"instant": true,
"intervalFactor": 1,
"legendFormat": "",
"refId": "A"
}
],
"thresholds": "",
"title": "Physical IOPS",
"type": "singlestat",
"valueFontSize": "80%",
"valueMaps": [
{
"op": "=",
"text": "N/A",
"value": "null"
}
],
"valueName": "current"
},
{
"cacheTimeout": null,
"colorBackground": false,
"colorValue": false,
"colors": [
"#299c46",
"rgba(237, 129, 40, 0.89)",
"#d44a3a"
],
"datasource": "${prometheusds}",
"description": "Average Disk utilization for all OSD data devices (i.e. excludes journal/WAL)",
"format": "percent",
"gauge": {
"maxValue": 100,
"minValue": 0,
"show": false,
"thresholdLabels": false,
"thresholdMarkers": true
},
"gridPos": {
"h": 5,
"w": 4,
"x": 16,
"y": 0
},
"id": 6,
"interval": null,
"links": [ ],
"mappingType": 1,
"mappingTypes": [
{
"name": "value to text",
"value": 1
},
{
"name": "range to text",
"value": 2
}
],
"maxDataPoints": 100,
"nullPointMode": "connected",
"nullText": null,
"postfix": "",
"postfixFontSize": "50%",
"prefix": "",
"prefixFontSize": "50%",
"rangeMaps": [
{
"from": "null",
"text": "N/A",
"to": "null"
}
],
"sparkline": {
"fillColor": "rgba(31, 118, 189, 0.18)",
"full": false,
"lineColor": "rgb(31, 120, 193)",
"show": false
},
"tableColumn": "",
"targets": [
{
"expr": "avg (\n label_replace(\n (rate(node_disk_io_time_ms[$__rate_interval]) / 10 ) or\n (rate(node_disk_io_time_seconds_total[$__rate_interval]) * 100),\n \"instance\", \"$1\", \"instance\", \"([^.:]*).*\"\n ) * on(instance, device) group_left(ceph_daemon) label_replace(\n label_replace(\n ceph_disk_occupation_human{job=~\"$job\", instance=~\"($osd_hosts).*\"},\n \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^.:]*).*\"\n )\n)\n",
"format": "time_series",
"instant": true,
"intervalFactor": 1,
"legendFormat": "",
"refId": "A"
}
],
"thresholds": "",
"title": "AVG Disk Utilization",
"type": "singlestat",
"valueFontSize": "80%",
"valueMaps": [
{
"op": "=",
"text": "N/A",
"value": "null"
}
],
"valueName": "current"
},
{
"cacheTimeout": null,
"colorBackground": false,
"colorValue": false,
"colors": [
"#299c46",
"rgba(237, 129, 40, 0.89)",
"#d44a3a"
],
"datasource": "${prometheusds}",
"description": "Total send/receive network load across all hosts in the ceph cluster",
"format": "bytes",
"gauge": {
"maxValue": 100,
"minValue": 0,
"show": false,
"thresholdLabels": false,
"thresholdMarkers": true
},
"gridPos": {
"h": 5,
"w": 4,
"x": 20,
"y": 0
},
"id": 7,
"interval": null,
"links": [ ],
"mappingType": 1,
"mappingTypes": [
{
"name": "value to text",
"value": 1
},
{
"name": "range to text",
"value": 2
}
],
"maxDataPoints": 100,
"nullPointMode": "connected",
"nullText": null,
"postfix": "",
"postfixFontSize": "50%",
"prefix": "",
"prefixFontSize": "50%",
"rangeMaps": [
{
"from": "null",
"text": "N/A",
"to": "null"
}
],
"sparkline": {
"fillColor": "rgba(31, 118, 189, 0.18)",
"full": false,
"lineColor": "rgb(31, 120, 193)",
"show": false
},
"tableColumn": "",
"targets": [
{
"expr": "sum (\n (\n rate(node_network_receive_bytes{instance=~\"($osd_hosts|mon_hosts|mds_hosts|rgw_hosts).*\",device!=\"lo\"}[$__rate_interval]) or\n rate(node_network_receive_bytes_total{instance=~\"($osd_hosts|mon_hosts|mds_hosts|rgw_hosts).*\",device!=\"lo\"}[$__rate_interval])\n ) unless on (device, instance)\n label_replace((bonding_slaves > 0), \"device\", \"$1\", \"master\", \"(.+)\")\n) +\nsum (\n (\n rate(node_network_transmit_bytes{instance=~\"($osd_hosts|mon_hosts|mds_hosts|rgw_hosts).*\",device!=\"lo\"}[$__rate_interval]) or\n rate(node_network_transmit_bytes_total{instance=~\"($osd_hosts|mon_hosts|mds_hosts|rgw_hosts).*\",device!=\"lo\"}[$__rate_interval])\n ) unless on (device, instance)\n label_replace((bonding_slaves > 0), \"device\", \"$1\", \"master\", \"(.+)\")\n)\n",
"format": "time_series",
"instant": true,
"intervalFactor": 1,
"legendFormat": "",
"refId": "A"
}
],
"thresholds": "",
"title": "Network Load",
"type": "singlestat",
"valueFontSize": "80%",
"valueMaps": [
{
"op": "=",
"text": "N/A",
"value": "null"
}
],
"valueName": "current"
},
{
"aliasColors": { },
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "${prometheusds}",
"description": "Show the top 10 busiest hosts by cpu",
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 9,
"w": 12,
"x": 0,
"y": 5
},
"id": 8,
"legend": {
"alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": false,
"show": true,
"sideWidth": null,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [ ],
"nullPointMode": "null",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"repeat": null,
"seriesOverrides": [ ],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "topk(10,\n 100 * (\n 1 - (\n avg by(instance) (\n rate(node_cpu_seconds_total{mode='idle',instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}[$__rate_interval]) or\n rate(node_cpu{mode='idle',instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}[$__rate_interval])\n )\n )\n )\n)\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{instance}}",
"refId": "A"
}
],
"thresholds": [ ],
"timeFrom": null,
"timeShift": null,
"title": "CPU Busy - Top 10 Hosts",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [ ]
},
"yaxes": [
{
"format": "percent",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
}
]
},
{
"aliasColors": { },
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "${prometheusds}",
"description": "Top 10 hosts by network load",
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 9,
"w": 12,
"x": 12,
"y": 5
},
"id": 9,
"legend": {
"alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": false,
"show": true,
"sideWidth": null,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [ ],
"nullPointMode": "null",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"repeat": null,
"seriesOverrides": [ ],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "topk(10, (sum by(instance) (\n(\n rate(node_network_receive_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\",device!=\"lo\"}[$__rate_interval]) or\n rate(node_network_receive_bytes_total{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\",device!=\"lo\"}[$__rate_interval])\n) +\n(\n rate(node_network_transmit_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\",device!=\"lo\"}[$__rate_interval]) or\n rate(node_network_transmit_bytes_total{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\",device!=\"lo\"}[$__rate_interval])\n) unless on (device, instance)\n label_replace((bonding_slaves > 0), \"device\", \"$1\", \"master\", \"(.+)\"))\n))\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{instance}}",
"refId": "A"
}
],
"thresholds": [ ],
"timeFrom": null,
"timeShift": null,
"title": "Network Load - Top 10 Hosts",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [ ]
},
"yaxes": [
{
"format": "Bps",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
}
]
}
],
"refresh": "30s",
"rows": [ ],
"schemaVersion": 16,
"style": "dark",
"tags": [
"ceph-mixin"
],
"templating": {
"list": [
{
"allValue": ".+",
"current": { },
"datasource": "${prometheusds}",
"hide": 2,
"includeAll": true,
"label": "cluster",
"multi": true,
"name": "cluster",
"options": [ ],
"query": "label_values(ceph_osd_metadata, cluster)",
"refresh": 1,
"regex": "(.*)",
"sort": 1,
"tagValuesQuery": "",
"tags": [ ],
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
"allValue": ".+",
"current": { },
"datasource": "${prometheusds}",
"hide": 0,
"includeAll": true,
"label": "job",
"multi": true,
"name": "job",
"options": [ ],
"query": "label_values(ceph_osd_metadata{}, job)",
"refresh": 1,
"regex": "(.*)",
"sort": 1,
"tagValuesQuery": "",
"tags": [ ],
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
"allValue": null,
"current": { },
"datasource": "${prometheusds}",
"hide": 0,
"includeAll": true,
"label": null,
"multi": false,
"name": "osd_hosts",
"options": [ ],
"query": "label_values(ceph_disk_occupation{job=~\"$job\"}, exported_instance)",
"refresh": 1,
"regex": "([^.]*).*",
"sort": 1,
"tagValuesQuery": "",
"tags": [ ],
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
"allValue": null,
"current": { },
"datasource": "${prometheusds}",
"hide": 0,
"includeAll": true,
"label": null,
"multi": false,
"name": "mon_hosts",
"options": [ ],
"query": "label_values(ceph_mon_metadata{job=~\"$job\"}, ceph_daemon)",
"refresh": 1,
"regex": "mon.(.*)",
"sort": 1,
"tagValuesQuery": "",
"tags": [ ],
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
"allValue": null,
"current": { },
"datasource": "${prometheusds}",
"hide": 0,
"includeAll": true,
"label": null,
"multi": false,
"name": "mds_hosts",
"options": [ ],
"query": "label_values(ceph_mds_inodes{job=~\"$job\"}, ceph_daemon)",
"refresh": 1,
"regex": "mds.(.*)",
"sort": 1,
"tagValuesQuery": "",
"tags": [ ],
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
"allValue": null,
"current": { },
"datasource": "${prometheusds}",
"hide": 0,
"includeAll": true,
"label": null,
"multi": false,
"name": "rgw_hosts",
"options": [ ],
"query": "label_values(ceph_rgw_metadata{job=~\"$job\"}, ceph_daemon)",
"refresh": 1,
"regex": "rgw.(.*)",
"sort": 1,
"tagValuesQuery": "",
"tags": [ ],
"tagsQuery": "",
"type": "query",
"useTags": false
}
]
},
"time": {
"from": "now-1h",
"to": "now"
},
"timepicker": {
"refresh_intervals": [
"5s",
"10s",
"30s",
"1m",
"5m",
"15m",
"30m",
"1h",
"2h",
"1d"
],
"time_options": [
"5m",
"15m",
"1h",
"6h",
"12h",
"24h",
"2d",
"7d",
"30d"
]
},
"timezone": "",
"title": "Ceph OSD Host Overview",
"uid": "y0KGL0iZz",
"version": 0
}

View File

@@ -0,0 +1,857 @@
{
"__inputs": [ ],
"__requires": [
{
"id": "grafana",
"name": "Grafana",
"type": "grafana",
"version": "5.3.2"
},
{
"id": "graph",
"name": "Graph",
"type": "panel",
"version": "5.0.0"
}
],
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": "-- Grafana --",
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"showIn": 0,
"tags": [ ],
"type": "dashboard"
}
]
},
"description": "",
"editable": false,
"gnetId": null,
"graphTooltip": 0,
"hideControls": false,
"id": null,
"links": [ ],
"panels": [
{
"collapse": false,
"collapsed": false,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 0
},
"id": 2,
"panels": [ ],
"repeat": null,
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
"title": "OSD Performance",
"titleSize": "h6",
"type": "row"
},
{
"aliasColors": { },
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "${prometheusds}",
"description": "",
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 9,
"w": 6,
"x": 0,
"y": 1
},
"id": 3,
"legend": {
"alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": false,
"show": true,
"sideWidth": null,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [ ],
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"repeat": null,
"seriesOverrides": [
{
"alias": "read",
"transform": "negative-Y"
}
],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "rate(ceph_osd_op_r_latency_sum{job=~\"$job\", ceph_daemon=~\"$osd\"}[$__rate_interval]) /\n on (ceph_daemon) rate(ceph_osd_op_r_latency_count{job=~\"$job\"}[$__rate_interval])\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "read",
"refId": "A"
},
{
"expr": "rate(ceph_osd_op_w_latency_sum{job=~\"$job\", ceph_daemon=~\"$osd\"}[$__rate_interval]) /\n on (ceph_daemon) rate(ceph_osd_op_w_latency_count{job=~\"$job\"}[$__rate_interval])\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "write",
"refId": "B"
}
],
"thresholds": [ ],
"timeFrom": null,
"timeShift": null,
"title": "$osd Latency",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [ ]
},
"yaxes": [
{
"format": "s",
"label": "Read (-) / Write (+)",
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
]
},
{
"aliasColors": { },
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "${prometheusds}",
"description": "",
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 9,
"w": 6,
"x": 6,
"y": 1
},
"id": 4,
"legend": {
"alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": false,
"show": true,
"sideWidth": null,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [ ],
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"repeat": null,
"seriesOverrides": [
{
"alias": "Reads",
"transform": "negative-Y"
}
],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "rate(ceph_osd_op_r{job=~\"$job\", ceph_daemon=~\"$osd\"}[$__rate_interval])",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "Reads",
"refId": "A"
},
{
"expr": "rate(ceph_osd_op_w{job=~\"$job\", ceph_daemon=~\"$osd\"}[$__rate_interval])",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "Writes",
"refId": "B"
}
],
"thresholds": [ ],
"timeFrom": null,
"timeShift": null,
"title": "$osd R/W IOPS",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [ ]
},
"yaxes": [
{
"format": "short",
"label": "Read (-) / Write (+)",
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
]
},
{
"aliasColors": { },
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "${prometheusds}",
"description": "",
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 9,
"w": 6,
"x": 12,
"y": 1
},
"id": 5,
"legend": {
"alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": false,
"show": true,
"sideWidth": null,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [ ],
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"repeat": null,
"seriesOverrides": [
{
"alias": "Read Bytes",
"transform": "negative-Y"
}
],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "rate(ceph_osd_op_r_out_bytes{job=~\"$job\", ceph_daemon=~\"$osd\"}[$__rate_interval])",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "Read Bytes",
"refId": "A"
},
{
"expr": "rate(ceph_osd_op_w_in_bytes{job=~\"$job\", ceph_daemon=~\"$osd\"}[$__rate_interval])",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "Write Bytes",
"refId": "B"
}
],
"thresholds": [ ],
"timeFrom": null,
"timeShift": null,
"title": "$osd R/W Bytes",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [ ]
},
"yaxes": [
{
"format": "bytes",
"label": "Read (-) / Write (+)",
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
]
},
{
"collapse": false,
"collapsed": false,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 10
},
"id": 6,
"panels": [ ],
"repeat": null,
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
"title": "Physical Device Performance",
"titleSize": "h6",
"type": "row"
},
{
"aliasColors": { },
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "${prometheusds}",
"description": "",
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 9,
"w": 6,
"x": 0,
"y": 11
},
"id": 7,
"legend": {
"alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": false,
"show": true,
"sideWidth": null,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [ ],
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"repeat": null,
"seriesOverrides": [
{
"alias": "/.*Reads/",
"transform": "negative-Y"
}
],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "(\n label_replace(\n rate(node_disk_read_time_seconds_total{}[$__rate_interval]) /\n rate(node_disk_reads_completed_total{}[$__rate_interval]),\n \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n ) and on (instance, device) label_replace(\n label_replace(\n ceph_disk_occupation_human{job=~\"$job\", ceph_daemon=~\"$osd\"},\n \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n )\n)\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{instance}}/{{device}} Reads",
"refId": "A"
},
{
"expr": "(\n label_replace(\n rate(node_disk_write_time_seconds_total{}[$__rate_interval]) /\n rate(node_disk_writes_completed_total{}[$__rate_interval]),\n \"instance\", \"$1\", \"instance\", \"([^:.]*).*\") and on (instance, device)\n label_replace(\n label_replace(\n ceph_disk_occupation_human{job=~\"$job\", ceph_daemon=~\"$osd\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n )\n )\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{instance}}/{{device}} Writes",
"refId": "B"
}
],
"thresholds": [ ],
"timeFrom": null,
"timeShift": null,
"title": "Physical Device Latency for $osd",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [ ]
},
"yaxes": [
{
"format": "s",
"label": "Read (-) / Write (+)",
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
]
},
{
"aliasColors": { },
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "${prometheusds}",
"description": "",
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 9,
"w": 6,
"x": 6,
"y": 11
},
"id": 8,
"legend": {
"alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": false,
"show": true,
"sideWidth": null,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [ ],
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"repeat": null,
"seriesOverrides": [
{
"alias": "/.*Reads/",
"transform": "negative-Y"
}
],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "label_replace(\n rate(node_disk_writes_completed_total{}[$__rate_interval]),\n \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n) and on (instance, device) label_replace(\n label_replace(\n ceph_disk_occupation_human{job=~\"$job\", ceph_daemon=~\"$osd\"},\n \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n)\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{device}} on {{instance}} Writes",
"refId": "A"
},
{
"expr": "label_replace(\n rate(node_disk_reads_completed_total{}[$__rate_interval]),\n \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n) and on (instance, device) label_replace(\n label_replace(\n ceph_disk_occupation_human{job=~\"$job\", ceph_daemon=~\"$osd\"},\n \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n)\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{device}} on {{instance}} Reads",
"refId": "B"
}
],
"thresholds": [ ],
"timeFrom": null,
"timeShift": null,
"title": "Physical Device R/W IOPS for $osd",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [ ]
},
"yaxes": [
{
"format": "short",
"label": "Read (-) / Write (+)",
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
]
},
{
"aliasColors": { },
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "${prometheusds}",
"description": "",
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 9,
"w": 6,
"x": 12,
"y": 11
},
"id": 9,
"legend": {
"alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": false,
"show": true,
"sideWidth": null,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [ ],
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"repeat": null,
"seriesOverrides": [
{
"alias": "/.*Reads/",
"transform": "negative-Y"
}
],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "label_replace(\n rate(node_disk_read_bytes_total{}[$__rate_interval]), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n) and on (instance, device) label_replace(\n label_replace(\n ceph_disk_occupation_human{job=~\"$job\", ceph_daemon=~\"$osd\"},\n \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n)\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{instance}} {{device}} Reads",
"refId": "A"
},
{
"expr": "label_replace(\n rate(node_disk_written_bytes_total{}[$__rate_interval]), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n) and on (instance, device) label_replace(\n label_replace(\n ceph_disk_occupation_human{job=~\"$job\", ceph_daemon=~\"$osd\"},\n \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n)\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{instance}} {{device}} Writes",
"refId": "B"
}
],
"thresholds": [ ],
"timeFrom": null,
"timeShift": null,
"title": "Physical Device R/W Bytes for $osd",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [ ]
},
"yaxes": [
{
"format": "Bps",
"label": "Read (-) / Write (+)",
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
]
},
{
"aliasColors": { },
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "${prometheusds}",
"description": "",
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 9,
"w": 6,
"x": 18,
"y": 11
},
"id": 10,
"legend": {
"alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": false,
"show": true,
"sideWidth": null,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [ ],
"nullPointMode": "null",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"repeat": null,
"seriesOverrides": [ ],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "label_replace(\n rate(node_disk_io_time_seconds_total{}[$__rate_interval]),\n \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n) and on (instance, device) label_replace(\n label_replace(\n ceph_disk_occupation_human{job=~\"$job\", ceph_daemon=~\"$osd\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n)\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{device}} on {{instance}}",
"refId": "A"
}
],
"thresholds": [ ],
"timeFrom": null,
"timeShift": null,
"title": "Physical Device Util% for $osd",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [ ]
},
"yaxes": [
{
"format": "percentunit",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
]
}
],
"refresh": "30s",
"rows": [ ],
"schemaVersion": 16,
"style": "dark",
"tags": [
"ceph-mixin"
],
"templating": {
"list": [
{
"allValue": ".+",
"current": { },
"datasource": "${prometheusds}",
"hide": 2,
"includeAll": true,
"label": "cluster",
"multi": true,
"name": "cluster",
"options": [ ],
"query": "label_values(ceph_osd_metadata, cluster)",
"refresh": 1,
"regex": "(.*)",
"sort": 1,
"tagValuesQuery": "",
"tags": [ ],
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
"allValue": ".+",
"current": { },
"datasource": "${prometheusds}",
"hide": 0,
"includeAll": true,
"label": "job",
"multi": true,
"name": "job",
"options": [ ],
"query": "label_values(ceph_osd_metadata{}, job)",
"refresh": 1,
"regex": "(.*)",
"sort": 1,
"tagValuesQuery": "",
"tags": [ ],
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
"allValue": null,
"current": { },
"datasource": "${prometheusds}",
"hide": 0,
"includeAll": false,
"label": "OSD",
"multi": false,
"name": "osd",
"options": [ ],
"query": "label_values(ceph_osd_metadata{job=~\"$job\"}, ceph_daemon)",
"refresh": 1,
"regex": "(.*)",
"sort": 1,
"tagValuesQuery": "",
"tags": [ ],
"tagsQuery": "",
"type": "query",
"useTags": false
}
]
},
"time": {
"from": "now-3h",
"to": "now"
},
"timepicker": {
"refresh_intervals": [
"5s",
"10s",
"30s",
"1m",
"5m",
"15m",
"30m",
"1h",
"2h",
"1d"
],
"time_options": [
"5m",
"15m",
"1h",
"6h",
"12h",
"24h",
"2d",
"7d",
"30d"
]
},
"timezone": "",
"title": "OSD device details",
"uid": "CrAHE0iZz",
"version": 0
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,694 @@
{
"__inputs": [ ],
"__requires": [
{
"id": "grafana",
"name": "Grafana",
"type": "grafana",
"version": "5.3.2"
},
{
"id": "graph",
"name": "Graph",
"type": "panel",
"version": "5.0.0"
},
{
"id": "singlestat",
"name": "Singlestat",
"type": "panel",
"version": "5.0.0"
}
],
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": "-- Grafana --",
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"showIn": 0,
"tags": [ ],
"type": "dashboard"
}
]
},
"description": "",
"editable": false,
"gnetId": null,
"graphTooltip": 0,
"hideControls": false,
"id": null,
"links": [ ],
"panels": [
{
"cacheTimeout": null,
"colorBackground": false,
"colorValue": true,
"colors": [
"#299c46",
"rgba(237, 129, 40, 0.89)",
"#d44a3a"
],
"datasource": "${prometheusds}",
"format": "percentunit",
"gauge": {
"maxValue": 1,
"minValue": 0,
"show": true,
"thresholdLabels": false,
"thresholdMarkers": true
},
"gridPos": {
"h": 7,
"w": 7,
"x": 0,
"y": 0
},
"id": 2,
"interval": null,
"links": [ ],
"mappingType": 1,
"mappingTypes": [
{
"name": "value to text",
"value": 1
},
{
"name": "range to text",
"value": 2
}
],
"maxDataPoints": 100,
"nullPointMode": "connected",
"nullText": null,
"postfix": "",
"postfixFontSize": "50%",
"prefix": "",
"prefixFontSize": "50%",
"rangeMaps": [
{
"from": "null",
"text": "N/A",
"to": "null"
}
],
"sparkline": {
"fillColor": "rgba(31, 118, 189, 0.18)",
"full": false,
"lineColor": "rgb(31, 120, 193)",
"show": true
},
"tableColumn": "",
"targets": [
{
"expr": "(ceph_pool_stored{job=~\"$job\"} / (ceph_pool_stored{job=~\"$job\"} + ceph_pool_max_avail{job=~\"$job\"})) *\n on(pool_id) group_left(instance, name) ceph_pool_metadata{job=~\"$job\", name=~\"$pool_name\"}\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "",
"refId": "A"
}
],
"thresholds": ".7,.8",
"title": "Capacity used",
"type": "singlestat",
"valueFontSize": "80%",
"valueMaps": [
{
"op": "=",
"text": "N/A",
"value": "null"
}
],
"valueName": "current"
},
{
"cacheTimeout": null,
"colorBackground": false,
"colorValue": false,
"colors": [
"#299c46",
"rgba(237, 129, 40, 0.89)",
"#d44a3a"
],
"datasource": "${prometheusds}",
"description": "Time till pool is full assuming the average fill rate of the last 6 hours",
"format": "s",
"gauge": {
"maxValue": 100,
"minValue": 0,
"show": false,
"thresholdLabels": false,
"thresholdMarkers": true
},
"gridPos": {
"h": 7,
"w": 5,
"x": 7,
"y": 0
},
"id": 3,
"interval": null,
"links": [ ],
"mappingType": 1,
"mappingTypes": [
{
"name": "value to text",
"value": 1
},
{
"name": "range to text",
"value": 2
}
],
"maxDataPoints": 100,
"nullPointMode": "connected",
"nullText": null,
"postfix": "",
"postfixFontSize": "50%",
"prefix": "",
"prefixFontSize": "50%",
"rangeMaps": [
{
"from": "null",
"text": "N/A",
"to": "null"
}
],
"sparkline": {
"fillColor": "rgba(31, 118, 189, 0.18)",
"full": false,
"lineColor": "rgb(31, 120, 193)",
"show": false
},
"tableColumn": "",
"targets": [
{
"expr": "(ceph_pool_max_avail{job=~\"$job\"} / deriv(ceph_pool_stored{job=~\"$job\"}[6h])) *\n on(pool_id) group_left(instance, name) ceph_pool_metadata{job=~\"$job\", name=~\"$pool_name\"} > 0\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "",
"refId": "A"
}
],
"thresholds": "",
"title": "Time till full",
"type": "singlestat",
"valueFontSize": "80%",
"valueMaps": [
{
"op": "=",
"text": "N/A",
"value": "null"
}
],
"valueName": "current"
},
{
"aliasColors": {
"read_op_per_sec": "#3F6833",
"write_op_per_sec": "#E5AC0E"
},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "${prometheusds}",
"description": "",
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 7,
"w": 12,
"x": 12,
"y": 0
},
"id": 4,
"legend": {
"alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": false,
"show": true,
"sideWidth": null,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [ ],
"nullPointMode": "null",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"repeat": null,
"seriesOverrides": [ ],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "deriv(ceph_pool_objects{job=~\"$job\"}[$__rate_interval]) *\n on(pool_id) group_left(instance, name) ceph_pool_metadata{job=~\"$job\", name=~\"$pool_name\"}\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "Objects per second",
"refId": "A"
}
],
"thresholds": [ ],
"timeFrom": null,
"timeShift": null,
"title": "$pool_name Object Ingress/Egress",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [ ]
},
"yaxes": [
{
"format": "ops",
"label": "Objects out(-) / in(+) ",
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
]
},
{
"aliasColors": {
"read_op_per_sec": "#3F6833",
"write_op_per_sec": "#E5AC0E"
},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "${prometheusds}",
"description": "",
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 7,
"w": 12,
"x": 0,
"y": 7
},
"id": 5,
"legend": {
"alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": false,
"show": true,
"sideWidth": null,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [ ],
"nullPointMode": "null",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"repeat": null,
"seriesOverrides": [
{
"alias": "reads",
"transform": "negative-Y"
}
],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "rate(ceph_pool_rd{job=~\"$job\"}[$__rate_interval]) *\n on(pool_id) group_left(instance,name) ceph_pool_metadata{job=~\"$job\", name=~\"$pool_name\"}\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "reads",
"refId": "A"
},
{
"expr": "rate(ceph_pool_wr{job=~\"$job\"}[$__rate_interval]) *\n on(pool_id) group_left(instance, name) ceph_pool_metadata{job=~\"$job\", name=~\"$pool_name\"}\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "writes",
"refId": "B"
}
],
"thresholds": [ ],
"timeFrom": null,
"timeShift": null,
"title": "$pool_name Client IOPS",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [ ]
},
"yaxes": [
{
"format": "iops",
"label": "Read (-) / Write (+)",
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
]
},
{
"aliasColors": {
"read_op_per_sec": "#3F6833",
"write_op_per_sec": "#E5AC0E"
},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "${prometheusds}",
"description": "",
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 7,
"w": 12,
"x": 12,
"y": 7
},
"id": 6,
"legend": {
"alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": false,
"show": true,
"sideWidth": null,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [ ],
"nullPointMode": "null",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"repeat": null,
"seriesOverrides": [
{
"alias": "reads",
"transform": "negative-Y"
}
],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "rate(ceph_pool_rd_bytes{job=~\"$job\"}[$__rate_interval]) *\n on(pool_id) group_left(instance, name) ceph_pool_metadata{job=~\"$job\", name=~\"$pool_name\"}\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "reads",
"refId": "A"
},
{
"expr": "rate(ceph_pool_wr_bytes{job=~\"$job\"}[$__rate_interval]) *\n on(pool_id) group_left(instance,name) ceph_pool_metadata{job=~\"$job\", name=~\"$pool_name\"}\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "writes",
"refId": "B"
}
],
"thresholds": [ ],
"timeFrom": null,
"timeShift": null,
"title": "$pool_name Client Throughput",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [ ]
},
"yaxes": [
{
"format": "Bps",
"label": "Read (-) / Write (+)",
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
]
},
{
"aliasColors": {
"read_op_per_sec": "#3F6833",
"write_op_per_sec": "#E5AC0E"
},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "${prometheusds}",
"description": "",
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 7,
"w": 12,
"x": 0,
"y": 14
},
"id": 7,
"legend": {
"alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": false,
"show": true,
"sideWidth": null,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [ ],
"nullPointMode": "null",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"repeat": null,
"seriesOverrides": [ ],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "ceph_pool_objects{job=~\"$job\"} *\n on(pool_id) group_left(instance,name) ceph_pool_metadata{job=~\"$job\", name=~\"$pool_name\"}\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "Number of Objects",
"refId": "A"
}
],
"thresholds": [ ],
"timeFrom": null,
"timeShift": null,
"title": "$pool_name Objects",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [ ]
},
"yaxes": [
{
"format": "short",
"label": "Objects",
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
]
}
],
"refresh": "30s",
"rows": [ ],
"schemaVersion": 22,
"style": "dark",
"tags": [
"ceph-mixin"
],
"templating": {
"list": [
{
"allValue": ".+",
"current": { },
"datasource": "${prometheusds}",
"hide": 2,
"includeAll": true,
"label": "cluster",
"multi": true,
"name": "cluster",
"options": [ ],
"query": "label_values(ceph_osd_metadata, cluster)",
"refresh": 1,
"regex": "(.*)",
"sort": 1,
"tagValuesQuery": "",
"tags": [ ],
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
"allValue": ".+",
"current": { },
"datasource": "${prometheusds}",
"hide": 0,
"includeAll": true,
"label": "job",
"multi": true,
"name": "job",
"options": [ ],
"query": "label_values(ceph_osd_metadata{}, job)",
"refresh": 1,
"regex": "(.*)",
"sort": 1,
"tagValuesQuery": "",
"tags": [ ],
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
"allValue": null,
"current": { },
"datasource": "${prometheusds}",
"hide": 0,
"includeAll": false,
"label": "Pool Name",
"multi": false,
"name": "pool_name",
"options": [ ],
"query": "label_values(ceph_pool_metadata{job=~\"$job\"}, name)",
"refresh": 1,
"regex": "",
"sort": 1,
"tagValuesQuery": "",
"tags": [ ],
"tagsQuery": "",
"type": "query",
"useTags": false
}
]
},
"time": {
"from": "now-1h",
"to": "now"
},
"timepicker": {
"refresh_intervals": [
"5s",
"10s",
"30s",
"1m",
"5m",
"15m",
"30m",
"1h",
"2h",
"1d"
],
"time_options": [
"5m",
"15m",
"1h",
"6h",
"12h",
"24h",
"2d",
"7d",
"30d"
]
},
"timezone": "",
"title": "Ceph Pool Details",
"uid": "-xyV8KCiz",
"version": 0
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,522 @@
{
"__inputs": [ ],
"__requires": [
{
"id": "grafana",
"name": "Grafana",
"type": "grafana",
"version": "5.0.0"
},
{
"id": "graph",
"name": "Graph",
"type": "panel",
"version": "5.0.0"
}
],
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": "-- Grafana --",
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"showIn": 0,
"tags": [ ],
"type": "dashboard"
}
]
},
"description": "",
"editable": false,
"gnetId": null,
"graphTooltip": 0,
"hideControls": false,
"id": null,
"links": [ ],
"panels": [
{
"collapse": false,
"collapsed": false,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 0
},
"id": 2,
"panels": [ ],
"repeat": null,
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
"title": "RGW Host Detail : $rgw_servers",
"titleSize": "h6",
"type": "row"
},
{
"aliasColors": { },
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "${prometheusds}",
"description": "",
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 8,
"w": 6,
"x": 0,
"y": 1
},
"id": 3,
"legend": {
"alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": false,
"show": true,
"sideWidth": null,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [ ],
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"repeat": null,
"seriesOverrides": [ ],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "sum by (instance_id) (\n rate(ceph_rgw_get_initial_lat_sum{job=~\"$job\"}[$__rate_interval]) /\n rate(ceph_rgw_get_initial_lat_count{job=~\"$job\"}[$__rate_interval])\n) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\", ceph_daemon=~\"$rgw_servers\"}\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "GET {{ceph_daemon}}",
"refId": "A"
},
{
"expr": "sum by (instance_id) (\n rate(ceph_rgw_put_initial_lat_sum{job=~\"$job\"}[$__rate_interval]) /\n rate(ceph_rgw_put_initial_lat_count{job=~\"$job\"}[$__rate_interval])\n) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\", ceph_daemon=~\"$rgw_servers\"}\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "PUT {{ceph_daemon}}",
"refId": "B"
}
],
"thresholds": [ ],
"timeFrom": null,
"timeShift": null,
"title": "$rgw_servers GET/PUT Latencies",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [ ]
},
"yaxes": [
{
"format": "s",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
}
]
},
{
"aliasColors": { },
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "${prometheusds}",
"description": "",
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 8,
"w": 7,
"x": 6,
"y": 1
},
"id": 4,
"legend": {
"alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": false,
"show": true,
"sideWidth": null,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [ ],
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"repeat": null,
"seriesOverrides": [ ],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "rate(ceph_rgw_get_b{job=~\"$job\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\", ceph_daemon=~\"$rgw_servers\"}\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "GETs {{ceph_daemon}}",
"refId": "A"
},
{
"expr": "rate(ceph_rgw_put_b{job=~\"$job\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon)\n ceph_rgw_metadata{job=~\"$job\", ceph_daemon=~\"$rgw_servers\"}\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "PUTs {{ceph_daemon}}",
"refId": "B"
}
],
"thresholds": [ ],
"timeFrom": null,
"timeShift": null,
"title": "Bandwidth by HTTP Operation",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [ ]
},
"yaxes": [
{
"format": "Bps",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
}
]
},
{
"aliasColors": {
"GETs": "#7eb26d",
"Other": "#447ebc",
"PUTs": "#eab839",
"Requests": "#3f2b5b",
"Requests Failed": "#bf1b00"
},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "${prometheusds}",
"description": "",
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 8,
"w": 7,
"x": 13,
"y": 1
},
"id": 5,
"legend": {
"alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": false,
"show": true,
"sideWidth": null,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [ ],
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"repeat": null,
"seriesOverrides": [ ],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "rate(ceph_rgw_failed_req{job=~\"$job\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\",ceph_daemon=~\"$rgw_servers\"}\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "Requests Failed {{ceph_daemon}}",
"refId": "A"
},
{
"expr": "rate(ceph_rgw_get{job=~\"$job\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\", ceph_daemon=~\"$rgw_servers\"}\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "GETs {{ceph_daemon}}",
"refId": "B"
},
{
"expr": "rate(ceph_rgw_put{job=~\"$job\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\", ceph_daemon=~\"$rgw_servers\"}\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "PUTs {{ceph_daemon}}",
"refId": "C"
},
{
"expr": "(\n rate(ceph_rgw_req{job=~\"$job\"}[$__rate_interval]) -\n (\n rate(ceph_rgw_get{job=~\"$job\"}[$__rate_interval]) +\n rate(ceph_rgw_put{job=~\"$job\"}[$__rate_interval])\n )\n) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\", ceph_daemon=~\"$rgw_servers\"}\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "Other {{ceph_daemon}}",
"refId": "D"
}
],
"thresholds": [ ],
"timeFrom": null,
"timeShift": null,
"title": "HTTP Request Breakdown",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [ ]
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
}
]
},
{
"aliasColors": {
"Failures": "#bf1b00",
"GETs": "#7eb26d",
"Other (HEAD,POST,DELETE)": "#447ebc",
"PUTs": "#eab839",
"Requests": "#3f2b5b"
},
"datasource": "${prometheusds}",
"description": "",
"gridPos": {
"h": 8,
"w": 4,
"x": 20,
"y": 1
},
"id": 6,
"legend": {
"percentage": true,
"show": true,
"values": true
},
"legendType": "Under graph",
"pieType": "pie",
"targets": [
{
"expr": "rate(ceph_rgw_failed_req{job=~\"$job\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\", ceph_daemon=~\"$rgw_servers\"}\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "Failures {{ceph_daemon}}",
"refId": "A"
},
{
"expr": "rate(ceph_rgw_get{job=~\"$job\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\", ceph_daemon=~\"$rgw_servers\"}\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "GETs {{ceph_daemon}}",
"refId": "B"
},
{
"expr": "rate(ceph_rgw_put{job=~\"$job\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\", ceph_daemon=~\"$rgw_servers\"}\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "PUTs {{ceph_daemon}}",
"refId": "C"
},
{
"expr": "(\n rate(ceph_rgw_req{job=~\"$job\"}[$__rate_interval]) -\n (\n rate(ceph_rgw_get{job=~\"$job\"}[$__rate_interval]) +\n rate(ceph_rgw_put{job=~\"$job\"}[$__rate_interval])\n )\n) * on (instance_id) group_left (ceph_daemon)\n ceph_rgw_metadata{job=~\"$job\", ceph_daemon=~\"$rgw_servers\"}\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "Other (DELETE,LIST) {{ceph_daemon}}",
"refId": "D"
}
],
"title": "Workload Breakdown",
"type": "piechart",
"valueName": "current"
}
],
"refresh": "30s",
"rows": [ ],
"schemaVersion": 16,
"style": "dark",
"tags": [
"ceph-mixin",
"overview"
],
"templating": {
"list": [
{
"allValue": ".+",
"current": { },
"datasource": "${prometheusds}",
"hide": 2,
"includeAll": true,
"label": "cluster",
"multi": true,
"name": "cluster",
"options": [ ],
"query": "label_values(ceph_osd_metadata, cluster)",
"refresh": 1,
"regex": "(.*)",
"sort": 1,
"tagValuesQuery": "",
"tags": [ ],
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
"allValue": ".+",
"current": { },
"datasource": "${prometheusds}",
"hide": 0,
"includeAll": true,
"label": "job",
"multi": true,
"name": "job",
"options": [ ],
"query": "label_values(ceph_osd_metadata{}, job)",
"refresh": 1,
"regex": "(.*)",
"sort": 1,
"tagValuesQuery": "",
"tags": [ ],
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
"allValue": null,
"current": { },
"datasource": "${prometheusds}",
"hide": 0,
"includeAll": true,
"label": "",
"multi": false,
"name": "rgw_servers",
"options": [ ],
"query": "label_values(ceph_rgw_metadata{job=~\"$job\"}, ceph_daemon)",
"refresh": 1,
"regex": "",
"sort": 1,
"tagValuesQuery": "",
"tags": [ ],
"tagsQuery": "",
"type": "query",
"useTags": false
}
]
},
"time": {
"from": "now-1h",
"to": "now"
},
"timepicker": {
"refresh_intervals": [
"5s",
"10s",
"30s",
"1m",
"5m",
"15m",
"30m",
"1h",
"2h",
"1d"
],
"time_options": [
"5m",
"15m",
"1h",
"6h",
"12h",
"24h",
"2d",
"7d",
"30d"
]
},
"timezone": "",
"title": "RGW Instance Detail",
"uid": "x5ARzZtmk",
"version": 0
}

View File

@ -0,0 +1,695 @@
{
"__inputs": [ ],
"__requires": [
{
"id": "grafana",
"name": "Grafana",
"type": "grafana",
"version": "5.0.0"
},
{
"id": "graph",
"name": "Graph",
"type": "panel",
"version": "5.0.0"
}
],
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": "-- Grafana --",
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"showIn": 0,
"tags": [ ],
"type": "dashboard"
}
]
},
"description": "",
"editable": false,
"gnetId": null,
"graphTooltip": 0,
"hideControls": false,
"id": null,
"links": [ ],
"panels": [
{
"collapse": false,
"collapsed": false,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 0
},
"id": 2,
"panels": [ ],
"repeat": null,
"repeatIteration": null,
"repeatRowId": null,
"showTitle": true,
"title": "RGW Overview - All Gateways",
"titleSize": "h6",
"type": "row"
},
{
"aliasColors": { },
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "${prometheusds}",
"description": "",
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 7,
"w": 8,
"x": 0,
"y": 1
},
"id": 3,
"legend": {
"alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": false,
"show": true,
"sideWidth": null,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [ ],
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"repeat": null,
"seriesOverrides": [ ],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "label_replace(\n rate(ceph_rgw_get_initial_lat_sum{job=~\"$job\"}[$__rate_interval]) /\n rate(ceph_rgw_get_initial_lat_count{job=~\"$job\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\"},\n \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"\n)\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "GET {{rgw_host}}",
"refId": "A"
},
{
"expr": "label_replace(\n rate(ceph_rgw_put_initial_lat_sum{job=~\"$job\"}[$__rate_interval]) /\n rate(ceph_rgw_put_initial_lat_count{job=~\"$job\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\"},\n \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"\n)\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "PUT {{rgw_host}}",
"refId": "B"
}
],
"thresholds": [ ],
"timeFrom": null,
"timeShift": null,
"title": "Average GET/PUT Latencies by RGW Instance",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [ ]
},
"yaxes": [
{
"format": "s",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
}
]
},
{
"aliasColors": { },
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "${prometheusds}",
"description": "",
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 7,
"w": 7,
"x": 8,
"y": 1
},
"id": 4,
"legend": {
"alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": false,
"show": true,
"sideWidth": null,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [ ],
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"repeat": null,
"seriesOverrides": [ ],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "sum by (rgw_host) (\n label_replace(\n rate(ceph_rgw_req{job=~\"$job\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\"},\n \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"\n )\n)\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{rgw_host}}",
"refId": "A"
}
],
"thresholds": [ ],
"timeFrom": null,
"timeShift": null,
"title": "Total Requests/sec by RGW Instance",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [ ]
},
"yaxes": [
{
"format": "none",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
}
]
},
{
"aliasColors": { },
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "${prometheusds}",
"description": "Average GET latency per RGW instance, to provide a visual indication of GET latency imbalance across RGW hosts",
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 7,
"w": 6,
"x": 15,
"y": 1
},
"id": 5,
"legend": {
"alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": false,
"show": true,
"sideWidth": null,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [ ],
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"repeat": null,
"seriesOverrides": [ ],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "label_replace(\n rate(ceph_rgw_get_initial_lat_sum{job=~\"$job\"}[$__rate_interval]) /\n rate(ceph_rgw_get_initial_lat_count{job=~\"$job\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\"},\n \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"\n)\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{rgw_host}}",
"refId": "A"
}
],
"thresholds": [ ],
"timeFrom": null,
"timeShift": null,
"title": "GET Latencies by RGW Instance",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [ ]
},
"yaxes": [
{
"format": "s",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
}
]
},
{
"aliasColors": { },
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "${prometheusds}",
"description": "Total bytes transferred in/out of all radosgw instances within the cluster",
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 6,
"w": 8,
"x": 0,
"y": 8
},
"id": 6,
"legend": {
"alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": false,
"show": true,
"sideWidth": null,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [ ],
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"repeat": null,
"seriesOverrides": [ ],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "sum(rate(ceph_rgw_get_b{job=~\"$job\"}[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "GETs",
"refId": "A"
},
{
"expr": "sum(rate(ceph_rgw_put_b{job=~\"$job\"}[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "PUTs",
"refId": "B"
}
],
"thresholds": [ ],
"timeFrom": null,
"timeShift": null,
"title": "Bandwidth Consumed by Type",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [ ]
},
"yaxes": [
{
"format": "bytes",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
}
]
},
{
"aliasColors": { },
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "${prometheusds}",
"description": "Total bytes transferred in/out through get/put operations, by radosgw instance",
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 6,
"w": 7,
"x": 8,
"y": 8
},
"id": 7,
"legend": {
"alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": false,
"show": true,
"sideWidth": null,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [ ],
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"repeat": null,
"seriesOverrides": [ ],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "label_replace(sum by (instance_id) (\n rate(ceph_rgw_get_b{job=~\"$job\"}[$__rate_interval]) +\n rate(ceph_rgw_put_b{job=~\"$job\"}[$__rate_interval])) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\"},\n \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"\n)\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{rgw_host}}",
"refId": "A"
}
],
"thresholds": [ ],
"timeFrom": null,
"timeShift": null,
"title": "Bandwidth by RGW Instance",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [ ]
},
"yaxes": [
{
"format": "bytes",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
}
]
},
{
"aliasColors": { },
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "${prometheusds}",
"description": "Average PUT latency per RGW instance, to provide a visual indication of PUT latency imbalance across RGW hosts",
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 6,
"w": 6,
"x": 15,
"y": 8
},
"id": 8,
"legend": {
"alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": false,
"show": true,
"sideWidth": null,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [ ],
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"repeat": null,
"seriesOverrides": [ ],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "label_replace(\n rate(ceph_rgw_put_initial_lat_sum{job=~\"$job\"}[$__rate_interval]) /\n rate(ceph_rgw_put_initial_lat_count{job=~\"$job\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\"},\n \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"\n)\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{rgw_host}}",
"refId": "A"
}
],
"thresholds": [ ],
"timeFrom": null,
"timeShift": null,
"title": "PUT Latencies by RGW Instance",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [ ]
},
"yaxes": [
{
"format": "s",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
}
]
}
],
"refresh": "30s",
"rows": [ ],
"schemaVersion": 16,
"style": "dark",
"tags": [
"ceph-mixin",
"overview"
],
"templating": {
"list": [
{
"allValue": ".+",
"current": { },
"datasource": "${prometheusds}",
"hide": 2,
"includeAll": true,
"label": "cluster",
"multi": true,
"name": "cluster",
"options": [ ],
"query": "label_values(ceph_osd_metadata, cluster)",
"refresh": 1,
"regex": "(.*)",
"sort": 1,
"tagValuesQuery": "",
"tags": [ ],
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
"allValue": ".+",
"current": { },
"datasource": "${prometheusds}",
"hide": 0,
"includeAll": true,
"label": "job",
"multi": true,
"name": "job",
"options": [ ],
"query": "label_values(ceph_osd_metadata{}, job)",
"refresh": 1,
"regex": "(.*)",
"sort": 1,
"tagValuesQuery": "",
"tags": [ ],
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
"allValue": null,
"current": { },
"datasource": "${prometheusds}",
"hide": 0,
"includeAll": true,
"label": "RGW Server",
"multi": false,
"name": "rgw_servers",
"options": [ ],
"query": "label_values(ceph_rgw_metadata{job=~\"$job\"}, ceph_daemon)",
"refresh": 1,
"regex": "",
"sort": 1,
"tagValuesQuery": "",
"tags": [ ],
"tagsQuery": "",
"type": "query",
"useTags": false
}
]
},
"time": {
"from": "now-1h",
"to": "now"
},
"timepicker": {
"refresh_intervals": [
"5s",
"10s",
"30s",
"1m",
"5m",
"15m",
"30m",
"1h",
"2h",
"1d"
],
"time_options": [
"5m",
"15m",
"1h",
"6h",
"12h",
"24h",
"2d",
"7d",
"30d"
]
},
"timezone": "",
"title": "RGW Overview",
"uid": "WAkugZpiz",
"version": 0
}

View File

@ -0,0 +1,490 @@
{
"__inputs": [ ],
"__requires": [
{
"id": "grafana",
"name": "Grafana",
"type": "grafana",
"version": "5.0.0"
},
{
"id": "graph",
"name": "Graph",
"type": "panel",
"version": "5.0.0"
}
],
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": "-- Grafana --",
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"showIn": 0,
"tags": [ ],
"type": "dashboard"
}
]
},
"description": "",
"editable": false,
"gnetId": null,
"graphTooltip": 0,
"hideControls": false,
"id": null,
"links": [ ],
"panels": [
{
"aliasColors": { },
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "${prometheusds}",
"description": "",
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 7,
"w": 8,
"x": 0,
"y": 0
},
"id": 2,
"legend": {
"alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": false,
"show": true,
"sideWidth": null,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [ ],
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"repeat": null,
"seriesOverrides": [ ],
"spaceLength": 10,
"stack": true,
"steppedLine": false,
"targets": [
{
"expr": "sum by (source_zone) (rate(ceph_data_sync_from_zone_fetch_bytes_sum{job=~\"$job\"}[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{source_zone}}",
"refId": "A"
}
],
"thresholds": [ ],
"timeFrom": null,
"timeShift": null,
"title": "Replication (throughput) from Source Zone",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [ ]
},
"yaxes": [
{
"format": "Bps",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
}
]
},
{
"aliasColors": { },
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "${prometheusds}",
"description": "",
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 7,
"w": 8,
"x": 8,
"y": 0
},
"id": 3,
"legend": {
"alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": false,
"show": true,
"sideWidth": null,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [ ],
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"repeat": null,
"seriesOverrides": [ ],
"spaceLength": 10,
"stack": true,
"steppedLine": false,
"targets": [
{
"expr": "sum by (source_zone) (rate(ceph_data_sync_from_zone_fetch_bytes_count{job=~\"$job\"}[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{source_zone}}",
"refId": "A"
}
],
"thresholds": [ ],
"timeFrom": null,
"timeShift": null,
"title": "Replication (objects) from Source Zone",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [ ]
},
"yaxes": [
{
"format": "short",
"label": "Objects/s",
"logBase": 1,
"max": null,
"min": 0,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
}
]
},
{
"aliasColors": { },
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "${prometheusds}",
"description": "",
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 7,
"w": 8,
"x": 16,
"y": 0
},
"id": 4,
"legend": {
"alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": false,
"show": true,
"sideWidth": null,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [ ],
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"repeat": null,
"seriesOverrides": [ ],
"spaceLength": 10,
"stack": true,
"steppedLine": false,
"targets": [
{
"expr": "sum by (source_zone) (rate(ceph_data_sync_from_zone_poll_latency_sum{job=~\"$job\"}[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{source_zone}}",
"refId": "A"
}
],
"thresholds": [ ],
"timeFrom": null,
"timeShift": null,
"title": "Polling Request Latency from Source Zone",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [ ]
},
"yaxes": [
{
"format": "ms",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
}
]
},
{
"aliasColors": { },
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "${prometheusds}",
"description": "",
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 7,
"w": 8,
"x": 0,
"y": 7
},
"id": 5,
"legend": {
"alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": false,
"show": true,
"sideWidth": null,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [ ],
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"repeat": null,
"seriesOverrides": [ ],
"spaceLength": 10,
"stack": true,
"steppedLine": false,
"targets": [
{
"expr": "sum by (source_zone) (rate(ceph_data_sync_from_zone_fetch_errors{job=~\"$job\"}[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{source_zone}}",
"refId": "A"
}
],
"thresholds": [ ],
"timeFrom": null,
"timeShift": null,
"title": "Unsuccessful Object Replications from Source Zone",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [ ]
},
"yaxes": [
{
"format": "short",
"label": "Count/s",
"logBase": 1,
"max": null,
"min": 0,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
}
]
}
],
"refresh": "30s",
"rows": [ ],
"schemaVersion": 16,
"style": "dark",
"tags": [
"ceph-mixin",
"overview"
],
"templating": {
"list": [
{
"allValue": ".+",
"current": { },
"datasource": "${prometheusds}",
"hide": 2,
"includeAll": true,
"label": "cluster",
"multi": true,
"name": "cluster",
"options": [ ],
"query": "label_values(ceph_osd_metadata, cluster)",
"refresh": 1,
"regex": "(.*)",
"sort": 1,
"tagValuesQuery": "",
"tags": [ ],
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
"allValue": ".+",
"current": { },
"datasource": "${prometheusds}",
"hide": 0,
"includeAll": true,
"label": "job",
"multi": true,
"name": "job",
"options": [ ],
"query": "label_values(ceph_osd_metadata{}, job)",
"refresh": 1,
"regex": "(.*)",
"sort": 1,
"tagValuesQuery": "",
"tags": [ ],
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
"allValue": null,
"current": { },
"datasource": "${prometheusds}",
"hide": 0,
"includeAll": true,
"label": "RGW Server",
"multi": false,
"name": "rgw_servers",
"options": [ ],
"query": "label_values(ceph_rgw_metadata{job=~\"$job\"}, ceph_daemon)",
"refresh": 1,
"regex": "",
"sort": 1,
"tagValuesQuery": "",
"tags": [ ],
"tagsQuery": "",
"type": "query",
"useTags": false
}
]
},
"time": {
"from": "now-1h",
"to": "now"
},
"timepicker": {
"refresh_intervals": [
"5s",
"10s",
"30s",
"1m",
"5m",
"15m",
"30m",
"1h",
"2h",
"1d"
],
"time_options": [
"5m",
"15m",
"1h",
"6h",
"12h",
"24h",
"2d",
"7d",
"30d"
]
},
"timezone": "",
"title": "RGW Sync Overview",
"uid": "rgw-sync-overview",
"version": 0
}

View File

@ -0,0 +1,444 @@
{
"__inputs": [ ],
"__requires": [
{
"id": "grafana",
"name": "Grafana",
"type": "grafana",
"version": "5.3.3"
},
{
"id": "graph",
"name": "Graph",
"type": "panel",
"version": "5.0.0"
}
],
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": "-- Grafana --",
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"showIn": 0,
"tags": [ ],
"type": "dashboard"
}
]
},
"description": "Detailed Performance of RBD Images (IOPS/Throughput/Latency)",
"editable": false,
"gnetId": null,
"graphTooltip": 0,
"hideControls": false,
"id": null,
"links": [ ],
"panels": [
{
"aliasColors": { },
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "${prometheusds}",
"description": "RBD per-image IO statistics are disabled by default.\n\nPlease refer to https://docs.ceph.com/en/latest/mgr/prometheus/#rbd-io-statistics for information about how to enable those optionally.",
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 9,
"w": 8,
"x": 0,
"y": 0
},
"id": 2,
"legend": {
"alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": false,
"show": true,
"sideWidth": null,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [ ],
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"repeat": null,
"seriesOverrides": [ ],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "rate(ceph_rbd_write_ops{job=~\"$job\", pool=\"$pool\", image=\"$image\"}[$__rate_interval])",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{pool}} Write",
"refId": "A"
},
{
"expr": "rate(ceph_rbd_read_ops{job=~\"$job\", pool=\"$pool\", image=\"$image\"}[$__rate_interval])",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{pool}} Read",
"refId": "B"
}
],
"thresholds": [ ],
"timeFrom": null,
"timeShift": null,
"title": "IOPS",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [ ]
},
"yaxes": [
{
"format": "iops",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
},
{
"format": "iops",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
}
]
},
{
"aliasColors": { },
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "${prometheusds}",
"description": "RBD per-image IO statistics are disabled by default.\n\nPlease refer to https://docs.ceph.com/en/latest/mgr/prometheus/#rbd-io-statistics for information about how to enable those optionally.",
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 9,
"w": 8,
"x": 8,
"y": 0
},
"id": 3,
"legend": {
"alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": false,
"show": true,
"sideWidth": null,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [ ],
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"repeat": null,
"seriesOverrides": [ ],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "rate(ceph_rbd_write_bytes{job=~\"$job\", pool=\"$pool\", image=\"$image\"}[$__rate_interval])",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{pool}} Write",
"refId": "A"
},
{
"expr": "rate(ceph_rbd_read_bytes{job=~\"$job\", pool=\"$pool\", image=\"$image\"}[$__rate_interval])",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{pool}} Read",
"refId": "B"
}
],
"thresholds": [ ],
"timeFrom": null,
"timeShift": null,
"title": "Throughput",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [ ]
},
"yaxes": [
{
"format": "Bps",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
},
{
"format": "Bps",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
}
]
},
{
"aliasColors": { },
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "${prometheusds}",
"description": "RBD per-image IO statistics are disabled by default.\n\nPlease refer to https://docs.ceph.com/en/latest/mgr/prometheus/#rbd-io-statistics for information about how to enable those optionally.",
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 9,
"w": 8,
"x": 16,
"y": 0
},
"id": 4,
"legend": {
"alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": false,
"show": true,
"sideWidth": null,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [ ],
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"repeat": null,
"seriesOverrides": [ ],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "rate(ceph_rbd_write_latency_sum{job=~\"$job\", pool=\"$pool\", image=\"$image\"}[$__rate_interval]) /\n rate(ceph_rbd_write_latency_count{job=~\"$job\", pool=\"$pool\", image=\"$image\"}[$__rate_interval])\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{pool}} Write",
"refId": "A"
},
{
"expr": "rate(ceph_rbd_read_latency_sum{job=~\"$job\", pool=\"$pool\", image=\"$image\"}[$__rate_interval]) /\n rate(ceph_rbd_read_latency_count{job=~\"$job\", pool=\"$pool\", image=\"$image\"}[$__rate_interval])\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{pool}} Read",
"refId": "B"
}
],
"thresholds": [ ],
"timeFrom": null,
"timeShift": null,
"title": "Average Latency",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [ ]
},
"yaxes": [
{
"format": "ns",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
},
{
"format": "ns",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
}
]
}
],
"refresh": "30s",
"rows": [ ],
"schemaVersion": 16,
"style": "dark",
"tags": [
"ceph-mixin"
],
"templating": {
"list": [
{
"allValue": ".+",
"current": { },
"datasource": "${prometheusds}",
"hide": 2,
"includeAll": true,
"label": "cluster",
"multi": true,
"name": "cluster",
"options": [ ],
"query": "label_values(ceph_osd_metadata, cluster)",
"refresh": 1,
"regex": "(.*)",
"sort": 1,
"tagValuesQuery": "",
"tags": [ ],
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
"allValue": ".+",
"current": { },
"datasource": "${prometheusds}",
"hide": 0,
"includeAll": true,
"label": "job",
"multi": true,
"name": "job",
"options": [ ],
"query": "label_values(ceph_osd_metadata{}, job)",
"refresh": 1,
"regex": "(.*)",
"sort": 1,
"tagValuesQuery": "",
"tags": [ ],
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
"allValue": null,
"current": { },
"datasource": "${prometheusds}",
"hide": 0,
"includeAll": false,
"label": "",
"multi": false,
"name": "pool",
"options": [ ],
"query": "label_values(pool)",
"refresh": 1,
"regex": "",
"sort": 0,
"tagValuesQuery": "",
"tags": [ ],
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
"allValue": null,
"current": { },
"datasource": "${prometheusds}",
"hide": 0,
"includeAll": false,
"label": "",
"multi": false,
"name": "image",
"options": [ ],
"query": "label_values(image)",
"refresh": 1,
"regex": "",
"sort": 0,
"tagValuesQuery": "",
"tags": [ ],
"tagsQuery": "",
"type": "query",
"useTags": false
}
]
},
"time": {
"from": "now-1h",
"to": "now"
},
"timepicker": {
"refresh_intervals": [
"5s",
"10s",
"30s",
"1m",
"5m",
"15m",
"30m",
"1h",
"2h",
"1d"
],
"time_options": [
"5m",
"15m",
"1h",
"6h",
"12h",
"24h",
"2d",
"7d",
"30d"
]
},
"timezone": "",
"title": "RBD Details",
"uid": "YhCYGcuZz",
"version": 0
}

View File

@ -0,0 +1,723 @@
{
"__inputs": [ ],
"__requires": [
{
"id": "grafana",
"name": "Grafana",
"type": "grafana",
"version": "5.4.2"
},
{
"id": "graph",
"name": "Graph",
"type": "panel",
"version": "5.0.0"
},
{
"id": "prometheus",
"name": "Prometheus",
"type": "datasource",
"version": "5.0.0"
},
{
"id": "table",
"name": "Table",
"type": "panel",
"version": "5.0.0"
}
],
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": "-- Grafana --",
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"showIn": 0,
"tags": [ ],
"type": "dashboard"
}
]
},
"description": "",
"editable": false,
"gnetId": null,
"graphTooltip": 0,
"hideControls": false,
"id": null,
"links": [ ],
"panels": [
{
"aliasColors": { },
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "${prometheusds}",
"description": "RBD per-image IO statistics are disabled by default.\n\nPlease refer to https://docs.ceph.com/en/latest/mgr/prometheus/#rbd-io-statistics for information about how to enable those optionally.",
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 7,
"w": 8,
"x": 0,
"y": 0
},
"id": 2,
"legend": {
"alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": false,
"show": true,
"sideWidth": null,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [ ],
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"repeat": null,
"seriesOverrides": [ ],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "round(sum(rate(ceph_rbd_write_ops{job=~\"$job\"}[$__rate_interval])))",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "Writes",
"refId": "A"
},
{
"expr": "round(sum(rate(ceph_rbd_read_ops{job=~\"$job\"}[$__rate_interval])))",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "Reads",
"refId": "B"
}
],
"thresholds": [ ],
"timeFrom": null,
"timeShift": null,
"title": "IOPS",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [ ]
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
}
]
},
{
"aliasColors": { },
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "${prometheusds}",
"description": "RBD per-image IO statistics are disabled by default.\n\nPlease refer to https://docs.ceph.com/en/latest/mgr/prometheus/#rbd-io-statistics for information about how to enable those optionally.",
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 7,
"w": 8,
"x": 8,
"y": 0
},
"id": 3,
"legend": {
"alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": false,
"show": true,
"sideWidth": null,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [ ],
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"repeat": null,
"seriesOverrides": [ ],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "round(sum(rate(ceph_rbd_write_bytes{job=~\"$job\"}[$__rate_interval])))",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "Write",
"refId": "A"
},
{
"expr": "round(sum(rate(ceph_rbd_read_bytes{job=~\"$job\"}[$__rate_interval])))",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "Read",
"refId": "B"
}
],
"thresholds": [ ],
"timeFrom": null,
"timeShift": null,
"title": "Throughput",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [ ]
},
"yaxes": [
{
"format": "Bps",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
}
]
},
{
"aliasColors": { },
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "${prometheusds}",
"description": "RBD per-image IO statistics are disabled by default.\n\nPlease refer to https://docs.ceph.com/en/latest/mgr/prometheus/#rbd-io-statistics for information about how to enable those optionally.",
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 7,
"w": 8,
"x": 16,
"y": 0
},
"id": 4,
"legend": {
"alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": false,
"show": true,
"sideWidth": null,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [ ],
"nullPointMode": "null as zero",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"repeat": null,
"seriesOverrides": [ ],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "round(\n sum(rate(ceph_rbd_write_latency_sum{job=~\"$job\"}[$__rate_interval])) /\n sum(rate(ceph_rbd_write_latency_count{job=~\"$job\"}[$__rate_interval]))\n)\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "Write",
"refId": "A"
},
{
"expr": "round(\n sum(rate(ceph_rbd_read_latency_sum{job=~\"$job\"}[$__rate_interval])) /\n sum(rate(ceph_rbd_read_latency_count{job=~\"$job\"}[$__rate_interval]))\n)\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "Read",
"refId": "B"
}
],
"thresholds": [ ],
"timeFrom": null,
"timeShift": null,
"title": "Average Latency",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [ ]
},
"yaxes": [
{
"format": "ns",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
}
]
},
{
"columns": [ ],
"datasource": "${prometheusds}",
"description": "RBD per-image IO statistics are disabled by default.\n\nPlease refer to https://docs.ceph.com/en/latest/mgr/prometheus/#rbd-io-statistics for information about how to enable those optionally.",
"gridPos": {
"h": 7,
"w": 8,
"x": 0,
"y": 7
},
"id": 5,
"links": [ ],
"sort": {
"col": 3,
"desc": true
},
"styles": [
{
"alias": "Pool",
"colorMode": null,
"colors": [
"rgba(245, 54, 54, 0.9)",
"rgba(237, 129, 40, 0.89)",
"rgba(50, 172, 45, 0.97)"
],
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"mappingType": 1,
"pattern": "pool",
"thresholds": [ ],
"type": "string",
"unit": "short",
"valueMaps": [ ]
},
{
"alias": "Image",
"colorMode": null,
"colors": [
"rgba(245, 54, 54, 0.9)",
"rgba(237, 129, 40, 0.89)",
"rgba(50, 172, 45, 0.97)"
],
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"mappingType": 1,
"pattern": "image",
"thresholds": [ ],
"type": "string",
"unit": "short",
"valueMaps": [ ]
},
{
"alias": "IOPS",
"colorMode": null,
"colors": [
"rgba(245, 54, 54, 0.9)",
"rgba(237, 129, 40, 0.89)",
"rgba(50, 172, 45, 0.97)"
],
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"mappingType": 1,
"pattern": "Value",
"thresholds": [ ],
"type": "number",
"unit": "iops",
"valueMaps": [ ]
},
{
"alias": "",
"colorMode": null,
"colors": [
"rgba(245, 54, 54, 0.9)",
"rgba(237, 129, 40, 0.89)",
"rgba(50, 172, 45, 0.97)"
],
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"mappingType": 1,
"pattern": "/.*/",
"thresholds": [ ],
"type": "hidden",
"unit": "short",
"valueMaps": [ ]
}
],
"targets": [
{
"expr": "topk(10,\n (\n sort((\n rate(ceph_rbd_write_ops{job=~\"$job\"}[$__rate_interval]) +\n on (image, pool, namespace) rate(ceph_rbd_read_ops{job=~\"$job\"}[$__rate_interval])\n ))\n )\n)\n",
"format": "table",
"instant": true,
"intervalFactor": 1,
"legendFormat": "",
"refId": "A"
}
],
"timeFrom": null,
"timeShift": null,
"title": "Highest IOPS",
"transform": "table",
"type": "table"
},
{
"columns": [ ],
"datasource": "${prometheusds}",
"description": "RBD per-image IO statistics are disabled by default.\n\nPlease refer to https://docs.ceph.com/en/latest/mgr/prometheus/#rbd-io-statistics for information about how to enable those optionally.",
"gridPos": {
"h": 7,
"w": 8,
"x": 8,
"y": 7
},
"id": 6,
"links": [ ],
"sort": {
"col": 3,
"desc": true
},
"styles": [
{
"alias": "Pool",
"colorMode": null,
"colors": [
"rgba(245, 54, 54, 0.9)",
"rgba(237, 129, 40, 0.89)",
"rgba(50, 172, 45, 0.97)"
],
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"mappingType": 1,
"pattern": "pool",
"thresholds": [ ],
"type": "string",
"unit": "short",
"valueMaps": [ ]
},
{
"alias": "Image",
"colorMode": null,
"colors": [
"rgba(245, 54, 54, 0.9)",
"rgba(237, 129, 40, 0.89)",
"rgba(50, 172, 45, 0.97)"
],
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"mappingType": 1,
"pattern": "image",
"thresholds": [ ],
"type": "string",
"unit": "short",
"valueMaps": [ ]
},
{
"alias": "Throughput",
"colorMode": null,
"colors": [
"rgba(245, 54, 54, 0.9)",
"rgba(237, 129, 40, 0.89)",
"rgba(50, 172, 45, 0.97)"
],
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"mappingType": 1,
"pattern": "Value",
"thresholds": [ ],
"type": "number",
"unit": "Bps",
"valueMaps": [ ]
},
{
"alias": "",
"colorMode": null,
"colors": [
"rgba(245, 54, 54, 0.9)",
"rgba(237, 129, 40, 0.89)",
"rgba(50, 172, 45, 0.97)"
],
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"mappingType": 1,
"pattern": "/.*/",
"thresholds": [ ],
"type": "hidden",
"unit": "short",
"valueMaps": [ ]
}
],
"targets": [
{
"expr": "topk(10,\n sort(\n sum(\n rate(ceph_rbd_read_bytes{job=~\"$job\"}[$__rate_interval]) +\n rate(ceph_rbd_write_bytes{job=~\"$job\"}[$__rate_interval])\n ) by (pool, image, namespace)\n )\n)\n",
"format": "table",
"instant": true,
"intervalFactor": 1,
"legendFormat": "",
"refId": "A"
}
],
"timeFrom": null,
"timeShift": null,
"title": "Highest Throughput",
"transform": "table",
"type": "table"
},
{
"columns": [ ],
"datasource": "${prometheusds}",
"description": "RBD per-image IO statistics are disabled by default.\n\nPlease refer to https://docs.ceph.com/en/latest/mgr/prometheus/#rbd-io-statistics for information about how to enable those optionally.",
"gridPos": {
"h": 7,
"w": 8,
"x": 16,
"y": 7
},
"id": 7,
"links": [ ],
"sort": {
"col": 3,
"desc": true
},
"styles": [
{
"alias": "Pool",
"colorMode": null,
"colors": [
"rgba(245, 54, 54, 0.9)",
"rgba(237, 129, 40, 0.89)",
"rgba(50, 172, 45, 0.97)"
],
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"mappingType": 1,
"pattern": "pool",
"thresholds": [ ],
"type": "string",
"unit": "short",
"valueMaps": [ ]
},
{
"alias": "Image",
"colorMode": null,
"colors": [
"rgba(245, 54, 54, 0.9)",
"rgba(237, 129, 40, 0.89)",
"rgba(50, 172, 45, 0.97)"
],
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"mappingType": 1,
"pattern": "image",
"thresholds": [ ],
"type": "string",
"unit": "short",
"valueMaps": [ ]
},
{
"alias": "Latency",
"colorMode": null,
"colors": [
"rgba(245, 54, 54, 0.9)",
"rgba(237, 129, 40, 0.89)",
"rgba(50, 172, 45, 0.97)"
],
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"mappingType": 1,
"pattern": "Value",
"thresholds": [ ],
"type": "number",
"unit": "ns",
"valueMaps": [ ]
},
{
"alias": "",
"colorMode": null,
"colors": [
"rgba(245, 54, 54, 0.9)",
"rgba(237, 129, 40, 0.89)",
"rgba(50, 172, 45, 0.97)"
],
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"mappingType": 1,
"pattern": "/.*/",
"thresholds": [ ],
"type": "hidden",
"unit": "short",
"valueMaps": [ ]
}
],
"targets": [
{
"expr": "topk(10,\n sum(\n rate(ceph_rbd_write_latency_sum{job=~\"$job\"}[$__rate_interval]) /\n clamp_min(rate(ceph_rbd_write_latency_count{job=~\"$job\"}[$__rate_interval]), 1) +\n rate(ceph_rbd_read_latency_sum{job=~\"$job\"}[$__rate_interval]) /\n clamp_min(rate(ceph_rbd_read_latency_count{job=~\"$job\"}[$__rate_interval]), 1)\n ) by (pool, image, namespace)\n)\n",
"format": "table",
"instant": true,
"intervalFactor": 1,
"legendFormat": "",
"refId": "A"
}
],
"timeFrom": null,
"timeShift": null,
"title": "Highest Latency",
"transform": "table",
"type": "table"
}
],
"refresh": "30s",
"rows": [ ],
"schemaVersion": 16,
"style": "dark",
"tags": [
"ceph-mixin",
"overview"
],
"templating": {
"list": [
{
"allValue": ".+",
"current": { },
"datasource": "${prometheusds}",
"hide": 2,
"includeAll": true,
"label": "cluster",
"multi": true,
"name": "cluster",
"options": [ ],
"query": "label_values(ceph_osd_metadata, cluster)",
"refresh": 1,
"regex": "(.*)",
"sort": 1,
"tagValuesQuery": "",
"tags": [ ],
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
"allValue": ".+",
"current": { },
"datasource": "${prometheusds}",
"hide": 0,
"includeAll": true,
"label": "job",
"multi": true,
"name": "job",
"options": [ ],
"query": "label_values(ceph_osd_metadata{}, job)",
"refresh": 1,
"regex": "(.*)",
"sort": 1,
"tagValuesQuery": "",
"tags": [ ],
"tagsQuery": "",
"type": "query",
"useTags": false
}
]
},
"time": {
"from": "now-1h",
"to": "now"
},
"timepicker": {
"refresh_intervals": [
"5s",
"10s",
"30s",
"1m",
"5m",
"15m",
"30m",
"1h",
"2h",
"1d"
],
"time_options": [
"5m",
"15m",
"1h",
"6h",
"12h",
"24h",
"2d",
"7d",
"30d"
]
},
"timezone": "",
"title": "RBD Overview",
"uid": "41FrpeUiz",
"version": 0
}

View File

@ -0,0 +1,842 @@
# Copyright 2023 Canonical Ltd.
# See LICENSE file for licensing details.
r"""## Overview.
This library can be used to manage the cos_agent relation interface:
- `COSAgentProvider`: Use in machine charms that need to have a workload's metrics
or logs scraped, or forward rule files or dashboards to Prometheus, Loki or Grafana through
the Grafana Agent machine charm.
- `COSAgentRequirer`: Used in the Grafana Agent machine charm to manage the requirer side of
the `cos_agent` interface.
## COSAgentProvider Library Usage
Grafana Agent machine Charmed Operator interacts with its clients using the cos_agent library.
Charms seeking to send telemetry, must do so using the `COSAgentProvider` object from
this charm library.
Using the `COSAgentProvider` object only requires instantiating it,
typically in the `__init__` method of your charm (the one which sends telemetry).
The constructor of `COSAgentProvider` has only one required and nine optional parameters:
```python
def __init__(
self,
charm: CharmType,
relation_name: str = DEFAULT_RELATION_NAME,
metrics_endpoints: Optional[List[_MetricsEndpointDict]] = None,
metrics_rules_dir: str = "./src/prometheus_alert_rules",
logs_rules_dir: str = "./src/loki_alert_rules",
recurse_rules_dirs: bool = False,
log_slots: Optional[List[str]] = None,
dashboard_dirs: Optional[List[str]] = None,
refresh_events: Optional[List] = None,
scrape_configs: Optional[Union[List[Dict], Callable]] = None,
):
```
### Parameters
- `charm`: The instance of the charm that instantiates `COSAgentProvider`, typically `self`.
- `relation_name`: If your charmed operator uses a relation name other than `cos-agent` to use
the `cos_agent` interface, this is where you have to specify that.
- `metrics_endpoints`: In this parameter you can specify the metrics endpoints that Grafana Agent
machine Charmed Operator will scrape. The configs of this list will be merged with the configs
from `scrape_configs`.
- `metrics_rules_dir`: The directory in which the Charmed Operator stores its metrics alert rules
files.
- `logs_rules_dir`: The directory in which the Charmed Operator stores its logs alert rules files.
- `recurse_rules_dirs`: This parameter sets whether the Grafana Agent machine Charmed Operator has to
search alert rules files recursively in the previous two directories or not.
- `log_slots`: Snap slots to connect to for scraping logs in the form ["snap-name:slot", ...].
- `dashboard_dirs`: List of directories where the dashboards are stored in the Charmed Operator.
- `refresh_events`: List of events on which to refresh relation data.
- `scrape_configs`: List of standard scrape_configs dicts or a callable that returns the list in
case the configs need to be generated dynamically. The contents of this list will be merged
with the configs from `metrics_endpoints`.
### Example 1 - Minimal instrumentation:
In order to use this object the following should be in the `charm.py` file.
```python
from charms.grafana_agent.v0.cos_agent import COSAgentProvider
...
class TelemetryProviderCharm(CharmBase):
def __init__(self, *args):
...
self._grafana_agent = COSAgentProvider(self)
```
### Example 2 - Full instrumentation:
In order to use this object the following should be in the `charm.py` file.
```python
from charms.grafana_agent.v0.cos_agent import COSAgentProvider
...
class TelemetryProviderCharm(CharmBase):
def __init__(self, *args):
...
self._grafana_agent = COSAgentProvider(
self,
relation_name="custom-cos-agent",
metrics_endpoints=[
# specify "path" and "port" to scrape from localhost
{"path": "/metrics", "port": 9000},
{"path": "/metrics", "port": 9001},
{"path": "/metrics", "port": 9002},
],
metrics_rules_dir="./src/alert_rules/prometheus",
logs_rules_dir="./src/alert_rules/loki",
recurse_rules_dirs=True,
log_slots=["my-app:slot"],
dashboard_dirs=["./src/dashboards_1", "./src/dashboards_2"],
refresh_events=["update-status", "upgrade-charm"],
scrape_configs=[
{
"job_name": "custom_job",
"metrics_path": "/metrics",
"authorization": {"credentials": "bearer-token"},
"static_configs": [
{
"targets": ["localhost:9003"],
"labels": {"key": "value"},
},
],
},
]
)
```
### Example 3 - Dynamic scrape configs generation:
Pass a function to the `scrape_configs` to decouple the generation of the configs
from the instantiation of the COSAgentProvider object.
```python
from charms.grafana_agent.v0.cos_agent import COSAgentProvider
...
class TelemetryProviderCharm(CharmBase):
def generate_scrape_configs(self):
return [
{
"job_name": "custom",
"metrics_path": "/metrics",
"static_configs": [{"targets": ["localhost:9000"]}],
},
]
def __init__(self, *args):
...
self._grafana_agent = COSAgentProvider(
self,
scrape_configs=self.generate_scrape_configs,
)
```
## COSAgentRequirer Library Usage
This object may be used by any Charmed Operator which gathers telemetry data by
implementing the consumer side of the `cos_agent` interface.
For instance Grafana Agent machine Charmed Operator.
For this purpose the charm needs to instantiate the `COSAgentRequirer` object with one mandatory
and three optional keyword arguments.
### Parameters
- `charm`: A reference to the parent (Grafana Agent machine) charm.
- `relation_name`: The name of the relation that the charm uses to interact
with its clients that provide telemetry data using the `COSAgentProvider` object.
If provided, this relation name must match a provided relation in metadata.yaml with the
`cos_agent` interface.
The default value of this argument is "cos-agent".
- `peer_relation_name`: The name of the peer relation used to exchange data between the
subordinate units. The default value of this argument is "peers".
- `refresh_events`: List of events on which to refresh relation data.
### Example 1 - Minimal instrumentation:
In order to use this object the following should be in the `charm.py` file.
```python
from charms.grafana_agent.v0.cos_agent import COSAgentRequirer
...
class GrafanaAgentMachineCharm(GrafanaAgentCharm):
def __init__(self, *args):
...
self._cos = COSAgentRequirer(self)
```
### Example 2 - Full instrumentation:
In order to use this object the following should be in the `charm.py` file.
```python
from charms.grafana_agent.v0.cos_agent import COSAgentRequirer
...
class GrafanaAgentMachineCharm(GrafanaAgentCharm):
def __init__(self, *args):
...
self._cos = COSAgentRequirer(
self,
relation_name="cos-agent-consumer",
refresh_events=["update-status", "upgrade-charm"],
)
```
"""
import base64
import json
import logging
import lzma
from collections import namedtuple
from itertools import chain
from pathlib import Path
from typing import TYPE_CHECKING, Any, Callable, ClassVar, Dict, List, Optional, Set, Union
import pydantic
from cosl import JujuTopology
from cosl.rules import AlertRules
from ops.charm import RelationChangedEvent
from ops.framework import EventBase, EventSource, Object, ObjectEvents
from ops.model import Relation, Unit
from ops.testing import CharmType
if TYPE_CHECKING:
    # Only evaluated by static type checkers; at runtime the name is referenced
    # solely through the string annotation "_MetricsEndpointDict".
    try:
        from typing import TypedDict

        class _MetricsEndpointDict(TypedDict):
            """Shape of one `metrics_endpoints` entry: a path and a port to scrape."""

            path: str
            port: int

    except ImportError:
        # A missing `TypedDict` name raises a plain ImportError (the `typing` module itself
        # exists), which `except ModuleNotFoundError` would not catch. ImportError also
        # covers ModuleNotFoundError, so the previous behaviour is preserved.
        _MetricsEndpointDict = Dict  # pyright: ignore

# Library identification and versioning metadata.
LIBID = "dc15fa84cef84ce58155fb84f6c6213a"
LIBAPI = 0
LIBPATCH = 6

# Python package requirements of this library.
PYDEPS = ["cosl", "pydantic < 2"]

DEFAULT_RELATION_NAME = "cos-agent"
DEFAULT_PEER_RELATION_NAME = "peers"
# Fallback scrape config used by the provider when neither `metrics_endpoints`
# nor `scrape_configs` yields any job.
DEFAULT_SCRAPE_CONFIG = {
    "static_configs": [{"targets": ["localhost:80"]}],
    "metrics_path": "/metrics",
}

logger = logging.getLogger(__name__)
SnapEndpoint = namedtuple("SnapEndpoint", "owner, name")
class GrafanaDashboard(str):
    """Grafana dashboard JSON held as text: lzma-compressed, then base64-encoded."""

    # TODO Replace this with a custom type when pydantic v2 released (end of 2023 Q1?)
    # https://github.com/pydantic/pydantic/issues/4887
    @staticmethod
    def _serialize(raw_json: Union[str, bytes]) -> "GrafanaDashboard":
        """Compress and encode a raw dashboard.

        Args:
            raw_json: dashboard content; `str` input is encoded as UTF-8 first.

        Returns:
            A `GrafanaDashboard` whose text is the base64 form of the lzma-compressed input.
        """
        payload = raw_json if isinstance(raw_json, bytes) else raw_json.encode("utf-8")
        compressed = lzma.compress(payload)
        return GrafanaDashboard(base64.b64encode(compressed).decode("utf-8"))

    def _deserialize(self) -> Dict:
        """Decode, decompress and parse the dashboard.

        Returns:
            The parsed JSON content, or an empty dict (after logging an error) when the
            decompressed text is not valid JSON.
        """
        try:
            decompressed = lzma.decompress(base64.b64decode(self.encode("utf-8")))
            return json.loads(decompressed.decode())
        except json.decoder.JSONDecodeError as err:
            logger.error("Invalid Dashboard format: %s", err)
            return {}

    def __repr__(self):
        """Return a fixed placeholder instead of the (long) encoded payload."""
        return "<GrafanaDashboard>"
class CosAgentProviderUnitData(pydantic.BaseModel):
"""Unit databag model for `cos-agent` relation.

`COSAgentProvider._on_refresh` builds this model and stores its JSON dump in the
provider unit's databag under `KEY`; `COSAgentRequirer` parses that value back.
"""
# The following entries are the same for all units of the same principal.
# Note that the same grafana agent subordinate may be related to several apps.
# This data needs to make its way to the grafana agent leader.
metrics_alert_rules: dict
log_alert_rules: dict
dashboards: List[GrafanaDashboard]
# Filled by the provider from the charm's `meta.subordinate`.
subordinate: Optional[bool]
# The following entries may vary across units of the same principal app.
# This data does not need to be forwarded to the grafana agent leader.
metrics_scrape_jobs: List[Dict]
log_slots: List[str]
# When this whole data structure is dumped into a databag, it will be nested under this key.
# While not strictly necessary (we could have it 'flattened out' into the databag),
# this simplifies working with the model.
KEY: ClassVar[str] = "config"
class CosAgentPeersUnitData(pydantic.BaseModel):
    """Unit databag model for `peers` cos-agent machine charm peer relation."""

    # The principal unit name and the relation metadata are kept so that identifiers
    # (e.g. topology) can be rendered on the leader side once the data has moved into peer
    # data: the grafana agent leader can only see its own principal, because it is a
    # subordinate charm.
    principal_unit_name: str
    principal_relation_id: str
    principal_relation_name: str

    # Only data that needs to go into the app databags of the outgoing o11y relations is
    # forwarded to the leader.
    metrics_alert_rules: Optional[dict]
    log_alert_rules: Optional[dict]
    dashboards: Optional[List[GrafanaDashboard]]

    # Key under which the dumped model is nested inside a databag. Flattening the fields
    # into the databag would also work, but nesting simplifies working with the model.
    KEY: ClassVar[str] = "config"

    @property
    def app_name(self) -> str:
        """Parse out the app name from the unit name.

        The app name is everything before the first "/" of `principal_unit_name`.

        TODO: Switch to using `model_post_init` when pydantic v2 is released?
          https://github.com/pydantic/pydantic/issues/1729#issuecomment-1300576214
        """
        name_parts = self.principal_unit_name.split("/")
        return name_parts[0]
class COSAgentProvider(Object):
    """Integration endpoint wrapper for the provider side of the cos_agent interface.

    On relation-joined, relation-changed and every event in `refresh_events`, the provider
    collects alert rules, dashboards, scrape jobs and log slots and writes them, as the JSON
    dump of a `CosAgentProviderUnitData`, into this unit's databag of every relation named
    `relation_name`.
    """

    def __init__(
        self,
        charm: CharmType,
        relation_name: str = DEFAULT_RELATION_NAME,
        metrics_endpoints: Optional[List["_MetricsEndpointDict"]] = None,
        metrics_rules_dir: str = "./src/prometheus_alert_rules",
        logs_rules_dir: str = "./src/loki_alert_rules",
        recurse_rules_dirs: bool = False,
        log_slots: Optional[List[str]] = None,
        dashboard_dirs: Optional[List[str]] = None,
        refresh_events: Optional[List] = None,
        *,
        scrape_configs: Optional[Union[List[dict], Callable]] = None,
    ):
        """Create a COSAgentProvider instance.

        Args:
            charm: The `CharmBase` instance that is instantiating this object.
            relation_name: The name of the relation to communicate over.
            metrics_endpoints: List of endpoints in the form [{"path": path, "port": port}, ...].
                This argument is a simplified form of the `scrape_configs`.
                The contents of this list will be merged with the contents of `scrape_configs`.
            metrics_rules_dir: Directory where the metrics rules are stored.
            logs_rules_dir: Directory where the logs rules are stored.
            recurse_rules_dirs: Whether to recurse into rule paths.
            log_slots: Snap slots to connect to for scraping logs
                in the form ["snap-name:slot", ...].
            dashboard_dirs: List of directories where the dashboards are stored.
                Defaults to ["./src/grafana_dashboards"].
            refresh_events: List of events on which to refresh relation data.
                Defaults to the charm's `config_changed` event.
            scrape_configs: List of standard scrape_configs dicts or a callable
                that returns the list in case the configs need to be generated dynamically.
                The contents of this list will be merged with the contents of `metrics_endpoints`.
        """
        super().__init__(charm, relation_name)
        dashboard_dirs = dashboard_dirs or ["./src/grafana_dashboards"]

        self._charm = charm
        self._relation_name = relation_name
        self._metrics_endpoints = metrics_endpoints or []
        self._scrape_configs = scrape_configs or []
        self._metrics_rules = metrics_rules_dir
        self._logs_rules = logs_rules_dir
        self._recursive = recurse_rules_dirs
        self._log_slots = log_slots or []
        self._dashboard_dirs = dashboard_dirs
        self._refresh_events = refresh_events or [self._charm.on.config_changed]

        events = self._charm.on[relation_name]
        self.framework.observe(events.relation_joined, self._on_refresh)
        self.framework.observe(events.relation_changed, self._on_refresh)
        for event in self._refresh_events:
            self.framework.observe(event, self._on_refresh)

    def _on_refresh(self, event):
        """Trigger the class to update relation data."""
        relations = self._charm.model.relations[self._relation_name]

        for relation in relations:
            # Before a principal is related to the grafana-agent subordinate, we'd get
            # ModelError: ERROR cannot read relation settings: unit "zk/2": settings not found
            # Add a guard to make sure it doesn't happen.
            if relation.data and self._charm.unit in relation.data:
                # Subordinate relations can communicate only over unit data.
                try:
                    data = CosAgentProviderUnitData(
                        metrics_alert_rules=self._metrics_alert_rules,
                        log_alert_rules=self._log_alert_rules,
                        dashboards=self._dashboards,
                        metrics_scrape_jobs=self._scrape_jobs,
                        log_slots=self._log_slots,
                        subordinate=self._charm.meta.subordinate,
                    )
                    relation.data[self._charm.unit][data.KEY] = data.json()
                except (
                    pydantic.ValidationError,
                    json.decoder.JSONDecodeError,
                ) as e:
                    logger.error("Invalid relation data provided: %s", e)

    @property
    def _scrape_jobs(self) -> List[Dict]:
        """Return a prometheus_scrape-like data structure for jobs.

        The user supplied `scrape_configs` (or the result of calling it) is merged with the
        configs derived from `metrics_endpoints`; when both are empty,
        `DEFAULT_SCRAPE_CONFIG` is used. Every job name is then rewritten to
        "<app name>_<index>_<original job name or 'default'>".

        The returned dicts are fresh copies: neither the caller's configs nor
        `DEFAULT_SCRAPE_CONFIG` are modified, so evaluating this property repeatedly
        (it is read once per relation in `_on_refresh`) always yields the same names.

        https://prometheus.io/docs/prometheus/latest/configuration/configuration/#scrape_config
        """
        if callable(self._scrape_configs):
            user_configs = self._scrape_configs() or []
        else:
            user_configs = self._scrape_configs

        # Copy the list AND each config dict: "job_name" is overwritten below, and writing
        # into the caller's dicts would stack the "<app>_<idx>_" prefix on every evaluation.
        scrape_configs = [dict(config) for config in user_configs]

        # Convert "metrics_endpoints" to standard scrape_configs, and add them in
        for endpoint in self._metrics_endpoints:
            scrape_configs.append(
                {
                    "metrics_path": endpoint["path"],
                    "static_configs": [{"targets": [f"localhost:{endpoint['port']}"]}],
                }
            )

        if not scrape_configs:
            # Copy for the same reason as above: never write into the module-level default.
            scrape_configs = [dict(DEFAULT_SCRAPE_CONFIG)]

        # Augment job name to include the app name and a unique id (index)
        for idx, scrape_config in enumerate(scrape_configs):
            scrape_config["job_name"] = "_".join(
                [self._charm.app.name, str(idx), scrape_config.get("job_name", "default")]
            )

        return scrape_configs

    @property
    def _metrics_alert_rules(self) -> Dict:
        """Use (for now) the prometheus_scrape AlertRules to initialize this."""
        alert_rules = AlertRules(
            query_type="promql", topology=JujuTopology.from_charm(self._charm)
        )
        alert_rules.add_path(self._metrics_rules, recursive=self._recursive)
        return alert_rules.as_dict()

    @property
    def _log_alert_rules(self) -> Dict:
        """Use (for now) the loki_push_api AlertRules to initialize this."""
        alert_rules = AlertRules(query_type="logql", topology=JujuTopology.from_charm(self._charm))
        alert_rules.add_path(self._logs_rules, recursive=self._recursive)
        return alert_rules.as_dict()

    @property
    def _dashboards(self) -> List[GrafanaDashboard]:
        """Return every file found directly inside `dashboard_dirs`, serialized.

        Sub-directories are skipped: calling `read_bytes()` on one would raise and abort
        the whole refresh.
        """
        dashboards: List[GrafanaDashboard] = []
        for d in self._dashboard_dirs:
            for path in Path(d).glob("*"):
                if not path.is_file():
                    continue
                dashboard = GrafanaDashboard._serialize(path.read_bytes())
                dashboards.append(dashboard)
        return dashboards
class COSAgentDataChanged(EventBase):
"""Event emitted by `COSAgentRequirer` when relation data changes.

The event defines no attributes of its own; handlers are expected to read the
current data from the requirer object.
"""
class COSAgentValidationError(EventBase):
    """Event emitted by `COSAgentRequirer` when there is an error in the relation data."""

    def __init__(self, handle, message: str = ""):
        """Create the event.

        Args:
            handle: event handle, passed through to `EventBase`.
            message: description of the validation failure.
        """
        super().__init__(handle)
        self.message = message

    def snapshot(self) -> Dict:
        """Save COSAgentValidationError source information."""
        saved_state = {"message": self.message}
        return saved_state

    def restore(self, snapshot):
        """Restore COSAgentValidationError source information."""
        restored_message = snapshot["message"]
        self.message = restored_message
class COSAgentRequirerEvents(ObjectEvents):
"""`COSAgentRequirer` events.

Exposed on the requirer object as `on.data_changed` and `on.validation_error`.
"""
# Emitted by the requirer after it has handled a relation or peer data change.
data_changed = EventSource(COSAgentDataChanged)
# Emitted, with a `message`, when provider data cannot be parsed or validated.
validation_error = EventSource(COSAgentValidationError)
class MultiplePrincipalsError(Exception):
    """Raised when more than one principal application is found where one is expected."""
class COSAgentRequirer(Object):
"""Integration endpoint wrapper for the Requirer side of the cos_agent interface."""
on = COSAgentRequirerEvents() # pyright: ignore
def __init__(
    self,
    charm: CharmType,
    *,
    relation_name: str = DEFAULT_RELATION_NAME,
    peer_relation_name: str = DEFAULT_PEER_RELATION_NAME,
    refresh_events: Optional[List[str]] = None,
):
    """Create a COSAgentRequirer instance.

    Registers observers for the `relation_name` relation, for the peer relation and for
    every event in `refresh_events`.

    Args:
        charm: The `CharmBase` instance that is instantiating this object.
        relation_name: The name of the relation to communicate over.
        peer_relation_name: The name of the peer relation to communicate over.
        refresh_events: List of events on which to refresh relation data.
            Defaults to the charm's `config_changed` event.
    """
    super().__init__(charm, relation_name)
    self._charm = charm
    self._relation_name = relation_name
    self._peer_relation_name = peer_relation_name
    self._refresh_events = refresh_events or [self._charm.on.config_changed]

    # Both relation events funnel into the same handler.
    # TODO: do we need to observe relation_joined?
    relation_events = self._charm.on[relation_name]
    for bound_event in (relation_events.relation_joined, relation_events.relation_changed):
        self.framework.observe(bound_event, self._on_relation_data_changed)

    for refresh_event in self._refresh_events:
        self.framework.observe(refresh_event, self.trigger_refresh)  # pyright: ignore

    # Peer relation events
    # A peer relation is needed as it is the only mechanism for exchanging data across
    # subordinate units.
    # self.framework.observe(
    #     self.on[self._peer_relation_name].relation_joined, self._on_peer_relation_joined
    # )
    self.framework.observe(
        self._charm.on[peer_relation_name].relation_changed,
        self._on_peer_relation_changed,
    )
@property
def peer_relation(self) -> Optional["Relation"]:
    """Look up the peer relation by the name given at construction time.

    Returns: peer relation object
        (NOTE: would return None if called too early, e.g. during install).
    """
    peer_name = self._peer_relation_name
    return self.model.get_relation(peer_name)
def _on_peer_relation_changed(self, _):
# Peer data is used for forwarding data from principal units to the grafana agent
# subordinate leader, for updating the app data of the outgoing o11y relations.
if self._charm.unit.is_leader():
self.on.data_changed.emit() # pyright: ignore
def _on_relation_data_changed(self, event: RelationChangedEvent):
    """Copy validated provider data from the principal relation into the peer databag.

    The event is deferred while the peer relation does not exist. Events without
    a remote unit, without data for that unit, without the provider key, or with
    data that fails validation are ignored.

    Raises:
        ValueError: the relation reports more than one remote unit.
    """
    # Peer data is the only means of communication between subordinate units.
    peers = self.peer_relation
    if not peers:
        event.defer()
        return

    relation = event.relation
    remote_unit = event.unit
    if not remote_unit or not relation.data.get(remote_unit):
        return

    # Coherence check: this should never happen.
    if len(relation.units) > 1:
        raise ValueError(
            f"unexpected error: subordinate relation {relation} "
            f"should have exactly one unit"
        )

    raw = relation.data[remote_unit].get(CosAgentProviderUnitData.KEY)
    if not raw:
        return

    provider_data = self._validated_provider_data(raw)
    if not provider_data:
        return

    # Copy data from the principal relation to the peer relation, so the leader could
    # follow up. The originating unit name is saved too, so the leader can use it for
    # topology later on.
    peer_payload = CosAgentPeersUnitData(  # peer relation databag model
        principal_unit_name=remote_unit.name,
        principal_relation_id=str(relation.id),
        principal_relation_name=relation.name,
        metrics_alert_rules=provider_data.metrics_alert_rules,
        log_alert_rules=provider_data.log_alert_rules,
        dashboards=provider_data.dashboards,
    )
    databag_key = f"{CosAgentPeersUnitData.KEY}-{remote_unit.name}"
    peers.data[self._charm.unit][databag_key] = peer_payload.json()

    # We can't easily tell if the data that was changed is limited to only the data
    # that goes into peer relation (in which case, if this is not a leader unit, we wouldn't
    # need to emit `on.data_changed`), so we're emitting `on.data_changed` either way.
    self.on.data_changed.emit()  # pyright: ignore
def _validated_provider_data(self, raw) -> Optional[CosAgentProviderUnitData]:
    """Parse `raw` as JSON and build a `CosAgentProviderUnitData` from it.

    Returns:
        The model instance, or None after emitting `validation_error` when
        the JSON cannot be decoded or pydantic validation fails.
    """
    try:
        decoded = json.loads(raw)
        return CosAgentProviderUnitData(**decoded)
    except (pydantic.ValidationError, json.decoder.JSONDecodeError) as error:
        self.on.validation_error.emit(message=str(error))  # pyright: ignore
    return None
def trigger_refresh(self, _):
    """Trigger a refresh of relation data by emitting `data_changed`."""
    # FIXME: Figure out what we should do here
    data_changed = self.on.data_changed
    data_changed.emit()  # pyright: ignore
@property
def _principal_unit(self) -> Optional[Unit]:
    """Return the principal unit for a relation, or None if there is none.

    Assumes that the relation is of type subordinate.
    Relies on the fact that, for subordinate relations, the only remote unit visible to
    *this unit* is the principal unit that this unit is attached to.
    """
    relations = self._principal_relations
    if not relations:
        return None

    # Technically it's a list, but for subordinates there can only be one relation
    first_relation = next(iter(relations))
    units = first_relation.units
    if not units:
        return None

    # Technically it's a collection, but for subordinates there can only be one unit
    return next(iter(units))
@property
def _principal_relations(self):
relations = []
for relation in self._charm.model.relations[self._relation_name]:
if not json.loads(relation.data[next(iter(relation.units))]["config"]).get(
["subordinate"], False
):
relations.append(relation)
if len(relations) > 1:
logger.error(
"Multiple applications claiming to be principal. Update the cos-agent library in the client application charms."
)
raise MultiplePrincipalsError("Multiple principal applications.")
return relations
@property
def _remote_data(self) -> List[CosAgentProviderUnitData]:
    """Collect validated provider data from one remote unit of each relation.

    Assumes that the relation is of type subordinate.
    Relies on the fact that, for subordinate relations, the only remote unit visible to
    *this unit* is the principal unit that this unit is attached to.

    Relations with no units, no provider key, or invalid data are left out.
    """
    collected = []
    for relation in self._charm.model.relations[self._relation_name]:
        remote_units = relation.units
        if not remote_units:
            continue

        principal = next(iter(remote_units))
        raw = relation.data[principal].get(CosAgentProviderUnitData.KEY)
        provider_data = self._validated_provider_data(raw) if raw else None
        if provider_data:
            collected.append(provider_data)
    return collected
def _gather_peer_data(self) -> List[CosAgentPeersUnitData]:
    """Collect data from the peers, keeping one entry per principal application.

    Reads this unit's peer databag first, then those of the other peer units.
    Only keys starting with `CosAgentPeersUnitData.KEY` are considered.

    Returns:
        A trimmed-down list of CosAgentPeersUnitData; empty when the peer
        relation, its data or its app is not available.
    """
    peers = self.peer_relation

    # Ensure that whatever context we're running this in, we take the necessary precautions:
    if not peers or not peers.data or not peers.app:
        return []

    # Iterate over all peer unit data and only collect every principal once.
    collected: List[CosAgentPeersUnitData] = []
    seen_apps: Set[str] = set()
    for unit in chain((self._charm.unit,), peers.units):
        databag = peers.data.get(unit)
        if not databag:
            continue

        for key in databag:  # pyright: ignore
            if not key.startswith(CosAgentPeersUnitData.KEY):
                continue
            raw = databag.get(key)
            if raw is None:
                continue

            entry = CosAgentPeersUnitData(**json.loads(raw))
            app = entry.app_name
            # Only the first entry seen for a principal app is kept.
            if app not in seen_apps:
                collected.append(entry)
                seen_apps.add(app)
    return collected
@property
def metrics_alerts(self) -> Dict[str, Any]:
    """Fetch metrics alert rules, keyed by a topology identifier.

    Peer entries without metrics alert rules are ignored, and only the first
    entry with rules is used for each application name.
    """
    rules_by_identifier = {}
    handled_apps = set()
    for peer in self._gather_peer_data():
        rules = peer.metrics_alert_rules
        if not rules:
            continue

        app = peer.app_name
        if app in handled_apps:
            continue  # dedup!
        handled_apps.add(app)

        # This is only used for naming the file, so be as specific as we can be.
        # For the topology unit, we could use `peer.principal_unit_name`, but that unit
        # name may not be very stable: `_gather_peer_data` de-duplicates by app name so
        # the exact unit name that turns up first in the iterator may vary from time to
        # time. So using the grafana-agent unit name instead.
        topology = JujuTopology(
            model=self._charm.model.name,
            model_uuid=self._charm.model.uuid,
            application=app,
            unit=self._charm.unit.name,
        )
        rules_by_identifier[topology.identifier] = rules
    return rules_by_identifier
@property
def metrics_jobs(self) -> List[Dict]:
    """Extract the metrics scrape jobs from the remote relation data.

    Jobs in the legacy simplified form (having "path", "port" and "job_name")
    are converted to a standard scrape config targeting localhost; all other
    jobs are passed through unchanged.
    """
    legacy_keys = ("path", "port", "job_name")
    jobs = []
    for provider_data in self._remote_data:
        for job in provider_data.metrics_scrape_jobs:
            # In #220, relation schema changed from a simplified dict to the standard
            # `scrape_configs`.
            # This is to ensure backwards compatibility with Providers older than v0.5.
            if all(key in job for key in legacy_keys):
                job = {
                    "job_name": job["job_name"],
                    "metrics_path": job["path"],
                    "static_configs": [{"targets": [f"localhost:{job['port']}"]}],
                }
            jobs.append(job)
    return jobs
@property
def snap_log_endpoints(self) -> List[SnapEndpoint]:
    """Fetch logging endpoints exposed by related snaps.

    Duplicate plugs are reported with a warning and kept only once. Plugs
    without a ":" separator are reported as errors and ignored.
    """
    unique_plugs = []
    for provider_data in self._remote_data:
        for slot in provider_data.log_slots or ():
            if slot not in unique_plugs:
                unique_plugs.append(slot)
                continue
            logger.warning(
                f"plug {slot} already listed. "
                "The same snap is being passed from multiple "
                "endpoints; this should not happen."
            )

    endpoints = []
    for plug in unique_plugs:
        if ":" in plug:
            endpoints.append(SnapEndpoint(*plug.split(":")))
        else:
            logger.error(f"invalid plug definition received: {plug}. Ignoring...")
    return endpoints
@property
def logs_alerts(self) -> Dict[str, Any]:
    """Fetch log alert rules, keyed by a topology identifier.

    Peer entries without log alert rules are ignored, and only the first
    entry with rules is used for each application name.
    """
    rules_by_identifier = {}
    handled_apps = set()
    for peer in self._gather_peer_data():
        rules = peer.log_alert_rules
        if not rules:
            continue

        app = peer.app_name
        if app in handled_apps:
            continue  # dedup!
        handled_apps.add(app)

        # This is only used for naming the file, so be as specific as we can be.
        # For the topology unit, we could use `peer.principal_unit_name`, but that unit
        # name may not be very stable: `_gather_peer_data` de-duplicates by app name so
        # the exact unit name that turns up first in the iterator may vary from time to
        # time. So using the grafana-agent unit name instead.
        topology = JujuTopology(
            model=self._charm.model.name,
            model_uuid=self._charm.model.uuid,
            application=app,
            unit=self._charm.unit.name,
        )
        rules_by_identifier[topology.identifier] = rules
    return rules_by_identifier
@property
def dashboards(self) -> List[Dict[str, str]]:
    """Fetch dashboards as decoded content, one set per principal application.

    Dashboards are assumed not to vary across units of the same primary, so
    only the first peer entry for each application name is used.
    """
    collected: List[Dict[str, Any]] = []
    handled_apps = set()
    for peer in self._gather_peer_data():
        app = peer.app_name
        if app in handled_apps:
            continue  # dedup!
        handled_apps.add(app)

        for encoded in peer.dashboards or ():
            decoded = GrafanaDashboard(encoded)._deserialize()
            entry = {
                "relation_id": peer.principal_relation_id,
                # We have the remote charm name - use it for the identifier
                "charm": f"{peer.principal_relation_name}-{app}",
                "content": decoded,
                "title": decoded.get("title", "no_title"),
            }
            collected.append(entry)
    return collected

View File

@ -42,6 +42,9 @@ provides:
interface: prometheus_scrape
dashboard:
interface: ceph-dashboard
cos-agent:
interface: cos_agent
requires:
bootstrap-source:
interface: ceph-bootstrap

View File

@ -3,12 +3,11 @@
- charm-unit-jobs-py38
- charm-unit-jobs-py310
- charm-yoga-functional-jobs
- charm-zed-functional-jobs
vars:
needs_charm_build: true
charm_build_name: ceph-mon
build_type: charmcraft
charmcraft_channel: 2.0/stable
charmcraft_channel: 2.x/stable
check:
jobs:
- new-install-focal-yoga

View File

@ -9,6 +9,8 @@ import json
import logging
import os.path
import pathlib
import socket
from typing import Optional, Union, List, TYPE_CHECKING
import ops.model
@ -17,6 +19,7 @@ if TYPE_CHECKING:
import charm
from charms.prometheus_k8s.v0 import prometheus_scrape
from charms.grafana_agent.v0 import cos_agent
from charms_ceph import utils as ceph_utils
from ops.framework import BoundEvent
from utils import mgr_config_set_rbd_stats_pools
@ -28,6 +31,10 @@ DEFAULT_CEPH_JOB = {
"metrics_path": "/metrics",
"static_configs": [{"targets": ["*:9283"]}],
}
DEFAULT_CEPH_METRICS_ENDPOINT = {
"path": "/metrics",
"port": 9283,
}
DEFAULT_ALERT_RULES_RELATIVE_PATH = "files/prometheus_alert_rules"
@ -144,3 +151,77 @@ class CephMetricsEndpointProvider(prometheus_scrape.MetricsEndpointProvider):
self._charm._stored.alert_rule_errors = msg
return
self._set_alert_rules(alert_rules_as_dict)
class CephCOSAgentProvider(cos_agent.COSAgentProvider):
    """cos_agent provider that toggles the ceph mgr prometheus module."""

    def __init__(self, charm):
        """Configure rule/dashboard directories and observe relation departure."""
        super().__init__(
            charm,
            metrics_rules_dir="./files/prometheus_alert_rules",
            dashboard_dirs=["./files/grafana_dashboards"],
            scrape_configs=self._custom_scrape_configs,
        )
        relation_events = self._charm.on[cos_agent.DEFAULT_RELATION_NAME]
        self.framework.observe(
            relation_events.relation_departed, self._on_relation_departed
        )

    def _on_refresh(self, event):
        """Enable prometheus on relation change, then run the base refresh.

        The mgr module is only touched on the leader of a bootstrapped
        cluster; the parent class handler runs in every case.
        """
        leader_ready = (
            self._charm.unit.is_leader() and ceph_utils.is_bootstrapped()
        )
        if leader_ready:
            logger.debug("refreshing cos_agent relation")
            mgr_config_set_rbd_stats_pools()
            ceph_utils.mgr_enable_module("prometheus")
        super()._on_refresh(event)

    def _on_relation_departed(self, event):
        """Disable prometheus on depart of relation (leader, bootstrapped only)."""
        if not (self._charm.unit.is_leader() and ceph_utils.is_bootstrapped()):
            return
        logger.debug(
            "is_leader and is_bootstrapped, running rel departed: %s",
            event,
        )
        ceph_utils.mgr_disable_module("prometheus")
        logger.debug("module_disabled")

    def _custom_scrape_configs(self):
        """Return the scrape config for the mgr prometheus endpoint.

        The relabel rules are listed in order and each one rewrites the
        `instance` label.
        """
        fqdn = socket.getfqdn()
        # Everything after the first dot; the whole name when there is no dot.
        _host, dot, remainder = fqdn.partition('.')
        domain = remainder if dot else fqdn

        def relabel_instance(source_label, regex, replacement):
            # Every rule replaces the `instance` label.
            return {
                "source_labels": [source_label],
                "regex": regex,
                "target_label": "instance",
                "action": "replace",
                "replacement": replacement,
            }

        relabel_rules = [
            # localhost:9283 is the generic default instance label added by
            # grafana-agent, which is kinda useless. Replace it with a
            # somewhat more meaningful label.
            relabel_instance("instance", "^localhost:9283$", "ceph_cluster"),
            # If we have a non-empty hostname label, use it as the instance
            # label.
            relabel_instance("hostname", "(.+)", "${1}"),
            # Tack on the domain to the instance label to make it conform to
            # grafana-agent's node-exporter expectations.
            relabel_instance("instance", "(.*)", "${1}." + domain),
        ]
        return [
            {
                "metrics_path": "/metrics",
                "static_configs": [{"targets": ["localhost:9283"]}],
                "honor_labels": True,
                "metric_relabel_configs": relabel_rules,
            },
        ]

View File

@ -23,7 +23,6 @@ import ceph_metrics
import ops_actions
logger = logging.getLogger(__name__)
@ -222,6 +221,7 @@ class CephMonCharm(ops_openstack.core.OSBaseCharm):
self.clients = ceph_client.CephClientProvides(self)
self.metrics_endpoint = ceph_metrics.CephMetricsEndpointProvider(self)
self.cos_agent = ceph_metrics.CephCOSAgentProvider(self)
self.ceph_status = ceph_status.StatusAssessor(self)
self._observe_action(self.on.change_osd_weight_action,

View File

@ -99,13 +99,45 @@ def _handle_rgw_key_rotation(entity, event, model):
event.fail("Entity %s not found" % entity)
def _find_mds_unit(relations, mds_name):
for relation in relations:
for unit in relation.units:
try:
if mds_name == relation.data[unit]['mds-name']:
return relation.data
except KeyError:
logger.exception('mds name not found in relation data bag')
def _handle_mds_key_rotation(entity, event, model):
mds_name = entity[4:]
relations = model.relations.get('mds')
if not relations:
event.fail('No mds relations found')
return
bag = _find_mds_unit(relations, mds_name)
if bag is None:
event.fail('No unit found for entity: %s' % entity)
return
pending_key = _create_key(entity, event)
bag[model.unit][mds_name + "_mds_key"] = pending_key
event.set_results({'message': 'success'})
def _get_osd_tree():
    """Run `ceph osd dump` and return its "osds" entry.

    Returns:
        The value stored under "osds" in the decoded JSON output, or an
        empty tuple when that key is absent.
    """
    command = ["sudo", "ceph", "osd", "dump", "--format=json"]
    raw_output = subprocess.check_output(command)
    dump = json.loads(raw_output.decode("utf8"))
    return dump.get("osds", ())
def _get_osd_addr(osd_id, tree=None):
def _clean_address(addr):
ix = addr.find(":")
return addr if ix < 0 else addr[0:ix]
def _get_osd_addrs(osd_id, tree=None):
if tree is None:
tree = _get_osd_tree()
@ -113,9 +145,9 @@ def _get_osd_addr(osd_id, tree=None):
if osd.get("osd") != osd_id:
continue
addr = osd["public_addr"]
ix = addr.find(":")
return addr if ix < 0 else addr[0:ix]
return [_clean_address(osd[x])
for x in ("public_addr", "cluster_addr")
if x in osd]
def _get_unit_addr(unit, rel_id):
@ -125,13 +157,13 @@ def _get_unit_addr(unit, rel_id):
def _find_osd_unit(relations, model, osd_id, tree):
addr = _get_osd_addr(osd_id, tree)
if not addr:
addrs = _get_osd_addrs(osd_id, tree)
if not addrs:
return None
for relation in relations:
for unit in relation.units:
if _get_unit_addr(unit.name, relation.id) == addr:
if _get_unit_addr(unit.name, relation.id) in addrs:
return relation.data[model.unit]
@ -225,6 +257,8 @@ def rotate_key(event, model=None) -> None:
event.set_results({"message": "success"})
elif entity.startswith("client.rgw."):
_handle_rgw_key_rotation(entity, event, model)
elif entity.startswith('mds.'):
_handle_mds_key_rotation(entity, event, model)
elif entity == "osd":
_rotate_all_osds(event, model)
elif entity.startswith("osd."):

View File

@ -57,3 +57,6 @@ pyopenssl<=22.0.0
# newer jsonschema needs rustc and cargo
jsonschema<4.18.0
pydantic < 2
cosl

View File

@ -1,259 +0,0 @@
variables:
openstack-origin: &openstack-origin cloud:jammy-zed
series: jammy
comment:
- 'machines section to decide order of deployment. database sooner = faster'
machines:
'0':
constraints: mem=3072M
'1':
constraints: mem=3072M
'2':
constraints: mem=3072M
'3':
'4':
'5':
'6':
'7':
'8':
'9':
'10':
'11':
'12':
'13':
'14':
'15':
'16':
'17':
applications:
keystone-mysql-router:
charm: ch:mysql-router
channel: 8.0/edge
glance-mysql-router:
charm: ch:mysql-router
channel: 8.0/edge
cinder-mysql-router:
charm: ch:mysql-router
channel: 8.0/edge
nova-cloud-controller-mysql-router:
charm: ch:mysql-router
channel: 8.0/edge
placement-mysql-router:
charm: ch:mysql-router
channel: 8.0/edge
mysql-innodb-cluster:
charm: ch:mysql-innodb-cluster
num_units: 3
to:
- '0'
- '1'
- '2'
channel: 8.0/edge
ceph-osd:
charm: ch:ceph-osd
num_units: 3
storage:
osd-devices: '10G'
options:
source: *openstack-origin
osd-devices: '/dev/test-non-existent'
to:
- '3'
- '4'
- '5'
channel: quincy/edge
ceph-mon:
charm: ch:ceph-mon
channel: quincy/edge
num_units: 3
options:
source: *openstack-origin
monitor-count: '3'
to:
- '6'
- '7'
- '8'
ceph-fs:
charm: ch:ceph-fs
num_units: 1
options:
source: *openstack-origin
channel: quincy/edge
to:
- '17'
rabbitmq-server:
charm: ch:rabbitmq-server
num_units: 1
to:
- '9'
channel: 3.9/edge
keystone:
expose: True
charm: ch:keystone
num_units: 1
options:
openstack-origin: *openstack-origin
to:
- '10'
channel: zed/edge
nova-compute:
charm: ch:nova-compute
num_units: 1
options:
openstack-origin: *openstack-origin
libvirt-image-backend: rbd
to:
- '11'
channel: zed/edge
glance:
expose: True
charm: ch:glance
num_units: 1
options:
openstack-origin: *openstack-origin
to:
- '12'
channel: zed/edge
cinder:
expose: True
charm: ch:cinder
num_units: 1
options:
block-device: 'None'
glance-api-version: '2'
openstack-origin: *openstack-origin
to:
- '13'
channel: zed/edge
cinder-ceph:
charm: ch:cinder-ceph
channel: zed/edge
nova-cloud-controller:
expose: True
charm: ch:nova-cloud-controller
num_units: 1
options:
openstack-origin: *openstack-origin
to:
- '14'
channel: zed/edge
placement:
charm: ch:placement
num_units: 1
options:
openstack-origin: *openstack-origin
to:
- '15'
channel: zed/edge
prometheus2:
charm: ch:prometheus2
num_units: 1
to:
- '16'
relations:
- - 'nova-compute:amqp'
- 'rabbitmq-server:amqp'
- - 'nova-compute:image-service'
- 'glance:image-service'
- - 'nova-compute:ceph'
- 'ceph-mon:client'
- - nova-compute:ceph-access
- cinder-ceph:ceph-access
- - 'keystone:shared-db'
- 'keystone-mysql-router:shared-db'
- - 'keystone-mysql-router:db-router'
- 'mysql-innodb-cluster:db-router'
- - 'glance:shared-db'
- 'glance-mysql-router:shared-db'
- - 'glance-mysql-router:db-router'
- 'mysql-innodb-cluster:db-router'
- - 'glance:identity-service'
- 'keystone:identity-service'
- - 'glance:amqp'
- 'rabbitmq-server:amqp'
- - 'glance:ceph'
- 'ceph-mon:client'
- - 'cinder:shared-db'
- 'cinder-mysql-router:shared-db'
- - 'cinder-mysql-router:db-router'
- 'mysql-innodb-cluster:db-router'
- - 'cinder:identity-service'
- 'keystone:identity-service'
- - 'cinder:amqp'
- 'rabbitmq-server:amqp'
- - 'cinder:image-service'
- 'glance:image-service'
- - 'cinder-ceph:storage-backend'
- 'cinder:storage-backend'
- - 'cinder-ceph:ceph'
- 'ceph-mon:client'
- - 'ceph-osd:mon'
- 'ceph-mon:osd'
- - 'ceph-mon:mds'
- 'ceph-fs:ceph-mds'
- - 'nova-cloud-controller:shared-db'
- 'nova-cloud-controller-mysql-router:shared-db'
- - 'nova-cloud-controller-mysql-router:db-router'
- 'mysql-innodb-cluster:db-router'
- - 'nova-cloud-controller:identity-service'
- 'keystone:identity-service'
- - 'nova-cloud-controller:amqp'
- 'rabbitmq-server:amqp'
- - 'nova-cloud-controller:cloud-compute'
- 'nova-compute:cloud-compute'
- - 'nova-cloud-controller:image-service'
- 'glance:image-service'
- - 'placement:shared-db'
- 'placement-mysql-router:shared-db'
- - 'placement-mysql-router:db-router'
- 'mysql-innodb-cluster:db-router'
- - 'placement'
- 'keystone'
- - 'placement'
- 'nova-cloud-controller'
- - 'ceph-mon:prometheus'
- 'prometheus2:target'

View File

@ -1,265 +0,0 @@
variables:
openstack-origin: &openstack-origin distro
series: jammy
comment:
- 'machines section to decide order of deployment. database sooner = faster'
machines:
'0':
constraints: mem=3072M
'1':
constraints: mem=3072M
'2':
constraints: mem=3072M
'3':
'4':
'5':
'6':
'7':
'8':
'9':
'10':
'11':
'12':
'13':
'14':
'15':
'16':
series: focal
'17':
applications:
keystone-mysql-router:
charm: ch:mysql-router
channel: 8.0.19/edge
glance-mysql-router:
charm: ch:mysql-router
channel: 8.0.19/edge
cinder-mysql-router:
charm: ch:mysql-router
channel: 8.0.19/edge
nova-cloud-controller-mysql-router:
charm: ch:mysql-router
channel: 8.0.19/edge
placement-mysql-router:
charm: ch:mysql-router
channel: 8.0.19/edge
mysql-innodb-cluster:
charm: ch:mysql-innodb-cluster
num_units: 3
options:
source: *openstack-origin
to:
- '0'
- '1'
- '2'
channel: 8.0.19/edge
ceph-osd:
charm: ch:ceph-osd
num_units: 3
storage:
osd-devices: '10G'
options:
source: *openstack-origin
osd-devices: '/dev/test-non-existent'
to:
- '3'
- '4'
- '5'
channel: quincy/edge
ceph-mon:
charm: ch:ceph-mon
channel: quincy/edge
num_units: 3
options:
source: *openstack-origin
monitor-count: '3'
to:
- '6'
- '7'
- '8'
ceph-fs:
charm: ch:ceph-fs
num_units: 1
options:
source: *openstack-origin
channel: quincy/edge
to:
- '17'
rabbitmq-server:
charm: ch:rabbitmq-server
num_units: 1
options:
source: *openstack-origin
to:
- '9'
channel: 3.9/edge
keystone:
expose: True
charm: ch:keystone
num_units: 1
options:
openstack-origin: *openstack-origin
to:
- '10'
channel: yoga/edge
nova-compute:
charm: ch:nova-compute
num_units: 1
options:
openstack-origin: *openstack-origin
libvirt-image-backend: rbd
to:
- '11'
channel: yoga/edge
glance:
expose: True
charm: ch:glance
num_units: 1
options:
openstack-origin: *openstack-origin
to:
- '12'
channel: yoga/edge
cinder:
expose: True
charm: ch:cinder
num_units: 1
options:
block-device: 'None'
glance-api-version: '2'
openstack-origin: *openstack-origin
to:
- '13'
channel: yoga/edge
cinder-ceph:
charm: ch:cinder-ceph
channel: yoga/edge
nova-cloud-controller:
expose: True
charm: ch:nova-cloud-controller
num_units: 1
options:
openstack-origin: *openstack-origin
to:
- '14'
channel: yoga/edge
placement:
charm: ch:placement
num_units: 1
options:
openstack-origin: *openstack-origin
to:
- '15'
channel: yoga/edge
prometheus2:
charm: ch:prometheus2
num_units: 1
series: focal
to:
- '16'
relations:
- - 'nova-compute:amqp'
- 'rabbitmq-server:amqp'
- - 'nova-compute:image-service'
- 'glance:image-service'
- - 'nova-compute:ceph'
- 'ceph-mon:client'
- - nova-compute:ceph-access
- cinder-ceph:ceph-access
- - 'keystone:shared-db'
- 'keystone-mysql-router:shared-db'
- - 'keystone-mysql-router:db-router'
- 'mysql-innodb-cluster:db-router'
- - 'glance:shared-db'
- 'glance-mysql-router:shared-db'
- - 'glance-mysql-router:db-router'
- 'mysql-innodb-cluster:db-router'
- - 'glance:identity-service'
- 'keystone:identity-service'
- - 'glance:amqp'
- 'rabbitmq-server:amqp'
- - 'glance:ceph'
- 'ceph-mon:client'
- - 'cinder:shared-db'
- 'cinder-mysql-router:shared-db'
- - 'cinder-mysql-router:db-router'
- 'mysql-innodb-cluster:db-router'
- - 'cinder:identity-service'
- 'keystone:identity-service'
- - 'cinder:amqp'
- 'rabbitmq-server:amqp'
- - 'cinder:image-service'
- 'glance:image-service'
- - 'cinder-ceph:storage-backend'
- 'cinder:storage-backend'
- - 'cinder-ceph:ceph'
- 'ceph-mon:client'
- - 'ceph-osd:mon'
- 'ceph-mon:osd'
- - 'ceph-mon:mds'
- 'ceph-fs:ceph-mds'
- - 'nova-cloud-controller:shared-db'
- 'nova-cloud-controller-mysql-router:shared-db'
- - 'nova-cloud-controller-mysql-router:db-router'
- 'mysql-innodb-cluster:db-router'
- - 'nova-cloud-controller:identity-service'
- 'keystone:identity-service'
- - 'nova-cloud-controller:amqp'
- 'rabbitmq-server:amqp'
- - 'nova-cloud-controller:cloud-compute'
- 'nova-compute:cloud-compute'
- - 'nova-cloud-controller:image-service'
- 'glance:image-service'
- - 'placement:shared-db'
- 'placement-mysql-router:shared-db'
- - 'placement-mysql-router:db-router'
- 'mysql-innodb-cluster:db-router'
- - 'placement'
- 'keystone'
- - 'placement'
- 'nova-cloud-controller'
- - 'ceph-mon:prometheus'
- 'prometheus2:target'

View File

@ -17,8 +17,7 @@ import charm
import helpers
@helpers.patch_network_get()
class TestCephMetrics(unittest.TestCase):
class CephMetricsTestBase(unittest.TestCase):
@classmethod
def setUpClass(cls):
"""Run once before tests begin."""
@ -33,11 +32,17 @@ class TestCephMetrics(unittest.TestCase):
rules: []
"""
)
rules_file = cls.rules_dir / "alert-rules.yaml"
with rules_file.open("w") as f:
f.write(cls.rules)
@classmethod
def tearDownClass(cls):
cls.tempdir.cleanup()
@helpers.patch_network_get()
class TestCephMetrics(CephMetricsTestBase):
def setUp(self):
super().setUp()
self.harness = Harness(charm.CephMonCharm)
@ -134,3 +139,60 @@ class TestCephMetrics(unittest.TestCase):
self.harness.charm.metrics_endpoint.update_alert_rules()
alert_rules = self.get_alert_rules(rel_id)
self.assertTrue(alert_rules.get("groups"))
class TestCephCOSAgentProvider(CephMetricsTestBase):
    """Unit tests for the cos-agent provider attached to CephMonCharm."""

    def setUp(self):
        """Start a leader harness and point the provider at the test rules."""
        super().setUp()
        self.harness = Harness(charm.CephMonCharm)
        self.addCleanup(self.harness.cleanup)
        self.harness.begin()
        self.harness.set_leader(True)
        self.harness.charm.cos_agent._metrics_rules = self.rules_dir

    def test_init(self):
        """The provider is bound to the "cos-agent" relation."""
        self.assertEqual(
            self.harness.charm.cos_agent._relation_name,
            "cos-agent",
        )

    # Decorators are applied bottom-up, so the mocks arrive in reverse order.
    # The first patch supplies its own replacement and so adds no argument.
    @patch("ceph_metrics.mgr_config_set_rbd_stats_pools", lambda: None)
    @patch("ceph_metrics.ceph_utils.is_bootstrapped", return_value=True)
    @patch("ceph_metrics.ceph_utils.is_mgr_module_enabled", return_value=False)
    @patch("ceph_metrics.ceph_utils.mgr_enable_module")
    @patch("ceph_metrics.ceph_utils.mgr_disable_module")
    def test_add_remove_rel(
        self,
        mgr_disable_module,
        mgr_enable_module,
        _is_mgr_module_enable,
        _is_bootstrapped,
    ):
        """Adding the relation publishes config; removing it disables the module."""
        rel_id = self.harness.add_relation("cos-agent", "grafana-agent")
        self.harness.add_relation_unit(rel_id, "grafana-agent/0")

        unit_rel_data = self.harness.get_relation_data(
            rel_id, self.harness.model.unit
        )
        data = json.loads(unit_rel_data["config"])
        # assertIn reports the missing key and the container on failure.
        self.assertIn("metrics_scrape_jobs", data)
        self.assertEqual(
            data["metrics_scrape_jobs"][0]["metrics_path"], "/metrics"
        )
        self.assertIn("metrics_alert_rules", data)
        self.assertIn("groups", data["metrics_alert_rules"])
        mgr_enable_module.assert_called_once()

        self.harness.remove_relation(rel_id)
        mgr_disable_module.assert_called_once()

    @patch("socket.getfqdn", return_value="node1.ceph.example.com")
    def test_custom_scrape_configs(self, _mock_getfqdn):
        """The scrape config targets localhost:9283 and relabels the instance."""
        configs = self.harness.charm.cos_agent._custom_scrape_configs()
        self.assertEqual(
            configs[0]["static_configs"][0]["targets"], ["localhost:9283"]
        )
        self.assertEqual(
            configs[0]["metric_relabel_configs"][0]["replacement"],
            "ceph_cluster",
        )