# Copyright 2017 The Openstack-Helm Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Default values for grafana
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.

# Container image references, keyed by the role each image plays.
images:
  tags:
    grafana: docker.io/grafana/grafana:4.5.2
    datasource: docker.io/kolla/ubuntu-source-heat-engine:3.0.3
    dep_check: quay.io/stackanetes/kubernetes-entrypoint:v0.3.0
    db_init: docker.io/kolla/ubuntu-source-heat-engine:3.0.3
    grafana_db_session_sync: docker.io/kolla/ubuntu-source-heat-engine:3.0.3
    image_repo_sync: docker.io/docker:17.07.0
  pull_policy: IfNotPresent
  local_registry:
    active: false
    # Image keys listed here are excluded from local-registry handling
    # (presumably because they are needed before the registry is usable;
    # confirm against the image_repo_sync templates).
    exclude:
      - dep_check
      - image_repo_sync

# Node label key/value used to select the nodes the workloads run on.
labels:
  node_selector_key: openstack-control-plane
  node_selector_value: enabled

pod:
  affinity:
    anti:
      type:
        default: preferredDuringSchedulingIgnoredDuringExecution
      topologyKey:
        default: kubernetes.io/hostname
  mounts:
    grafana:
      init_container: null
      # Explicit null; the collapsed source had a bare `grafana:` here,
      # which parses to the same value.
      grafana: null
  replicas:
    grafana: 1
  lifecycle:
    upgrades:
      deployments:
        revision_history: 3
        pod_replacement_strategy: RollingUpdate
        rolling_update:
          max_unavailable: 1
          max_surge: 3
    termination_grace_period:
      grafana:
        timeout: 600
  # Resource requests/limits per workload. `enabled` is false by default;
  # presumably the values below only take effect when it is set to true
  # (TODO confirm against the templates).
  resources:
    enabled: false
    jobs:
      image_repo_sync:
        requests:
          memory: "128Mi"
          cpu: "100m"
        limits:
          memory: "1024Mi"
          cpu: "2000m"
      bootstrap:
        requests:
          memory: "128Mi"
          cpu: "100m"
        limits:
          memory: "1024Mi"
          cpu: "2000m"
      db_init:
        requests:
          memory: "128Mi"
          cpu: "100m"
        limits:
          memory: "1024Mi"
          cpu: "2000m"
      db_init_session:
        requests:
          memory: "128Mi"
          cpu: "100m"
        limits:
          memory: "1024Mi"
          cpu: "2000m"
      grafana_db_session_sync:
        requests:
          memory: "128Mi"
          cpu: "100m"
        limits:
          memory: "1024Mi"
          cpu: "2000m"
    # NOTE(review): nesting was reconstructed from a whitespace-collapsed
    # source. `grafana` is placed beside `jobs` (it is the long-running
    # deployment, not a job); confirm against the deployment template.
    grafana:
      requests:
        memory: "128Mi"
        cpu: "100m"
      limits:
        memory: "1024Mi"
        cpu: "2000m"

# Connection details for the services this chart exposes or talks to.
# NOTE(review): the credentials below are placeholder defaults
# (`password`, `admin`/`admin`); override them for any real deployment.
endpoints:
  cluster_domain_suffix: cluster.local
  # Database holding Grafana's own data.
  oslo_db:
    namespace: null
    auth:
      admin:
        username: root
        password: password
      user:
        username: grafana
        password: password
    hosts:
      default: mariadb
    host_fqdn_override:
      default: null
    path: /grafana
    scheme: mysql+pymysql
    port:
      mysql:
        default: 3306
  # Separate database used for Grafana session storage.
  oslo_db_session:
    namespace: null
    auth:
      admin:
        username: root
        password: password
      user:
        username: grafana_session
        password: password
    hosts:
      default: mariadb
    host_fqdn_override:
      default: null
    path: /grafana_session
    scheme: mysql+pymysql
    port:
      mysql:
        default: 3306
  grafana:
    name: grafana
    namespace: null
    auth:
      admin:
        username: admin
        password: admin
    hosts:
      default: grafana-dashboard
      public: grafana
    host_fqdn_override:
      default: null
    path:
      default: null
    scheme:
      default: http
    port:
      grafana:
        default: 3000
  # Prometheus endpoint.
  monitoring:
    name: prometheus
    namespace: null
    hosts:
      default: prom-metrics
      public: prometheus
    host_fqdn_override:
      default: null
    path:
      default: null
    scheme:
      default: http
    port:
      api:
        default: 9090
        public: 80

# Jobs and services each component waits on before it starts.
dependencies:
  dynamic:
    common:
      local_image_registry:
        jobs:
          - grafana-image-repo-sync
        services:
          - endpoint: node
            service: local_image_registry
  static:
    db_init:
      services:
        - endpoint: internal
          service: oslo_db
    db_init_session:
      services:
        - endpoint: internal
          service: oslo_db
    db_session_sync:
      jobs:
        - grafana-db-init-session
      services:
        - endpoint: internal
          service: oslo_db
    grafana:
      jobs:
        - grafana-db-init
        - grafana-db-session-sync
      services:
        - endpoint: internal
          service: oslo_db
    image_repo_sync:
      services:
        - endpoint: internal
          service: local_image_registry
    register_datasource:
      services:
        - endpoint: internal
          service: grafana

network:
  grafana:
    node_port:
      enabled: false
      port: 30902
    ingress:
      public: true
      annotations:
        kubernetes.io/ingress.class: "nginx"
        ingress.kubernetes.io/rewrite-target: /

# Names of the secrets that carry the database credentials.
secrets:
  oslo_db:
    admin: grafana-db-admin
    user: grafana-db-user
  oslo_db_session:
    admin: grafana-session-db-admin
    user: grafana-session-db-user

# Per-manifest switches; every manifest is enabled by default.
manifests:
  configmap_bin: true
  configmap_dashboards: true
  configmap_etc: true
  deployment: true
  ingress: true
  job_db_init: true
  job_db_init_session: true
  job_db_session_sync: true
  job_datasource: true
  job_image_repo_sync: true
  secret_db: true
  secret_db_session: true
  secret_admin_creds: true
  service: true
  service_ingress: true

# NOTE(review): the `conf` section that starts on the next line is still
# collapsed onto long lines and continues beyond this point; its text is left
# byte-identical and needs the same line/indent restoration.
conf: datasource: name: prometheus type: prometheus database: access: proxy isDefault: true grafana: paths: data: /var/lib/grafana/data plugins: /var/lib/grafana/plugins server: protocol: http http_port: 3000 database: type: mysql session: provider: mysql provider_config: null cookie_name: grafana_sess cookie_secure: false session_life_time: 86400 security: admin_user: ${GF_SECURITY_ADMIN_USER} admin_password: ${GF_SECURITY_ADMIN_PASSWORD} cookie_username: grafana_user cookie_remember_name: grafana_remember login_remember_days: 7 users: allow_sign_up: false allow_org_create: false auto_assign_org: true auto_assign_org_role: Admin default_theme: dark log: mode: console level: info log.console: level: info format: console dashboards.json: enabled: true path: /var/lib/grafana/dashboards grafana_net: url: https://grafana.net dashboards: ceph_cluster: __inputs: - name: prometheus label: Prometheus description: Prometheus.IO type: datasource pluginId: prometheus pluginName: Prometheus __requires: - type: panel id: singlestat name: Singlestat version: '' - type: panel id: graph name: Graph version: '' - type: grafana id: grafana name: Grafana version: 3.1.1 - type: datasource id: prometheus name: Prometheus version: 1.0.0 id: title: Ceph - Cluster tags: - ceph - cluster style: dark timezone: browser editable: true hideControls: false sharedCrosshair: false rows: - collapse: false editable: true height: 150px panels: - cacheTimeout: colorBackground: false colorValue: true colors: - rgba(245, 54, 54, 0.9) - rgba(237, 129, 40, 0.89) - 
rgba(50, 172, 45, 0.97) datasource: prometheus editable: true error: false format: none gauge: maxValue: 100 minValue: 0 show: false thresholdLabels: false thresholdMarkers: true id: 21 interval: 1m isNew: true links: [] mappingType: 1 mappingTypes: - name: value to text value: 1 - name: range to text value: 2 maxDataPoints: 100 nullPointMode: connected nullText: postfix: '' postfixFontSize: 50% prefix: '' prefixFontSize: 50% rangeMaps: - from: 'null' text: N/A to: 'null' span: 2 sparkline: fillColor: rgba(31, 118, 189, 0.18) full: false lineColor: rgb(31, 120, 193) show: false targets: - expr: count(ceph_health_status) interval: "$interval" intervalFactor: 1 refId: A step: 60 thresholds: '0,1' title: Status transparent: false type: singlestat valueFontSize: 100% valueMaps: - op: "=" text: N/A value: 'null' - op: "=" text: WARNING value: '0' - op: "=" text: HEALTHY value: '1' valueName: current - cacheTimeout: colorBackground: false colorValue: true colors: - rgba(245, 54, 54, 0.9) - rgba(237, 129, 40, 0.89) - rgba(50, 172, 45, 0.97) datasource: prometheus editable: true error: false format: none gauge: maxValue: 100 minValue: 0 show: false thresholdLabels: false thresholdMarkers: true id: 14 interval: 1m isNew: true links: [] mappingType: 1 mappingTypes: - name: value to text value: 1 - name: range to text value: 2 maxDataPoints: 100 nullPointMode: connected nullText: postfix: '' postfixFontSize: 50% prefix: '' prefixFontSize: 50% rangeMaps: - from: 'null' text: N/A to: 'null' span: 2 sparkline: fillColor: rgba(31, 118, 189, 0.18) full: false lineColor: rgb(31, 120, 193) show: false targets: - expr: ceph_mon_quorum_count interval: "$interval" intervalFactor: 1 legendFormat: '' refId: A step: 60 thresholds: '2,3' title: Monitors In Quorum transparent: false type: singlestat valueFontSize: 100% valueMaps: - op: "=" text: N/A value: 'null' valueName: current - cacheTimeout: colorBackground: false colorValue: false colors: - rgba(245, 54, 54, 0.9) - rgba(237, 129, 40, 
0.89) - rgba(50, 172, 45, 0.97) datasource: prometheus editable: true error: false format: none gauge: maxValue: 100 minValue: 0 show: false thresholdLabels: false thresholdMarkers: true id: 22 interval: 1m isNew: true links: [] mappingType: 1 mappingTypes: - name: value to text value: 1 - name: range to text value: 2 maxDataPoints: 100 nullPointMode: connected nullText: postfix: '' postfixFontSize: 50% prefix: '' prefixFontSize: 50% rangeMaps: - from: 'null' text: N/A to: 'null' span: 2 sparkline: fillColor: rgba(31, 118, 189, 0.18) full: true lineColor: rgb(31, 120, 193) show: true targets: - expr: count(ceph_pool_max_avail) interval: "$interval" intervalFactor: 1 legendFormat: '' refId: A step: 60 thresholds: '' title: Pools transparent: false type: singlestat valueFontSize: 100% valueMaps: - op: "=" text: N/A value: 'null' valueName: current - cacheTimeout: colorBackground: false colorValue: false colors: - rgba(50, 172, 45, 0.97) - rgba(237, 129, 40, 0.89) - rgba(245, 54, 54, 0.9) datasource: prometheus editable: true error: false format: bytes gauge: maxValue: 100 minValue: 0 show: false thresholdLabels: false thresholdMarkers: true id: 33 interval: 1m isNew: true links: [] mappingType: 1 mappingTypes: - name: value to text value: 1 - name: range to text value: 2 maxDataPoints: 100 nullPointMode: connected nullText: postfix: '' postfixFontSize: 50% prefix: '' prefixFontSize: 50% rangeMaps: - from: 'null' text: N/A to: 'null' span: 2 sparkline: fillColor: rgba(31, 118, 189, 0.18) full: true lineColor: rgb(31, 120, 193) show: true targets: - expr: ceph_cluster_total_bytes interval: "$interval" intervalFactor: 1 legendFormat: '' refId: A step: 60 thresholds: 0.025,0.1 title: Cluster Capacity transparent: false type: singlestat valueFontSize: 100% valueMaps: - op: "=" text: N/A value: 'null' valueName: current - cacheTimeout: colorBackground: false colorValue: false colors: - rgba(50, 172, 45, 0.97) - rgba(237, 129, 40, 0.89) - rgba(245, 54, 54, 0.9) datasource: 
prometheus editable: true error: false format: bytes gauge: maxValue: 100 minValue: 0 show: false thresholdLabels: false thresholdMarkers: true id: 34 interval: 1m isNew: true links: [] mappingType: 1 mappingTypes: - name: value to text value: 1 - name: range to text value: 2 maxDataPoints: 100 nullPointMode: connected nullText: postfix: '' postfixFontSize: 50% prefix: '' prefixFontSize: 50% rangeMaps: - from: 'null' text: N/A to: 'null' span: 2 sparkline: fillColor: rgba(31, 118, 189, 0.18) full: true lineColor: rgb(31, 120, 193) show: true targets: - expr: ceph_cluster_total_used_bytes interval: "$interval" intervalFactor: 1 legendFormat: '' refId: A step: 60 thresholds: 0.025,0.1 title: Used Capacity transparent: false type: singlestat valueFontSize: 100% valueMaps: - op: "=" text: N/A value: 'null' valueName: current - cacheTimeout: colorBackground: false colorValue: true colors: - rgba(50, 172, 45, 0.97) - rgba(237, 129, 40, 0.89) - rgba(245, 54, 54, 0.9) datasource: prometheus editable: true error: false format: percentunit gauge: maxValue: 100 minValue: 0 show: true thresholdLabels: false thresholdMarkers: true id: 23 interval: 1m isNew: true links: [] mappingType: 1 mappingTypes: - name: value to text value: 1 - name: range to text value: 2 maxDataPoints: 100 nullPointMode: connected nullText: postfix: '' postfixFontSize: 50% prefix: '' prefixFontSize: 50% rangeMaps: - from: 'null' text: N/A to: 'null' span: 2 sparkline: fillColor: rgba(31, 118, 189, 0.18) full: true lineColor: rgb(31, 120, 193) show: false targets: - expr: ceph_cluster_total_used_bytes/ceph_cluster_total_bytes interval: "$interval" intervalFactor: 1 legendFormat: '' refId: A step: 60 thresholds: '70,80' title: Available Capacity transparent: false type: singlestat valueFontSize: 100% valueMaps: - op: "=" text: N/A value: 'null' valueName: current title: New row - collapse: false editable: true height: 100px panels: - cacheTimeout: colorBackground: false colorValue: false colors: - 
rgba(245, 54, 54, 0.9) - rgba(237, 129, 40, 0.89) - rgba(50, 172, 45, 0.97) datasource: prometheus editable: true error: false format: none gauge: maxValue: 100 minValue: 0 show: false thresholdLabels: false thresholdMarkers: true id: 26 interval: isNew: true links: [] mappingType: 1 mappingTypes: - name: value to text value: 1 - name: range to text value: 2 maxDataPoints: 100 nullPointMode: connected nullText: postfix: '' postfixFontSize: 50% prefix: '' prefixFontSize: 50% rangeMaps: - from: 'null' text: N/A to: 'null' span: 1 sparkline: fillColor: rgba(31, 118, 189, 0.18) full: false lineColor: rgb(31, 120, 193) show: false targets: - expr: count(ceph_osd_in) interval: "$interval" intervalFactor: 1 legendFormat: '' refId: A step: 60 thresholds: '' title: OSDs IN type: singlestat valueFontSize: 80% valueMaps: - op: "=" text: N/A value: 'null' valueName: current - cacheTimeout: colorBackground: true colorValue: false colors: - rgba(50, 172, 45, 0.97) - rgba(237, 40, 40, 0.89) - rgba(245, 54, 54, 0.9) datasource: prometheus editable: true error: false format: none gauge: maxValue: 100 minValue: 0 show: false thresholdLabels: false thresholdMarkers: true id: 27 interval: isNew: true links: [] mappingType: 1 mappingTypes: - name: value to text value: 1 - name: range to text value: 2 maxDataPoints: 100 nullPointMode: connected nullText: postfix: '' postfixFontSize: 50% prefix: '' prefixFontSize: 50% rangeMaps: - from: 'null' text: N/A to: 'null' span: 1 sparkline: fillColor: rgba(31, 118, 189, 0.18) full: false lineColor: rgb(31, 120, 193) show: false targets: - expr: count(ceph_osd_metadata) - count(ceph_osd_in) interval: "$interval" intervalFactor: 1 legendFormat: '' refId: A step: 60 thresholds: '1,1' title: OSDs OUT type: singlestat valueFontSize: 80% valueMaps: - op: "=" text: N/A value: 'null' valueName: current - cacheTimeout: colorBackground: false colorValue: false colors: - rgba(245, 54, 54, 0.9) - rgba(237, 129, 40, 0.89) - rgba(50, 172, 45, 0.97) 
datasource: prometheus editable: true error: false format: none gauge: maxValue: 100 minValue: 0 show: false thresholdLabels: false thresholdMarkers: true id: 28 interval: isNew: true links: [] mappingType: 1 mappingTypes: - name: value to text value: 1 - name: range to text value: 2 maxDataPoints: 100 nullPointMode: connected nullText: postfix: '' postfixFontSize: 50% prefix: '' prefixFontSize: 50% rangeMaps: - from: 'null' text: N/A to: 'null' span: 1 sparkline: fillColor: rgba(31, 118, 189, 0.18) full: false lineColor: rgb(31, 120, 193) show: false targets: - expr: sum(ceph_osd_up) interval: "$interval" intervalFactor: 1 legendFormat: '' refId: A step: 60 thresholds: '' title: OSDs UP type: singlestat valueFontSize: 80% valueMaps: - op: "=" text: N/A value: 'null' valueName: current - cacheTimeout: colorBackground: true colorValue: false colors: - rgba(50, 172, 45, 0.97) - rgba(237, 40, 40, 0.89) - rgba(245, 54, 54, 0.9) datasource: prometheus editable: true error: false format: none gauge: maxValue: 100 minValue: 0 show: false thresholdLabels: false thresholdMarkers: true id: 29 interval: isNew: true links: [] mappingType: 1 mappingTypes: - name: value to text value: 1 - name: range to text value: 2 maxDataPoints: 100 nullPointMode: connected nullText: postfix: '' postfixFontSize: 50% prefix: '' prefixFontSize: 50% rangeMaps: - from: 'null' text: N/A to: 'null' span: 1 sparkline: fillColor: rgba(31, 118, 189, 0.18) full: false lineColor: rgb(31, 120, 193) show: false targets: - expr: count(ceph_osd_metadata) - count(ceph_osd_up) interval: "$interval" intervalFactor: 1 legendFormat: '' refId: A step: 60 thresholds: '1,1' title: OSDs DOWN type: singlestat valueFontSize: 80% valueMaps: - op: "=" text: N/A value: 'null' valueName: current - cacheTimeout: colorBackground: false colorValue: true colors: - rgba(50, 172, 45, 0.97) - rgba(237, 129, 40, 0.89) - rgba(245, 54, 54, 0.9) datasource: prometheus editable: true error: false format: none gauge: maxValue: 100 
minValue: 0 show: false thresholdLabels: false thresholdMarkers: true id: 30 interval: isNew: true links: [] mappingType: 1 mappingTypes: - name: value to text value: 1 - name: range to text value: 2 maxDataPoints: 100 nullPointMode: connected nullText: postfix: '' postfixFontSize: 50% prefix: '' prefixFontSize: 50% rangeMaps: - from: 'null' text: N/A to: 'null' span: 2 sparkline: fillColor: rgba(31, 118, 189, 0.18) full: true lineColor: rgb(31, 120, 193) show: true targets: - expr: avg(ceph_osd_numpg) interval: "$interval" intervalFactor: 1 legendFormat: '' refId: A step: 60 thresholds: '250,300' title: Average PGs per OSD type: singlestat valueFontSize: 80% valueMaps: - op: "=" text: N/A value: 'null' valueName: current title: New row - collapse: false editable: true height: 250px panels: - aliasColors: Available: "#EAB839" Total Capacity: "#447EBC" Used: "#BF1B00" total_avail: "#6ED0E0" total_space: "#7EB26D" total_used: "#890F02" bars: false datasource: prometheus editable: true error: false fill: 4 grid: threshold1: threshold1Color: rgba(216, 200, 27, 0.27) threshold2: threshold2Color: rgba(234, 112, 112, 0.22) height: '300' id: 1 interval: "$interval" isNew: true legend: alignAsTable: true avg: true current: true max: true min: true show: true total: false values: true lines: true linewidth: 0 links: [] minSpan: nullPointMode: connected percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: - alias: Total Capacity fill: 0 linewidth: 3 stack: false span: 4 stack: true steppedLine: false targets: - expr: ceph_cluster_total_bytes - ceph_cluster_total_used_bytes interval: "$interval" intervalFactor: 1 legendFormat: Available refId: A step: 60 - expr: ceph_cluster_total_used_bytes interval: "$interval" intervalFactor: 1 legendFormat: Used refId: B step: 60 - expr: ceph_cluster_total_bytes interval: "$interval" intervalFactor: 1 legendFormat: Total Capacity refId: C step: 60 timeFrom: timeShift: title: Capacity tooltip: msResolution: false 
shared: true sort: 2 value_type: individual type: graph xaxis: show: true yaxes: - format: bytes label: logBase: 1 max: min: show: true - format: short label: logBase: 1 max: min: show: true - aliasColors: Total Capacity: "#7EB26D" Used: "#BF1B00" total_avail: "#6ED0E0" total_space: "#7EB26D" total_used: "#890F02" bars: false datasource: prometheus decimals: 0 editable: true error: false fill: 1 grid: threshold1: threshold1Color: rgba(216, 200, 27, 0.27) threshold2: threshold2Color: rgba(234, 112, 112, 0.22) thresholdLine: false height: '300' id: 3 interval: "$interval" isNew: true legend: alignAsTable: true avg: true current: true max: true min: true show: true total: false values: true lines: true linewidth: 2 links: [] minSpan: nullPointMode: connected percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: [] span: 4 stack: true steppedLine: false targets: - expr: sum(ceph_osd_op_w) interval: "$interval" intervalFactor: 1 legendFormat: Write refId: A step: 60 - expr: sum(ceph_osd_op_r) interval: "$interval" intervalFactor: 1 legendFormat: Read refId: B step: 60 timeFrom: timeShift: title: IOPS tooltip: msResolution: true shared: true sort: 2 value_type: individual type: graph xaxis: show: true yaxes: - format: none label: '' logBase: 1 max: min: 0 show: true - format: short label: logBase: 1 max: min: 0 show: true - aliasColors: {} bars: false datasource: prometheus editable: true error: false fill: 1 grid: threshold1: threshold1Color: rgba(216, 200, 27, 0.27) threshold2: threshold2Color: rgba(234, 112, 112, 0.22) height: '300' id: 7 interval: "$interval" isNew: true legend: alignAsTable: true avg: true current: true max: true min: true show: true total: false values: true lines: true linewidth: 2 links: [] nullPointMode: connected percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: [] span: 4 stack: true steppedLine: false targets: - expr: sum(ceph_osd_op_in_bytes) interval: "$interval" intervalFactor: 1 
legendFormat: Write refId: A step: 60 - expr: sum(ceph_osd_op_out_bytes) interval: "$interval" intervalFactor: 1 legendFormat: Read refId: B step: 60 timeFrom: timeShift: title: Throughput tooltip: msResolution: false shared: true sort: 2 value_type: individual type: graph xaxis: show: true yaxes: - format: Bps label: logBase: 1 max: min: 0 show: true - format: short label: logBase: 1 max: min: 0 show: true repeat: showTitle: true title: CLUSTER - collapse: false editable: true height: 250px panels: - aliasColors: {} bars: false datasource: prometheus editable: true error: false fill: 1 grid: threshold1: threshold1Color: rgba(216, 200, 27, 0.27) threshold2: threshold2Color: rgba(234, 112, 112, 0.22) id: 18 isNew: true legend: alignAsTable: true avg: true current: true max: false min: false rightSide: true show: true total: false values: true lines: true linewidth: 2 links: [] nullPointMode: connected percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: - alias: "/^Total.*$/" stack: false span: 12 stack: true steppedLine: false targets: - expr: ceph_cluster_total_objects interval: "$interval" intervalFactor: 1 legendFormat: Total refId: A step: 60 timeFrom: timeShift: title: Objects in the Cluster tooltip: msResolution: false shared: true sort: 1 value_type: individual type: graph xaxis: show: true yaxes: - format: short label: logBase: 1 max: min: show: true - format: short label: logBase: 1 max: min: show: true - aliasColors: {} bars: false datasource: prometheus editable: true error: false fill: 1 grid: threshold1: threshold1Color: rgba(216, 200, 27, 0.27) threshold2: threshold2Color: rgba(234, 112, 112, 0.22) id: 19 isNew: true legend: alignAsTable: true avg: true current: true max: false min: false rightSide: true show: true total: false values: true lines: true linewidth: 2 links: [] nullPointMode: connected percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: - alias: "/^Total.*$/" stack: false span: 6 
stack: true steppedLine: false targets: - expr: sum(ceph_osd_numpg) interval: "$interval" intervalFactor: 1 legendFormat: Total refId: A step: 60 - expr: sum(ceph_pg_active) interval: "$interval" intervalFactor: 1 legendFormat: Active refId: B step: 60 - expr: sum(ceph_pg_inconsistent) interval: "$interval" intervalFactor: 1 legendFormat: Inconsistent refId: C step: 60 - expr: sum(ceph_pg_creating) interval: "$interval" intervalFactor: 1 legendFormat: Creating refId: D step: 60 - expr: sum(ceph_pg_recovering) interval: "$interval" intervalFactor: 1 legendFormat: Recovering refId: E step: 60 - expr: sum(ceph_pg_down) interval: "$interval" intervalFactor: 1 legendFormat: Down refId: F step: 60 timeFrom: timeShift: title: PGs tooltip: msResolution: false shared: true sort: 1 value_type: individual type: graph xaxis: show: true yaxes: - format: short label: logBase: 1 max: min: 0 show: true - format: short label: logBase: 1 max: min: show: true - aliasColors: {} bars: false datasource: prometheus editable: true error: false fill: 1 grid: threshold1: threshold1Color: rgba(216, 200, 27, 0.27) threshold2: threshold2Color: rgba(234, 112, 112, 0.22) id: 20 isNew: true legend: alignAsTable: true avg: true current: true max: false min: false rightSide: true show: true total: false values: true lines: true linewidth: 2 links: [] nullPointMode: connected percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: - alias: "/^Total.*$/" stack: false span: 6 stack: true steppedLine: false targets: - expr: sum(ceph_pg_degraded) interval: "$interval" intervalFactor: 1 legendFormat: Degraded refId: A step: 60 - expr: sum(ceph_pg_stale) interval: "$interval" intervalFactor: 1 legendFormat: Stale refId: B step: 60 - expr: sum(ceph_pg_undersized) interval: "$interval" intervalFactor: 1 legendFormat: Undersized refId: C step: 60 timeFrom: timeShift: title: Stuck PGs tooltip: msResolution: false shared: true sort: 1 value_type: individual type: graph xaxis: show: true 
yaxes: - format: short label: logBase: 1 max: min: 0 show: true - format: short label: logBase: 1 max: min: show: true title: New row time: from: now-1h to: now timepicker: refresh_intervals: - 5s - 10s - 30s - 1m - 5m - 15m - 30m - 1h - 2h - 1d time_options: - 5m - 15m - 1h - 6h - 12h - 24h - 2d - 7d - 30d templating: list: - auto: true auto_count: 10 auto_min: 1m current: tags: [] text: 1m value: 1m datasource: hide: 0 includeAll: false label: Interval multi: false name: interval options: - selected: false text: auto value: "$__auto_interval" - selected: true text: 1m value: 1m - selected: false text: 10m value: 10m - selected: false text: 30m value: 30m - selected: false text: 1h value: 1h - selected: false text: 6h value: 6h - selected: false text: 12h value: 12h - selected: false text: 1d value: 1d - selected: false text: 7d value: 7d - selected: false text: 14d value: 14d - selected: false text: 30d value: 30d query: 1m,10m,30m,1h,6h,12h,1d,7d,14d,30d refresh: 0 type: interval annotations: list: [] refresh: 1m schemaVersion: 12 version: 26 links: [] gnetId: 917 description: "Ceph Cluster overview.\r\n" ceph_osd: __inputs: - name: prometheus label: Prometheus description: Prometheus.IO type: datasource pluginId: prometheus pluginName: Prometheus __requires: - type: panel id: singlestat name: Singlestat version: '' - type: panel id: graph name: Graph version: '' - type: grafana id: grafana name: Grafana version: 3.1.1 - type: datasource id: prometheus name: Prometheus version: 1.0.0 id: title: Ceph - OSD tags: - ceph - osd style: dark timezone: browser editable: true hideControls: false sharedCrosshair: false rows: - collapse: false editable: true height: 100px panels: - cacheTimeout: colorBackground: true colorValue: false colors: - rgba(245, 54, 54, 0.9) - rgba(237, 40, 40, 0.89) - rgba(50, 172, 45, 0.97) datasource: prometheus editable: true error: false format: none gauge: maxValue: 100 minValue: 0 show: false thresholdLabels: false thresholdMarkers: true 
id: 6 interval: isNew: true links: [] mappingType: 2 mappingTypes: - name: value to text value: 1 - name: range to text value: 2 maxDataPoints: 100 nullPointMode: connected nullText: postfix: '' postfixFontSize: 50% prefix: '' prefixFontSize: 50% rangeMaps: - from: 'null' text: N/A to: 'null' - from: '0' text: DOWN to: '0.99' - from: '0.99' text: UP to: '1' span: 1 sparkline: fillColor: rgba(31, 118, 189, 0.18) full: false lineColor: rgb(31, 120, 193) show: false targets: - expr: ceph_osd_up{ceph_daemon="osd.$osd"} interval: "$interval" intervalFactor: 1 refId: A step: 60 thresholds: '0,1' timeFrom: title: Status transparent: false type: singlestat valueFontSize: 80% valueMaps: - op: "=" text: DOWN value: '0' - op: "=" text: UP value: '1' - op: "=" text: N/A value: 'null' valueName: current - cacheTimeout: colorBackground: true colorValue: false colors: - rgba(245, 54, 54, 0.9) - rgba(237, 40, 40, 0.89) - rgba(50, 172, 45, 0.97) datasource: prometheus editable: true error: false format: none gauge: maxValue: 100 minValue: 0 show: false thresholdLabels: false thresholdMarkers: true id: 8 interval: isNew: true links: [] mappingType: 2 mappingTypes: - name: value to text value: 1 - name: range to text value: 2 maxDataPoints: 100 nullPointMode: connected nullText: postfix: '' postfixFontSize: 50% prefix: '' prefixFontSize: 50% rangeMaps: - from: 'null' text: N/A to: 'null' - from: '0' text: OUT to: '0.99' - from: '0.99' text: IN to: '1' span: 1 sparkline: fillColor: rgba(31, 118, 189, 0.18) full: false lineColor: rgb(31, 120, 193) show: false targets: - expr: ceph_osd_in{ceph_daemon="osd.$osd"} interval: "$interval" intervalFactor: 1 refId: A step: 60 thresholds: '0,1' timeFrom: title: Available transparent: false type: singlestat valueFontSize: 80% valueMaps: - op: "=" text: DOWN value: '0' - op: "=" text: UP value: '1' - op: "=" text: N/A value: 'null' valueName: current - cacheTimeout: colorBackground: false colorValue: false colors: - rgba(245, 54, 54, 0.9) - 
rgba(237, 129, 40, 0.89) - rgba(50, 172, 45, 0.97) datasource: prometheus editable: true error: false format: none gauge: maxValue: 100 minValue: 0 show: false thresholdLabels: false thresholdMarkers: true id: 10 interval: isNew: true links: [] mappingType: 2 mappingTypes: - name: value to text value: 1 - name: range to text value: 2 maxDataPoints: 100 nullPointMode: connected nullText: postfix: '' postfixFontSize: 50% prefix: '' prefixFontSize: 50% rangeMaps: - from: 'null' text: N/A to: 'null' span: 1 sparkline: fillColor: rgba(31, 118, 189, 0.18) full: false lineColor: rgb(31, 120, 193) show: false targets: - expr: count(ceph_osd_metadata) interval: "$interval" intervalFactor: 1 refId: A step: 60 thresholds: '0,1' timeFrom: title: Total OSDs transparent: false type: singlestat valueFontSize: 80% valueMaps: - op: "=" text: DOWN value: '0' - op: "=" text: UP value: '1' - op: "=" text: N/A value: 'null' valueName: current title: New row - collapse: false editable: true height: 250px panels: - aliasColors: {} bars: false datasource: prometheus decimals: 2 editable: true error: false fill: 1 grid: threshold1: 250 threshold1Color: rgba(216, 200, 27, 0.27) threshold2: 300 threshold2Color: rgba(234, 112, 112, 0.22) thresholdLine: true id: 5 interval: "$interval" isNew: true legend: alignAsTable: true avg: true current: true max: true min: true show: true total: false values: true lines: true linewidth: 2 links: [] nullPointMode: connected percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: - alias: "/^Average.*/" fill: 0 stack: false span: 10 stack: true steppedLine: false targets: - expr: ceph_osd_numpg{ceph_daemon=~"osd.$osd"} interval: "$interval" intervalFactor: 1 legendFormat: Number of PGs - {{ osd.$osd }} refId: A step: 60 - expr: avg(ceph_osd_numpg) interval: "$interval" intervalFactor: 1 legendFormat: Average Number of PGs in the Cluster refId: B step: 60 timeFrom: timeShift: title: PGs tooltip: msResolution: false shared: true sort: 
0 value_type: individual type: graph xaxis: show: true yaxes: - format: short label: logBase: 1 max: min: 0 show: true - format: short label: logBase: 1 max: min: 0 show: true - cacheTimeout: colorBackground: false colorValue: true colors: - rgba(50, 172, 45, 0.97) - rgba(237, 129, 40, 0.89) - rgba(245, 54, 54, 0.9) datasource: prometheus editable: true error: false format: percent gauge: maxValue: 100 minValue: 0 show: true thresholdLabels: false thresholdMarkers: true id: 7 interval: isNew: true links: [] mappingType: 1 mappingTypes: - name: value to text value: 1 - name: range to text value: 2 maxDataPoints: 100 nullPointMode: connected nullText: postfix: '' postfixFontSize: 50% prefix: '' prefixFontSize: 50% rangeMaps: - from: 'null' text: N/A to: 'null' span: 2 sparkline: fillColor: rgba(31, 118, 189, 0.18) full: false lineColor: rgb(31, 120, 193) show: true targets: - expr: (ceph_osd_stat_bytes_used{ceph_daemon=~"osd.$osd"}/ceph_osd_stat_bytes{ceph_daemon=~"osd.$osd"})*100 interval: "$interval" intervalFactor: 1 legendFormat: '' refId: A step: 60 thresholds: '60,80' timeFrom: title: Utilization transparent: false type: singlestat valueFontSize: 80% valueMaps: - op: "=" text: N/A value: 'null' valueName: current showTitle: true title: 'OSD: $osd' - collapse: false editable: true height: 250px panels: - aliasColors: {} bars: false datasource: prometheus decimals: 2 editable: true error: false fill: 1 grid: threshold1: threshold1Color: rgba(216, 200, 27, 0.27) threshold2: threshold2Color: rgba(234, 112, 112, 0.22) id: 2 interval: "$interval" isNew: true legend: alignAsTable: true avg: true current: true max: true min: true show: true total: false values: true lines: true linewidth: 2 links: [] nullPointMode: connected percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: [] span: 6 stack: true steppedLine: false targets: - expr: ceph_osd_stat_bytes_used{ceph_daemon=~"osd.$osd"} interval: "$interval" intervalFactor: 1 legendFormat: Used - 
{{ osd.$osd }} metric: ceph_osd_used_bytes refId: A step: 60 - expr: ceph_osd_stat_bytes{ceph_daemon=~"osd.$osd"} - ceph_osd_stat_bytes_used{ceph_daemon=~"osd.$osd"} hide: false interval: "$interval" intervalFactor: 1 legendFormat: Available - {{ osd.$osd }} metric: ceph_osd_avail_bytes refId: B step: 60 timeFrom: timeShift: title: OSD Storage tooltip: msResolution: false shared: true sort: 0 value_type: individual type: graph xaxis: show: true yaxes: - format: bytes label: logBase: 1 max: min: 0 show: true - format: short label: logBase: 1 max: min: 0 show: true - aliasColors: {} bars: false datasource: prometheus decimals: 5 editable: true error: false fill: 1 grid: threshold1: threshold1Color: rgba(216, 200, 27, 0.27) threshold2: threshold2Color: rgba(234, 112, 112, 0.22) id: 9 interval: "$interval" isNew: true legend: alignAsTable: true avg: true current: true max: true min: true show: true total: false values: true lines: false linewidth: 2 links: [] nullPointMode: connected percentage: false pointradius: 2 points: true renderer: flot seriesOverrides: [] span: 6 stack: false steppedLine: false targets: - expr: (ceph_osd_stat_bytes_used{ceph_daemon=~"osd.$osd"}/ceph_osd_stat_bytes{ceph_daemon=~"osd.$osd"}) interval: "$interval" intervalFactor: 1 legendFormat: Available - {{ osd.$osd }} metric: ceph_osd_avail_bytes refId: A step: 60 timeFrom: timeShift: title: Utilization Variance tooltip: msResolution: false shared: true sort: 0 value_type: cumulative type: graph xaxis: show: true yaxes: - format: none label: logBase: 1 max: min: show: true - format: none label: logBase: 1 max: min: show: true time: from: now-1h to: now timepicker: refresh_intervals: - 5s - 10s - 30s - 1m - 5m - 15m - 30m - 1h - 2h - 1d time_options: - 5m - 15m - 1h - 6h - 12h - 24h - 2d - 7d - 30d templating: list: - auto: true auto_count: 10 auto_min: 1m current: selected: true text: 1m value: 1m datasource: hide: 0 includeAll: false label: Interval multi: false name: interval options: - 
selected: false text: auto value: "$__auto_interval" - selected: true text: 1m value: 1m - selected: false text: 10m value: 10m - selected: false text: 30m value: 30m - selected: false text: 1h value: 1h - selected: false text: 6h value: 6h - selected: false text: 12h value: 12h - selected: false text: 1d value: 1d - selected: false text: 7d value: 7d - selected: false text: 14d value: 14d - selected: false text: 30d value: 30d query: 1m,10m,30m,1h,6h,12h,1d,7d,14d,30d refresh: 0 type: interval - current: {} datasource: prometheus hide: 0 includeAll: false label: OSD multi: false name: osd options: [] query: label_values(ceph_osd_metadata, id) refresh: 1 regex: '' type: query annotations: list: [] refresh: 15m schemaVersion: 12 version: 18 links: [] gnetId: 923 description: CEPH OSD Status. ceph_pool: __inputs: - name: prometheus label: Prometheus description: Prometheus.IO type: datasource pluginId: prometheus pluginName: Prometheus __requires: - type: panel id: graph name: Graph version: '' - type: panel id: singlestat name: Singlestat version: '' - type: grafana id: grafana name: Grafana version: 3.1.1 - type: datasource id: prometheus name: Prometheus version: 1.0.0 id: title: Ceph - Pools tags: - ceph - pools style: dark timezone: browser editable: true hideControls: false sharedCrosshair: false rows: - collapse: false editable: true height: 250px panels: - aliasColors: {} bars: false datasource: prometheus decimals: 2 editable: true error: false fill: 4 grid: threshold1: threshold1Color: rgba(216, 200, 27, 0.27) threshold2: threshold2Color: rgba(234, 112, 112, 0.22) height: '' id: 2 interval: "$interval" isNew: true legend: alignAsTable: true avg: true current: true max: true min: true rightSide: true show: true total: false values: true lines: true linewidth: 0 links: [] nullPointMode: connected percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: - alias: "/^Total.*$/" fill: 0 linewidth: 4 stack: false - alias: "/^Raw.*$/" color:
"#BF1B00" fill: 0 linewidth: 4 span: 10 stack: true steppedLine: false targets: - expr: ceph_pool_max_avail{pool_id=~"$pool"} interval: "$interval" intervalFactor: 1 legendFormat: Total - {{ $pool }} refId: A step: 60 - expr: ceph_pool_bytes_used{pool_id=~"$pool"} interval: "$interval" intervalFactor: 1 legendFormat: Used - {{ $pool }} refId: B step: 60 - expr: ceph_pool_max_avail{pool_id=~"$pool"} - ceph_pool_bytes_used{pool_id=~"$pool"} interval: "$interval" intervalFactor: 1 legendFormat: Available - {{ $pool }} refId: C step: 60 - expr: ceph_pool_raw_bytes_used{pool_id=~"$pool"} interval: "$interval" intervalFactor: 1 legendFormat: Raw - {{ $pool }} refId: D step: 60 timeFrom: timeShift: title: "[[pool_name]] Pool Storage" tooltip: msResolution: false shared: true sort: 0 value_type: individual type: graph xaxis: show: true yaxes: - format: bytes label: logBase: 1 max: min: 0 show: true - format: short label: logBase: 1 max: min: 0 show: true - cacheTimeout: colorBackground: false colorValue: true colors: - rgba(245, 54, 54, 0.9) - rgba(237, 129, 40, 0.89) - rgba(50, 172, 45, 0.97) datasource: prometheus decimals: 2 editable: true error: false format: percentunit gauge: maxValue: 1 minValue: 0 show: true thresholdLabels: false thresholdMarkers: true id: 10 interval: isNew: true links: [] mappingType: 1 mappingTypes: - name: value to text value: 1 - name: range to text value: 2 maxDataPoints: 100 nullPointMode: connected nullText: postfix: '' postfixFontSize: 50% prefix: '' prefixFontSize: 50% rangeMaps: - from: 'null' text: N/A to: 'null' span: 2 sparkline: fillColor: rgba(31, 118, 189, 0.18) full: false lineColor: rgb(31, 120, 193) show: false targets: - expr: (ceph_pool_bytes_used{pool_id=~"$pool"} / ceph_pool_max_avail{pool_id=~"$pool"}) interval: "$interval" intervalFactor: 1 refId: A step: 60 thresholds: '' title: "[[pool_name]] Pool Usage" type: singlestat valueFontSize: 80% valueMaps: - op: "=" text: N/A value: 'null' valueName: current showTitle:
true title: 'Pool: $pool' - collapse: false editable: true height: 250px panels: - aliasColors: {} bars: false datasource: prometheus editable: true error: false fill: 1 grid: threshold1: threshold1Color: rgba(216, 200, 27, 0.27) threshold2: threshold2Color: rgba(234, 112, 112, 0.22) height: '' id: 7 isNew: true legend: avg: false current: false max: false min: false show: true total: false values: false lines: true linewidth: 2 links: [] nullPointMode: connected percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: [] span: 6 stack: false steppedLine: false targets: - expr: ceph_pool_objects{pool_id=~"$pool"} interval: "$interval" intervalFactor: 1 legendFormat: Objects - {{ $pool_name }} refId: A step: 60 - expr: ceph_pool_dirty{pool_id=~"$pool"} interval: "$interval" intervalFactor: 1 legendFormat: Dirty Objects - {{ $pool_name }} refId: B step: 60 timeFrom: timeShift: title: Objects in Pool [[pool_name]] tooltip: msResolution: false shared: true sort: 0 value_type: cumulative type: graph xaxis: show: true yaxes: - format: short label: logBase: 1 max: min: 0 show: true - format: short label: logBase: 1 max: min: 0 show: true - aliasColors: {} bars: false datasource: prometheus decimals: 2 editable: true error: false fill: 1 grid: threshold1: threshold1Color: rgba(216, 200, 27, 0.27) threshold2: threshold2Color: rgba(234, 112, 112, 0.22) thresholdLine: false id: 4 interval: "$interval" isNew: true legend: alignAsTable: true avg: true current: true max: true min: true show: true total: false values: true lines: true linewidth: 2 links: [] nullPointMode: connected percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: [] span: 6 stack: true steppedLine: false targets: - expr: irate(ceph_pool_rd{pool_id=~"$pool"}[3m]) interval: "$interval" intervalFactor: 1 legendFormat: Read - {{ $pool_name}} refId: B step: 60 - expr: irate(ceph_pool_wr{pool_id=~"$pool"}[3m]) interval: "$interval" intervalFactor: 1 legendFormat: 
Write - {{ $pool_name }} refId: A step: 60 timeFrom: timeShift: title: "[[pool_name]] Pool IOPS" tooltip: msResolution: false shared: true sort: 0 value_type: individual type: graph xaxis: show: true yaxes: - format: none label: IOPS logBase: 1 max: min: 0 show: true - format: short label: IOPS logBase: 1 max: min: 0 show: false - aliasColors: {} bars: false datasource: prometheus decimals: 2 editable: true error: false fill: 1 grid: threshold1: threshold1Color: rgba(216, 200, 27, 0.27) threshold2: threshold2Color: rgba(234, 112, 112, 0.22) id: 5 interval: "$interval" isNew: true legend: alignAsTable: true avg: true current: true max: true min: true show: true total: false values: true lines: true linewidth: 2 links: [] nullPointMode: connected percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: [] span: 12 stack: true steppedLine: false targets: - expr: irate(ceph_pool_rd_bytes{pool_id=~"$pool"}[3m]) interval: "$interval" intervalFactor: 1 legendFormat: Read Bytes - {{ $pool_name }} refId: A step: 60 - expr: irate(ceph_pool_wr_bytes{pool_id=~"$pool"}[3m]) interval: "$interval" intervalFactor: 1 legendFormat: Written Bytes - {{ $pool_name }} refId: B step: 60 timeFrom: timeShift: title: "[[pool_name]] Pool Throughput" tooltip: msResolution: false shared: true sort: 0 value_type: individual type: graph xaxis: show: true yaxes: - format: Bps label: logBase: 1 max: min: 0 show: true - format: Bps label: logBase: 1 max: min: 0 show: true title: New row time: from: now-3h to: now timepicker: refresh_intervals: - 5s - 10s - 30s - 1m - 5m - 15m - 30m - 1h - 2h - 1d time_options: - 5m - 15m - 1h - 6h - 12h - 24h - 2d - 7d - 30d templating: list: - auto: true auto_count: 10 auto_min: 1m current: selected: true text: 1m value: 1m datasource: hide: 0 includeAll: false label: Interval multi: false name: interval options: - selected: false text: auto value: "$__auto_interval" - selected: true text: 1m value: 1m - selected: false text: 10m value: 10m -
selected: false text: 30m value: 30m - selected: false text: 1h value: 1h - selected: false text: 6h value: 6h - selected: false text: 12h value: 12h - selected: false text: 1d value: 1d - selected: false text: 7d value: 7d - selected: false text: 14d value: 14d - selected: false text: 30d value: 30d query: 1m,10m,30m,1h,6h,12h,1d,7d,14d,30d refresh: 0 type: interval - current: {} datasource: prometheus hide: 0 includeAll: false label: Pool multi: false name: pool options: [] query: label_values(ceph_pool_objects, pool_id) refresh: 1 regex: '' type: query - current: {} datasource: prometheus hide: 0 includeAll: false label: Pool multi: false name: pool_name options: [] query: label_values(ceph_pool_metadata{pool_id="[[pool]]" }, name) refresh: 1 regex: '' type: query annotations: list: [] refresh: 1m schemaVersion: 12 version: 22 links: [] gnetId: 926 description: Ceph Pools dashboard. hosts_containers: __inputs: - name: prometheus label: Prometheus description: '' type: datasource pluginId: prometheus pluginName: Prometheus __requires: - type: panel id: graph name: Graph version: '' - type: panel id: singlestat name: Singlestat version: '' - type: grafana id: grafana name: Grafana version: 3.1.1 - type: datasource id: prometheus name: Prometheus version: 1.3.0 id: title: Kubernetes cluster monitoring (via Prometheus) description: Monitors Kubernetes cluster using Prometheus. Shows overall cluster CPU / Memory / Filesystem usage as well as individual pod, containers, systemd services statistics. Uses cAdvisor metrics only. 
tags: - kubernetes style: dark timezone: browser editable: true hideControls: false sharedCrosshair: false rows: - collapse: false editable: true height: 200px panels: - aliasColors: {} bars: false datasource: prometheus decimals: 2 editable: true error: false fill: 1 grid: threshold1: threshold1Color: rgba(216, 200, 27, 0.27) threshold2: threshold2Color: rgba(234, 112, 112, 0.22) thresholdLine: false height: 200px id: 32 isNew: true legend: alignAsTable: false avg: true current: true max: false min: false rightSide: false show: false sideWidth: 200 sort: current sortDesc: true total: false values: true lines: true linewidth: 2 links: [] nullPointMode: connected percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: [] span: 12 stack: false steppedLine: false targets: - expr: sum (rate (container_network_receive_bytes_total{kubernetes_io_hostname=~"^$Node$"}[5m])) interval: 10s intervalFactor: 1 legendFormat: Received metric: network refId: A step: 10 - expr: '- sum (rate (container_network_transmit_bytes_total{kubernetes_io_hostname=~"^$Node$"}[5m]))' interval: 10s intervalFactor: 1 legendFormat: Sent metric: network refId: B step: 10 timeFrom: timeShift: title: Network I/O pressure tooltip: msResolution: false shared: true sort: 0 value_type: cumulative transparent: false type: graph xaxis: show: true yaxes: - format: Bps label: logBase: 1 max: min: show: true - format: Bps label: logBase: 1 max: min: show: false title: Network I/O pressure - collapse: false editable: true height: 250px panels: - cacheTimeout: colorBackground: false colorValue: true colors: - rgba(50, 172, 45, 0.97) - rgba(237, 129, 40, 0.89) - rgba(245, 54, 54, 0.9) datasource: prometheus editable: true error: false format: percent gauge: maxValue: 100 minValue: 0 show: true thresholdLabels: false thresholdMarkers: true height: 180px id: 4 interval: isNew: true links: [] mappingType: 1 mappingTypes: - name: value to text value: 1 - name: range to text value: 2 
maxDataPoints: 100 nullPointMode: connected nullText: postfix: '' postfixFontSize: 50% prefix: '' prefixFontSize: 50% rangeMaps: - from: 'null' text: N/A to: 'null' span: 4 sparkline: fillColor: rgba(31, 118, 189, 0.18) full: false lineColor: rgb(31, 120, 193) show: false targets: - expr: sum (container_memory_working_set_bytes{id="/",kubernetes_io_hostname=~"^$Node$"}) / sum (machine_memory_bytes{kubernetes_io_hostname=~"^$Node$"}) * 100 interval: 10s intervalFactor: 1 refId: A step: 10 thresholds: 65, 90 title: Cluster memory usage transparent: false type: singlestat valueFontSize: 80% valueMaps: - op: "=" text: N/A value: 'null' valueName: current - cacheTimeout: colorBackground: false colorValue: true colors: - rgba(50, 172, 45, 0.97) - rgba(237, 129, 40, 0.89) - rgba(245, 54, 54, 0.9) datasource: prometheus decimals: 2 editable: true error: false format: percent gauge: maxValue: 100 minValue: 0 show: true thresholdLabels: false thresholdMarkers: true height: 180px id: 6 interval: isNew: true links: [] mappingType: 1 mappingTypes: - name: value to text value: 1 - name: range to text value: 2 maxDataPoints: 100 nullPointMode: connected nullText: postfix: '' postfixFontSize: 50% prefix: '' prefixFontSize: 50% rangeMaps: - from: 'null' text: N/A to: 'null' span: 4 sparkline: fillColor: rgba(31, 118, 189, 0.18) full: false lineColor: rgb(31, 120, 193) show: false targets: - expr: sum (rate (container_cpu_usage_seconds_total{id="/",kubernetes_io_hostname=~"^$Node$"}[5m])) / sum (machine_cpu_cores{kubernetes_io_hostname=~"^$Node$"}) * 100 interval: 10s intervalFactor: 1 refId: A step: 10 thresholds: 65, 90 title: Cluster CPU usage (5m avg) type: singlestat valueFontSize: 80% valueMaps: - op: "=" text: N/A value: 'null' valueName: current - cacheTimeout: colorBackground: false colorValue: true colors: - rgba(50, 172, 45, 0.97) - rgba(237, 129, 40, 0.89) - rgba(245, 54, 54, 0.9) datasource: prometheus decimals: 2 editable: true error: false format: percent gauge: 
maxValue: 100 minValue: 0 show: true thresholdLabels: false thresholdMarkers: true height: 180px id: 7 interval: isNew: true links: [] mappingType: 1 mappingTypes: - name: value to text value: 1 - name: range to text value: 2 maxDataPoints: 100 nullPointMode: connected nullText: postfix: '' postfixFontSize: 50% prefix: '' prefixFontSize: 50% rangeMaps: - from: 'null' text: N/A to: 'null' span: 4 sparkline: fillColor: rgba(31, 118, 189, 0.18) full: false lineColor: rgb(31, 120, 193) show: false targets: - expr: sum (container_fs_usage_bytes{device=~"^/dev/[sv]da[0-9]$",id=~"/.+",kubernetes_io_hostname=~"^$Node$"}) / sum (container_fs_limit_bytes{device=~"^/dev/[sv]da[0-9]$",id=~"/.+",kubernetes_io_hostname=~"^$Node$"}) * 100 interval: 10s intervalFactor: 1 legendFormat: '' metric: '' refId: A step: 10 thresholds: 65, 90 title: Cluster filesystem usage type: singlestat valueFontSize: 80% valueMaps: - op: "=" text: N/A value: 'null' valueName: current - cacheTimeout: colorBackground: false colorValue: false colors: - rgba(50, 172, 45, 0.97) - rgba(237, 129, 40, 0.89) - rgba(245, 54, 54, 0.9) datasource: prometheus decimals: 2 editable: true error: false format: bytes gauge: maxValue: 100 minValue: 0 show: false thresholdLabels: false thresholdMarkers: true height: 1px id: 9 interval: isNew: true links: [] mappingType: 1 mappingTypes: - name: value to text value: 1 - name: range to text value: 2 maxDataPoints: 100 nullPointMode: connected nullText: postfix: '' postfixFontSize: 20% prefix: '' prefixFontSize: 20% rangeMaps: - from: 'null' text: N/A to: 'null' span: 2 sparkline: fillColor: rgba(31, 118, 189, 0.18) full: false lineColor: rgb(31, 120, 193) show: false targets: - expr: sum (container_memory_working_set_bytes{id="/",kubernetes_io_hostname=~"^$Node$"}) interval: 10s intervalFactor: 1 refId: A step: 10 thresholds: '' title: Used type: singlestat valueFontSize: 50% valueMaps: - op: "=" text: N/A value: 'null' valueName: current - cacheTimeout: colorBackground: 
false colorValue: false colors: - rgba(50, 172, 45, 0.97) - rgba(237, 129, 40, 0.89) - rgba(245, 54, 54, 0.9) datasource: prometheus decimals: 2 editable: true error: false format: bytes gauge: maxValue: 100 minValue: 0 show: false thresholdLabels: false thresholdMarkers: true height: 1px id: 10 interval: isNew: true links: [] mappingType: 1 mappingTypes: - name: value to text value: 1 - name: range to text value: 2 maxDataPoints: 100 nullPointMode: connected nullText: postfix: '' postfixFontSize: 50% prefix: '' prefixFontSize: 50% rangeMaps: - from: 'null' text: N/A to: 'null' span: 2 sparkline: fillColor: rgba(31, 118, 189, 0.18) full: false lineColor: rgb(31, 120, 193) show: false targets: - expr: sum (machine_memory_bytes{kubernetes_io_hostname=~"^$Node$"}) interval: 10s intervalFactor: 1 refId: A step: 10 thresholds: '' title: Total type: singlestat valueFontSize: 50% valueMaps: - op: "=" text: N/A value: 'null' valueName: current - cacheTimeout: colorBackground: false colorValue: false colors: - rgba(50, 172, 45, 0.97) - rgba(237, 129, 40, 0.89) - rgba(245, 54, 54, 0.9) datasource: prometheus decimals: 2 editable: true error: false format: none gauge: maxValue: 100 minValue: 0 show: false thresholdLabels: false thresholdMarkers: true height: 1px id: 11 interval: isNew: true links: [] mappingType: 1 mappingTypes: - name: value to text value: 1 - name: range to text value: 2 maxDataPoints: 100 nullPointMode: connected nullText: postfix: " cores" postfixFontSize: 30% prefix: '' prefixFontSize: 50% rangeMaps: - from: 'null' text: N/A to: 'null' span: 2 sparkline: fillColor: rgba(31, 118, 189, 0.18) full: false lineColor: rgb(31, 120, 193) show: false targets: - expr: sum (rate (container_cpu_usage_seconds_total{id="/",kubernetes_io_hostname=~"^$Node$"}[5m])) interval: 10s intervalFactor: 1 refId: A step: 10 thresholds: '' title: Used type: singlestat valueFontSize: 50% valueMaps: - op: "=" text: N/A value: 'null' valueName: current - cacheTimeout: 
colorBackground: false colorValue: false colors: - rgba(50, 172, 45, 0.97) - rgba(237, 129, 40, 0.89) - rgba(245, 54, 54, 0.9) datasource: prometheus decimals: 2 editable: true error: false format: none gauge: maxValue: 100 minValue: 0 show: false thresholdLabels: false thresholdMarkers: true height: 1px id: 12 interval: isNew: true links: [] mappingType: 1 mappingTypes: - name: value to text value: 1 - name: range to text value: 2 maxDataPoints: 100 nullPointMode: connected nullText: postfix: " cores" postfixFontSize: 30% prefix: '' prefixFontSize: 50% rangeMaps: - from: 'null' text: N/A to: 'null' span: 2 sparkline: fillColor: rgba(31, 118, 189, 0.18) full: false lineColor: rgb(31, 120, 193) show: false targets: - expr: sum (machine_cpu_cores{kubernetes_io_hostname=~"^$Node$"}) interval: 10s intervalFactor: 1 refId: A step: 10 thresholds: '' title: Total type: singlestat valueFontSize: 50% valueMaps: - op: "=" text: N/A value: 'null' valueName: current - cacheTimeout: colorBackground: false colorValue: false colors: - rgba(50, 172, 45, 0.97) - rgba(237, 129, 40, 0.89) - rgba(245, 54, 54, 0.9) datasource: prometheus decimals: 2 editable: true error: false format: bytes gauge: maxValue: 100 minValue: 0 show: false thresholdLabels: false thresholdMarkers: true height: 1px id: 13 interval: isNew: true links: [] mappingType: 1 mappingTypes: - name: value to text value: 1 - name: range to text value: 2 maxDataPoints: 100 nullPointMode: connected nullText: postfix: '' postfixFontSize: 50% prefix: '' prefixFontSize: 50% rangeMaps: - from: 'null' text: N/A to: 'null' span: 2 sparkline: fillColor: rgba(31, 118, 189, 0.18) full: false lineColor: rgb(31, 120, 193) show: false targets: - expr: sum (container_fs_usage_bytes{device=~"^/dev/[sv]da[0-9]$",id=~"/.+",kubernetes_io_hostname=~"^$Node$"}) interval: 10s intervalFactor: 1 refId: A step: 10 thresholds: '' title: Used type: singlestat valueFontSize: 50% valueMaps: - op: "=" text: N/A value: 'null' valueName: current - 
cacheTimeout: colorBackground: false colorValue: false colors: - rgba(50, 172, 45, 0.97) - rgba(237, 129, 40, 0.89) - rgba(245, 54, 54, 0.9) datasource: prometheus decimals: 2 editable: true error: false format: bytes gauge: maxValue: 100 minValue: 0 show: false thresholdLabels: false thresholdMarkers: true height: 1px id: 14 interval: isNew: true links: [] mappingType: 1 mappingTypes: - name: value to text value: 1 - name: range to text value: 2 maxDataPoints: 100 nullPointMode: connected nullText: postfix: '' postfixFontSize: 50% prefix: '' prefixFontSize: 50% rangeMaps: - from: 'null' text: N/A to: 'null' span: 2 sparkline: fillColor: rgba(31, 118, 189, 0.18) full: false lineColor: rgb(31, 120, 193) show: false targets: - expr: sum (container_fs_limit_bytes{device=~"^/dev/[sv]da[0-9]$",id=~"/.+",kubernetes_io_hostname=~"^$Node$"}) interval: 10s intervalFactor: 1 refId: A step: 10 thresholds: '' title: Total type: singlestat valueFontSize: 50% valueMaps: - op: "=" text: N/A value: 'null' valueName: current showTitle: false title: Total usage - collapse: false editable: true height: 250px panels: - aliasColors: {} bars: false datasource: prometheus decimals: 3 editable: true error: false fill: 0 grid: threshold1: threshold1Color: rgba(216, 200, 27, 0.27) threshold2: threshold2Color: rgba(234, 112, 112, 0.22) height: '' id: 17 isNew: true legend: alignAsTable: true avg: true current: true max: false min: false rightSide: true show: true sort: current sortDesc: true total: false values: true lines: true linewidth: 2 links: [] nullPointMode: connected percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: [] span: 12 stack: false steppedLine: true targets: - expr: sum (rate (container_cpu_usage_seconds_total{image!="",name=~"^k8s_.*",kubernetes_io_hostname=~"^$Node$"}[5m])) by (pod_name) interval: 10s intervalFactor: 1 legendFormat: "{{ pod_name }}" metric: container_cpu refId: A step: 10 timeFrom: timeShift: title: Pods CPU usage (5m avg) 
tooltip: msResolution: true shared: true sort: 2 value_type: cumulative transparent: false type: graph xaxis: show: true yaxes: - format: none label: cores logBase: 1 max: min: show: true - format: short label: logBase: 1 max: min: show: false showTitle: false title: Pods CPU usage - collapse: true editable: true height: 250px panels: - aliasColors: {} bars: false datasource: prometheus decimals: 3 editable: true error: false fill: 0 grid: threshold1: threshold1Color: rgba(216, 200, 27, 0.27) threshold2: threshold2Color: rgba(234, 112, 112, 0.22) height: '' id: 23 isNew: true legend: alignAsTable: true avg: true current: true max: false min: false rightSide: true show: true sort: current sortDesc: true total: false values: true lines: true linewidth: 2 links: [] nullPointMode: connected percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: [] span: 12 stack: false steppedLine: true targets: - expr: sum (rate (container_cpu_usage_seconds_total{systemd_service_name!="",kubernetes_io_hostname=~"^$Node$"}[5m])) by (systemd_service_name) hide: false interval: 10s intervalFactor: 1 legendFormat: "{{ systemd_service_name }}" metric: container_cpu refId: A step: 10 timeFrom: timeShift: title: System services CPU usage (5m avg) tooltip: msResolution: true shared: true sort: 2 value_type: cumulative type: graph xaxis: show: true yaxes: - format: none label: cores logBase: 1 max: min: show: true - format: short label: logBase: 1 max: min: show: false title: System services CPU usage - collapse: true editable: true height: 250px panels: - aliasColors: {} bars: false datasource: prometheus decimals: 3 editable: true error: false fill: 0 grid: threshold1: threshold1Color: rgba(216, 200, 27, 0.27) threshold2: threshold2Color: rgba(234, 112, 112, 0.22) height: '' id: 24 isNew: true legend: alignAsTable: true avg: true current: true hideEmpty: false hideZero: false max: false min: false rightSide: true show: true sideWidth: sort: current sortDesc: true 
total: false values: true lines: true linewidth: 2 links: [] nullPointMode: connected percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: [] span: 12 stack: false steppedLine: true targets: - expr: sum (rate (container_cpu_usage_seconds_total{image!="",name=~"^k8s_.*",container_name!="POD",kubernetes_io_hostname=~"^$Node$"}[5m])) by (container_name, pod_name) hide: false interval: 10s intervalFactor: 1 legendFormat: 'pod: {{ pod_name }} | {{ container_name }}' metric: container_cpu refId: A step: 10 - expr: sum (rate (container_cpu_usage_seconds_total{image!="",name!~"^k8s_.*",kubernetes_io_hostname=~"^$Node$"}[5m])) by (kubernetes_io_hostname, name, image) hide: false interval: 10s intervalFactor: 1 legendFormat: 'docker: {{ kubernetes_io_hostname }} | {{ image }} ({{ name }})' metric: container_cpu refId: B step: 10 - expr: sum (rate (container_cpu_usage_seconds_total{rkt_container_name!="",kubernetes_io_hostname=~"^$Node$"}[5m])) by (kubernetes_io_hostname, rkt_container_name) interval: 10s intervalFactor: 1 legendFormat: 'rkt: {{ kubernetes_io_hostname }} | {{ rkt_container_name }}' metric: container_cpu refId: C step: 10 timeFrom: timeShift: title: Containers CPU usage (5m avg) tooltip: msResolution: true shared: true sort: 2 value_type: cumulative type: graph xaxis: show: true yaxes: - format: none label: cores logBase: 1 max: min: show: true - format: short label: logBase: 1 max: min: show: false title: Containers CPU usage - collapse: true editable: true height: 500px panels: - aliasColors: {} bars: false datasource: prometheus decimals: 3 editable: true error: false fill: 0 grid: threshold1: threshold1Color: rgba(216, 200, 27, 0.27) threshold2: threshold2Color: rgba(234, 112, 112, 0.22) id: 20 isNew: true legend: alignAsTable: true avg: true current: true max: false min: false rightSide: false show: true sort: current sortDesc: true total: false values: true lines: true linewidth: 2 links: [] nullPointMode: connected percentage: 
false pointradius: 5 points: false renderer: flot seriesOverrides: [] span: 12 stack: false steppedLine: true targets: - expr: sum (rate (container_cpu_usage_seconds_total{id!="/",kubernetes_io_hostname=~"^$Node$"}[5m])) by (id) hide: false interval: 10s intervalFactor: 1 legendFormat: "{{ id }}" metric: container_cpu refId: A step: 10 timeFrom: timeShift: title: All processes CPU usage (5m avg) tooltip: msResolution: true shared: true sort: 2 value_type: cumulative type: graph xaxis: show: true yaxes: - format: none label: cores logBase: 1 max: min: show: true - format: short label: logBase: 1 max: min: show: false repeat: showTitle: false title: All processes CPU usage - collapse: false editable: true height: 250px panels: - aliasColors: {} bars: false datasource: prometheus decimals: 2 editable: true error: false fill: 0 grid: threshold1: threshold1Color: rgba(216, 200, 27, 0.27) threshold2: threshold2Color: rgba(234, 112, 112, 0.22) id: 25 isNew: true legend: alignAsTable: true avg: true current: true max: false min: false rightSide: true show: true sideWidth: 200 sort: current sortDesc: true total: false values: true lines: true linewidth: 2 links: [] nullPointMode: connected percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: [] span: 12 stack: false steppedLine: true targets: - expr: sum (container_memory_working_set_bytes{image!="",name=~"^k8s_.*",kubernetes_io_hostname=~"^$Node$"}) by (pod_name) interval: 10s intervalFactor: 1 legendFormat: "{{ pod_name }}" metric: container_memory_usage:sort_desc refId: A step: 10 timeFrom: timeShift: title: Pods memory usage tooltip: msResolution: false shared: true sort: 2 value_type: cumulative type: graph xaxis: show: true yaxes: - format: bytes label: logBase: 1 max: min: show: true - format: short label: logBase: 1 max: min: show: false title: Pods memory usage - collapse: true editable: true height: 250px panels: - aliasColors: {} bars: false datasource: prometheus decimals: 2 editable: 
true error: false fill: 0 grid: threshold1: threshold1Color: rgba(216, 200, 27, 0.27) threshold2: threshold2Color: rgba(234, 112, 112, 0.22) id: 26 isNew: true legend: alignAsTable: true avg: true current: true max: false min: false rightSide: true show: true sideWidth: 200 sort: current sortDesc: true total: false values: true lines: true linewidth: 2 links: [] nullPointMode: connected percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: [] span: 12 stack: false steppedLine: true targets: - expr: sum (container_memory_working_set_bytes{systemd_service_name!="",kubernetes_io_hostname=~"^$Node$"}) by (systemd_service_name) interval: 10s intervalFactor: 1 legendFormat: "{{ systemd_service_name }}" metric: container_memory_usage:sort_desc refId: A step: 10 timeFrom: timeShift: title: System services memory usage tooltip: msResolution: false shared: true sort: 2 value_type: cumulative type: graph xaxis: show: true yaxes: - format: bytes label: logBase: 1 max: min: show: true - format: short label: logBase: 1 max: min: show: false title: System services memory usage - collapse: true editable: true height: 250px panels: - aliasColors: {} bars: false datasource: prometheus decimals: 2 editable: true error: false fill: 0 grid: threshold1: threshold1Color: rgba(216, 200, 27, 0.27) threshold2: threshold2Color: rgba(234, 112, 112, 0.22) id: 27 isNew: true legend: alignAsTable: true avg: true current: true max: false min: false rightSide: true show: true sideWidth: 200 sort: current sortDesc: true total: false values: true lines: true linewidth: 2 links: [] nullPointMode: connected percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: [] span: 12 stack: false steppedLine: true targets: - expr: sum (container_memory_working_set_bytes{image!="",name=~"^k8s_.*",container_name!="POD",kubernetes_io_hostname=~"^$Node$"}) by (container_name, pod_name) interval: 10s intervalFactor: 1 legendFormat: 'pod: {{ pod_name }} | {{ 
container_name }}' metric: container_memory_usage:sort_desc refId: A step: 10 - expr: sum (container_memory_working_set_bytes{image!="",name!~"^k8s_.*",kubernetes_io_hostname=~"^$Node$"}) by (kubernetes_io_hostname, name, image) interval: 10s intervalFactor: 1 legendFormat: 'docker: {{ kubernetes_io_hostname }} | {{ image }} ({{ name }})' metric: container_memory_usage:sort_desc refId: B step: 10 - expr: sum (container_memory_working_set_bytes{rkt_container_name!="",kubernetes_io_hostname=~"^$Node$"}) by (kubernetes_io_hostname, rkt_container_name) interval: 10s intervalFactor: 1 legendFormat: 'rkt: {{ kubernetes_io_hostname }} | {{ rkt_container_name }}' metric: container_memory_usage:sort_desc refId: C step: 10 timeFrom: timeShift: title: Containers memory usage tooltip: msResolution: false shared: true sort: 2 value_type: cumulative type: graph xaxis: show: true yaxes: - format: bytes label: logBase: 1 max: min: show: true - format: short label: logBase: 1 max: min: show: false title: Containers memory usage - collapse: true editable: true height: 500px panels: - aliasColors: {} bars: false datasource: prometheus decimals: 2 editable: true error: false fill: 0 grid: threshold1: threshold1Color: rgba(216, 200, 27, 0.27) threshold2: threshold2Color: rgba(234, 112, 112, 0.22) id: 28 isNew: true legend: alignAsTable: true avg: true current: true max: false min: false rightSide: false show: true sideWidth: 200 sort: current sortDesc: true total: false values: true lines: true linewidth: 2 links: [] nullPointMode: connected percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: [] span: 12 stack: false steppedLine: true targets: - expr: sum (container_memory_working_set_bytes{id!="/",kubernetes_io_hostname=~"^$Node$"}) by (id) interval: 10s intervalFactor: 1 legendFormat: "{{ id }}" metric: container_memory_usage:sort_desc refId: A step: 10 timeFrom: timeShift: title: All processes memory usage tooltip: msResolution: false shared: true sort: 2 
value_type: cumulative type: graph xaxis: show: true yaxes: - format: bytes label: logBase: 1 max: min: show: true - format: short label: logBase: 1 max: min: show: false title: All processes memory usage - collapse: false editable: true height: 250px panels: - aliasColors: {} bars: false datasource: prometheus decimals: 2 editable: true error: false fill: 1 grid: threshold1: threshold1Color: rgba(216, 200, 27, 0.27) threshold2: threshold2Color: rgba(234, 112, 112, 0.22) id: 16 isNew: true legend: alignAsTable: true avg: true current: true max: false min: false rightSide: true show: true sideWidth: 200 sort: current sortDesc: true total: false values: true lines: true linewidth: 2 links: [] nullPointMode: connected percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: [] span: 12 stack: false steppedLine: false targets: - expr: sum (rate (container_network_receive_bytes_total{image!="",name=~"^k8s_.*",kubernetes_io_hostname=~"^$Node$"}[5m])) by (pod_name) interval: 10s intervalFactor: 1 legendFormat: "-> {{ pod_name }}" metric: network refId: A step: 10 - expr: '- sum (rate (container_network_transmit_bytes_total{image!="",name=~"^k8s_.*",kubernetes_io_hostname=~"^$Node$"}[5m])) by (pod_name)' interval: 10s intervalFactor: 1 legendFormat: "<- {{ pod_name }}" metric: network refId: B step: 10 timeFrom: timeShift: title: Pods network I/O (5m avg) tooltip: msResolution: false shared: true sort: 2 value_type: cumulative type: graph xaxis: show: true yaxes: - format: Bps label: logBase: 1 max: min: show: true - format: short label: logBase: 1 max: min: show: false title: Pods network I/O - collapse: true editable: true height: 250px panels: - aliasColors: {} bars: false datasource: prometheus decimals: 2 editable: true error: false fill: 1 grid: threshold1: threshold1Color: rgba(216, 200, 27, 0.27) threshold2: threshold2Color: rgba(234, 112, 112, 0.22) id: 30 isNew: true legend: alignAsTable: true avg: true current: true max: false min: false 
# Containers network I/O targets: "->" series are receive rates, "<-" series are negated transmit rates; refId E (rkt receive) reads container_network_receive_bytes_total.
rightSide: true show: true sideWidth: 200 sort: current sortDesc: true total: false values: true lines: true linewidth: 2 links: [] nullPointMode: connected percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: [] span: 12 stack: false steppedLine: false targets: - expr: sum (rate (container_network_receive_bytes_total{image!="",name=~"^k8s_.*",kubernetes_io_hostname=~"^$Node$"}[5m])) by (container_name, pod_name) hide: false interval: 10s intervalFactor: 1 legendFormat: "-> pod: {{ pod_name }} | {{ container_name }}" metric: network refId: B step: 10 - expr: '- sum (rate (container_network_transmit_bytes_total{image!="",name=~"^k8s_.*",kubernetes_io_hostname=~"^$Node$"}[5m])) by (container_name, pod_name)' hide: false interval: 10s intervalFactor: 1 legendFormat: "<- pod: {{ pod_name }} | {{ container_name }}" metric: network refId: D step: 10 - expr: sum (rate (container_network_receive_bytes_total{image!="",name!~"^k8s_.*",kubernetes_io_hostname=~"^$Node$"}[5m])) by (kubernetes_io_hostname, name, image) hide: false interval: 10s intervalFactor: 1 legendFormat: "-> docker: {{ kubernetes_io_hostname }} | {{ image }} ({{ name }})" metric: network refId: A step: 10 - expr: '- sum (rate (container_network_transmit_bytes_total{image!="",name!~"^k8s_.*",kubernetes_io_hostname=~"^$Node$"}[5m])) by (kubernetes_io_hostname, name, image)' hide: false interval: 10s intervalFactor: 1 legendFormat: "<- docker: {{ kubernetes_io_hostname }} | {{ image }} ({{ name }})" metric: network refId: C step: 10 - expr: sum (rate (container_network_receive_bytes_total{rkt_container_name!="",kubernetes_io_hostname=~"^$Node$"}[5m])) by (kubernetes_io_hostname, rkt_container_name) hide: false interval: 10s intervalFactor: 1 legendFormat: "-> rkt: {{ kubernetes_io_hostname }} | {{ rkt_container_name }}" metric: network refId: E step: 10 - expr: '- sum (rate (container_network_transmit_bytes_total{rkt_container_name!="",kubernetes_io_hostname=~"^$Node$"}[5m])) by
(kubernetes_io_hostname, rkt_container_name)' hide: false interval: 10s intervalFactor: 1 legendFormat: "<- rkt: {{ kubernetes_io_hostname }} | {{ rkt_container_name }}" metric: network refId: F step: 10 timeFrom: timeShift: title: Containers network I/O (5m avg) tooltip: msResolution: false shared: true sort: 2 value_type: cumulative type: graph xaxis: show: true yaxes: - format: Bps label: logBase: 1 max: min: show: true - format: short label: logBase: 1 max: min: show: false title: Containers network I/O - collapse: true editable: true height: 500px panels: - aliasColors: {} bars: false datasource: prometheus decimals: 2 editable: true error: false fill: 1 grid: threshold1: threshold1Color: rgba(216, 200, 27, 0.27) threshold2: threshold2Color: rgba(234, 112, 112, 0.22) id: 29 isNew: true legend: alignAsTable: true avg: true current: true max: false min: false rightSide: false show: true sideWidth: 200 sort: current sortDesc: true total: false values: true lines: true linewidth: 2 links: [] nullPointMode: connected percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: [] span: 12 stack: false steppedLine: false targets: - expr: sum (rate (container_network_receive_bytes_total{id!="/",kubernetes_io_hostname=~"^$Node$"}[5m])) by (id) interval: 10s intervalFactor: 1 legendFormat: "-> {{ id }}" metric: network refId: A step: 10 - expr: '- sum (rate (container_network_transmit_bytes_total{id!="/",kubernetes_io_hostname=~"^$Node$"}[5m])) by (id)' interval: 10s intervalFactor: 1 legendFormat: "<- {{ id }}" metric: network refId: B step: 10 timeFrom: timeShift: title: All processes network I/O (5m avg) tooltip: msResolution: false shared: true sort: 2 value_type: cumulative type: graph xaxis: show: true yaxes: - format: Bps label: logBase: 1 max: min: show: true - format: short label: logBase: 1 max: min: show: false title: All processes network I/O time: from: now-5m to: now timepicker: refresh_intervals: - 5s - 10s - 30s - 1m - 5m - 15m - 30m - 
1h - 2h - 1d time_options: - 5m - 15m - 1h - 6h - 12h - 24h - 2d - 7d - 30d templating: list: - allValue: ".*" current: {} datasource: prometheus hide: 0 includeAll: true multi: false name: Node options: [] query: label_values(kubernetes_io_hostname) refresh: 1 type: query annotations: list: [] refresh: 10s schemaVersion: 12 version: 13 links: [] gnetId: 315
# RabbitMQ dashboard (gnetId 2121). Every panel query is scoped by the $rabbit
# template variable, which is filled from label_values(rabbitmq_up, release_group).
# The "RabbitMQ Server" singlestat carries that selector in its expr (not in the
# informational "metric" field) so it follows the selected release like the others.
rabbitmq: __inputs: - name: prometheus label: Prometheus description: '' type: datasource pluginId: prometheus pluginName: Prometheus __requires: - type: grafana id: grafana name: Grafana version: 4.2.0 - type: panel id: graph name: Graph version: '' - type: datasource id: prometheus name: Prometheus version: 1.0.0 - type: panel id: singlestat name: Singlestat version: '' annotations: list: [] editable: true gnetId: 2121 graphTooltip: 0 hideControls: false id: links: [] refresh: 5s rows: - collapse: false height: 266 panels: - cacheTimeout: colorBackground: true colorValue: false colors: - rgba(50, 172, 45, 0.97) - rgba(237, 129, 40, 0.89) - rgba(245, 54, 54, 0.9) datasource: prometheus format: none gauge: maxValue: 100 minValue: 0 show: false thresholdLabels: false thresholdMarkers: true id: 13 interval: links: [] mappingType: 1 mappingTypes: - name: value to text value: 1 - name: range to text value: 2 maxDataPoints: 100 nullPointMode: connected nullText: postfix: '' postfixFontSize: 50% prefix: '' prefixFontSize: 50% rangeMaps: - from: 'null' text: N/A to: 'null' span: 3 sparkline: fillColor: rgba(31, 118, 189, 0.18) full: false lineColor: rgb(31, 120, 193) show: false targets: - expr: rabbitmq_up{application="prometheus_rabbitmq_exporter",release_group="$rabbit"} intervalFactor: 2 metric: rabbitmq_up refId: A step: 2 thresholds: Up,Down timeFrom: 30s title: RabbitMQ Server type: singlestat valueFontSize: 80% valueMaps: - op: "=" text: N/A value: 'null' - op: "=" text: Down value: '0' - op: "=" text: Up value: '1' valueName: current - alert: conditions: - evaluator: params: - 1 type: lt operator: type: and
query: params: - A - 10s - now reducer: params: [] type: last type: query - evaluator: params: [] type: no_value operator: type: and query: params: - A - 10s - now reducer: params: [] type: last type: query executionErrorState: alerting frequency: 60s handler: 1 message: One or more RabbitMQ nodes are down name: Node Stats alert noDataState: no_data notifications: [] aliasColors: {} bars: true datasource: prometheus decimals: 0 fill: 1 id: 12 legend: alignAsTable: true avg: false current: true max: false min: false show: true total: false values: true lines: false linewidth: 1 links: [] nullPointMode: 'null' percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: [] span: 9 stack: false steppedLine: false targets: - expr: rabbitmq_running{application="prometheus_rabbitmq_exporter",release_group="$rabbit"} intervalFactor: 2 legendFormat: "{{node}}" metric: rabbitmq_running refId: A step: 2 thresholds: - colorMode: critical fill: true line: true op: lt value: 1 timeFrom: 30s timeShift: title: Node up Stats tooltip: shared: true sort: 0 value_type: individual type: graph xaxis: mode: time name: show: true values: [] yaxes: - format: short label: logBase: 1 max: min: show: true - format: short label: logBase: 1 max: min: show: true - aliasColors: {} bars: false datasource: prometheus decimals: 0 fill: 1 id: 6 legend: alignAsTable: true avg: true current: true max: true min: true show: true total: false values: true lines: true linewidth: 1 links: [] nullPointMode: 'null' percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: [] span: 4 stack: false steppedLine: false targets: - expr: rabbitmq_exchangesTotal{application="prometheus_rabbitmq_exporter",release_group="$rabbit"} intervalFactor: 2 legendFormat: "{{instance}}:exchanges" metric: rabbitmq_exchangesTotal refId: A step: 2 thresholds: [] timeFrom: timeShift: title: Exchanges tooltip: shared: true sort: 0 value_type: individual type: graph xaxis: mode: time name: show:
true values: [] yaxes: - format: short label: logBase: 1 max: min: show: true - format: short label: logBase: 1 max: min: show: true - aliasColors: {} bars: false datasource: prometheus decimals: 0 fill: 1 id: 4 legend: alignAsTable: true avg: true current: true max: true min: true show: true total: false values: true lines: true linewidth: 1 links: [] nullPointMode: 'null' percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: [] span: 4 stack: false steppedLine: false targets: - expr: rabbitmq_channelsTotal{application="prometheus_rabbitmq_exporter",release_group="$rabbit"} intervalFactor: 2 legendFormat: "{{instance}}:channels" metric: rabbitmq_channelsTotal refId: A step: 2 thresholds: [] timeFrom: timeShift: title: Channels tooltip: shared: true sort: 0 value_type: individual type: graph xaxis: mode: time name: show: true values: [] yaxes: - format: short label: logBase: 1 max: min: show: true - format: short label: logBase: 1 max: min: show: true - aliasColors: {} bars: false datasource: prometheus decimals: 0 fill: 1 id: 3 legend: alignAsTable: true avg: true current: true max: true min: true show: true total: false values: true lines: true linewidth: 1 links: [] nullPointMode: 'null' percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: [] span: 4 stack: false steppedLine: false targets: - expr: rabbitmq_consumersTotal{application="prometheus_rabbitmq_exporter",release_group="$rabbit"} intervalFactor: 2 legendFormat: "{{instance}}:consumers" metric: rabbitmq_consumersTotal refId: A step: 2 thresholds: [] timeFrom: timeShift: title: Consumers tooltip: shared: true sort: 0 value_type: individual type: graph xaxis: mode: time name: show: true values: [] yaxes: - format: short label: logBase: 1 max: min: show: true - format: short label: logBase: 1 max: min: show: true - aliasColors: {} bars: false datasource: prometheus decimals: 0 fill: 1 id: 5 legend: avg: true current: true max: true min: true show: true
total: false values: true lines: true linewidth: 1 links: [] nullPointMode: 'null' percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: [] span: 4 stack: false steppedLine: false targets: - expr: rabbitmq_connectionsTotal{application="prometheus_rabbitmq_exporter",release_group="$rabbit"} intervalFactor: 2 legendFormat: "{{instance}}:connections" metric: rabbitmq_connectionsTotal refId: A step: 2 thresholds: [] timeFrom: timeShift: title: Connections tooltip: shared: true sort: 0 value_type: individual type: graph xaxis: mode: time name: show: true values: [] yaxes: - format: short label: logBase: 1 max: min: show: true - format: short label: logBase: 1 max: min: show: true - aliasColors: {} bars: false datasource: prometheus fill: 1 id: 7 legend: alignAsTable: true avg: true current: true max: true min: true show: true total: false values: true lines: true linewidth: 1 links: [] nullPointMode: 'null' percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: [] span: 4 stack: false steppedLine: false targets: - expr: rabbitmq_queuesTotal{application="prometheus_rabbitmq_exporter",release_group="$rabbit"} intervalFactor: 2 legendFormat: "{{instance}}:queues" metric: rabbitmq_queuesTotal refId: A step: 2 thresholds: [] timeFrom: timeShift: title: Queues tooltip: shared: true sort: 0 value_type: individual type: graph xaxis: mode: time name: show: true values: [] yaxes: - format: short label: logBase: 1 max: min: show: true - format: short label: logBase: 1 max: min: show: true - aliasColors: {} bars: false datasource: prometheus decimals: 0 fill: 1 id: 8 legend: alignAsTable: true avg: true current: true max: true min: true show: true total: false values: true lines: true linewidth: 1 links: [] nullPointMode: 'null' percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: [] span: 6 stack: false steppedLine: false targets: - expr: sum by
(vhost)(rabbitmq_queue_messages_ready{application="prometheus_rabbitmq_exporter",release_group="$rabbit"}) intervalFactor: 2 legendFormat: "{{vhost}}:ready" metric: rabbitmq_queue_messages_ready refId: A step: 2 - expr: sum by (vhost)(rabbitmq_queue_messages_published_total{application="prometheus_rabbitmq_exporter",release_group="$rabbit"}) intervalFactor: 2 legendFormat: "{{vhost}}:published" metric: rabbitmq_queue_messages_published_total refId: B step: 2 - expr: sum by (vhost)(rabbitmq_queue_messages_delivered_total{application="prometheus_rabbitmq_exporter",release_group="$rabbit"}) intervalFactor: 2 legendFormat: "{{vhost}}:delivered" metric: rabbitmq_queue_messages_delivered_total refId: C step: 2 - expr: sum by (vhost)(rabbitmq_queue_messages_unacknowledged{application="prometheus_rabbitmq_exporter",release_group="$rabbit"}) intervalFactor: 2 legendFormat: "{{vhost}}:unack" metric: ack refId: D step: 2 thresholds: [] timeFrom: timeShift: title: Messages/host tooltip: shared: true sort: 0 value_type: individual type: graph xaxis: mode: time name: show: true values: [] yaxes: - format: short label: logBase: 1 max: min: show: true - format: short label: logBase: 1 max: min: show: true - aliasColors: {} bars: false datasource: prometheus decimals: 0 fill: 1 id: 2 legend: alignAsTable: true avg: false current: true max: false min: false rightSide: false show: true total: false values: true lines: true linewidth: 1 links: [] nullPointMode: 'null' percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: [] span: 6 stack: false steppedLine: false targets: - expr: rabbitmq_queue_messages{application="prometheus_rabbitmq_exporter",release_group="$rabbit"} intervalFactor: 2 legendFormat: "{{queue}}:{{durable}}" metric: rabbitmq_queue_messages refId: A step: 2 thresholds: [] timeFrom: timeShift: title: Messages / Queue tooltip: shared: true sort: 0 value_type: individual type: graph xaxis: mode: time name: show: true values: [] yaxes: - format:
short label: logBase: 1 max: min: show: true - format: short label: logBase: 1 max: min: show: true - aliasColors: {} bars: false datasource: prometheus fill: 1 id: 9 legend: alignAsTable: true avg: true current: true max: true min: true show: true total: false values: true lines: true linewidth: 1 links: [] nullPointMode: 'null' percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: [] span: 6 stack: false steppedLine: false targets: - expr: rabbitmq_node_mem_used{application="prometheus_rabbitmq_exporter",release_group="$rabbit"} intervalFactor: 2 legendFormat: "{{node}}:used" metric: rabbitmq_node_mem_used refId: A step: 2 - expr: rabbitmq_node_mem_limit{application="prometheus_rabbitmq_exporter",release_group="$rabbit"} intervalFactor: 2 legendFormat: "{{node}}:limit" metric: node_mem refId: B step: 2 thresholds: [] timeFrom: timeShift: title: Memory tooltip: shared: true sort: 0 value_type: individual type: graph xaxis: mode: time name: show: true values: [] yaxes: - format: decbytes label: logBase: 1 max: min: show: true - format: short label: logBase: 1 max: min: show: true - aliasColors: {} bars: false datasource: prometheus fill: 1 id: 10 legend: alignAsTable: true avg: true current: true max: true min: true show: true total: false values: true lines: true linewidth: 1 links: [] nullPointMode: 'null' percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: [] span: 6 stack: false steppedLine: false targets: - expr: rabbitmq_fd_used{application="prometheus_rabbitmq_exporter",release_group="$rabbit"} intervalFactor: 2 legendFormat: "{{node}}:used" metric: '' refId: A step: 2 - expr: rabbitmq_fd_total{application="prometheus_rabbitmq_exporter",release_group="$rabbit"} intervalFactor: 2 legendFormat: "{{node}}:total" metric: node_mem refId: B step: 2 thresholds: [] timeFrom: timeShift: title: File descriptors tooltip: shared: true sort: 0 value_type: individual type: graph xaxis: mode: time name: show: true values:
[] yaxes: - format: short label: logBase: 1 max: min: show: true - format: short label: logBase: 1 max: min: show: true - aliasColors: {} bars: false datasource: prometheus fill: 1 id: 11 legend: alignAsTable: true avg: true current: true max: true min: true show: true total: false values: true lines: true linewidth: 1 links: [] nullPointMode: 'null' percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: [] span: 6 stack: false steppedLine: false targets: - expr: rabbitmq_sockets_used{application="prometheus_rabbitmq_exporter",release_group="$rabbit"} intervalFactor: 2 legendFormat: "{{node}}:used" metric: '' refId: A step: 2 - expr: rabbitmq_sockets_total{application="prometheus_rabbitmq_exporter",release_group="$rabbit"} intervalFactor: 2 legendFormat: "{{node}}:total" metric: '' refId: B step: 2 thresholds: [] timeFrom: timeShift: title: Sockets tooltip: shared: true sort: 0 value_type: individual transparent: false type: graph xaxis: mode: time name: show: true values: [] yaxes: - format: short label: logBase: 1 max: min: show: true - format: short label: logBase: 1 max: min: show: true repeat: repeatIteration: repeatRowId: showTitle: false title: Dashboard Row titleSize: h6 schemaVersion: 14 style: dark tags: [] templating: list: - current: tags: [] text: Prometheus value: Prometheus hide: 0 label: name: datasource options: [] query: prometheus refresh: 1 regex: '' type: datasource - current: {} hide: 0 label: null name: rabbit options: [] type: query query: label_values(rabbitmq_up, release_group) refresh: 1 sort: 1 datasource: prometheus time: from: now-5m to: now timepicker: refresh_intervals: - 5s - 10s - 30s - 1m - 5m - 15m - 30m - 1h - 2h - 1d time_options: - 5m - 15m - 1h - 6h - 12h - 24h - 2d - 7d - 30d timezone: browser title: RabbitMQ Metrics version: 17 description: 'Basic rabbitmq host stats: Node Stats, Exchanges, Channels, Consumers, Connections, Queues, Messages, Messages per Queue, Memory, File Descriptors, Sockets.'
# Kubernetes capacity-planning dashboard: idle CPU, system load, memory, disk I/O,
# disk space, network throughput and pod utilization, all read from the
# "prometheus" datasource. The system-load axis uses the plain "short" unit
# because load averages are absolute values, not 0..1 ratios.
kubernetes_capacity_planning: __inputs: - name: prometheus label: prometheus description: '' type: datasource pluginId: prometheus pluginName: Prometheus __requires: - type: grafana id: grafana name: Grafana version: 4.4.1 - type: panel id: graph name: Graph version: '' - type: datasource id: prometheus name: Prometheus version: 1.0.0 - type: panel id: singlestat name: Singlestat version: '' annotations: list: [] description: '' editable: true gnetId: 22 graphTooltip: 0 hideControls: false id: links: [] refresh: false rows: - collapse: false height: 250px panels: - alerting: {} aliasColors: {} bars: false dashLength: 10 dashes: false datasource: prometheus editable: true error: false fill: 1 grid: {} id: 3 legend: avg: false current: false max: false min: false show: true total: false values: false lines: true linewidth: 2 links: [] nullPointMode: connected percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: [] spaceLength: 10 span: 6 stack: false steppedLine: false targets: - expr: sum(rate(node_cpu{mode="idle"}[2m])) * 100 hide: false intervalFactor: 10 legendFormat: '' refId: A step: 50 thresholds: [] timeFrom: timeShift: title: Idle cpu tooltip: msResolution: false shared: true sort: 0 value_type: cumulative type: graph xaxis: buckets: mode: time name: show: true values: [] yaxes: - format: percent label: cpu idle logBase: 1 max: min: 0 show: true - format: short label: logBase: 1 max: min: show: true - alerting: {} aliasColors: {} bars: false dashLength: 10 dashes: false datasource: prometheus editable: true error: false fill: 1 grid: {} id: 9 legend: avg: false current: false max: false min: false show: true total: false values: false lines: true linewidth: 2 links: [] nullPointMode: connected percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: [] spaceLength: 10 span: 6 stack: false steppedLine: false targets: - expr: sum(node_load1) intervalFactor: 4 legendFormat: load 1m refId: A step: 20 target: '' -
expr: sum(node_load5) intervalFactor: 4 legendFormat: load 5m refId: B step: 20 target: '' - expr: sum(node_load15) intervalFactor: 4 legendFormat: load 15m refId: C step: 20 target: '' thresholds: [] timeFrom: timeShift: title: System load tooltip: msResolution: false shared: true sort: 0 value_type: cumulative type: graph xaxis: buckets: mode: time name: show: true values: [] yaxes: - format: short label: logBase: 1 max: min: show: true - format: short label: logBase: 1 max: min: show: true repeat: repeatIteration: repeatRowId: showTitle: false title: New row titleSize: h6 - collapse: false height: 250px panels: - alerting: {} aliasColors: {} bars: false dashLength: 10 dashes: false datasource: prometheus editable: true error: false fill: 1 grid: {} id: 4 legend: avg: false current: false max: false min: false show: true total: false values: false lines: true linewidth: 2 links: [] nullPointMode: connected percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: - alias: node_memory_SwapFree{instance="172.17.0.1:9100",job="prometheus"} yaxis: 2 spaceLength: 10 span: 9 stack: true steppedLine: false targets: - expr: sum(node_memory_MemTotal) - sum(node_memory_MemFree) - sum(node_memory_Buffers) - sum(node_memory_Cached) intervalFactor: 2 legendFormat: memory usage metric: memo refId: A step: 10 target: '' - expr: sum(node_memory_Buffers) interval: '' intervalFactor: 2 legendFormat: memory buffers metric: memo refId: B step: 10 target: '' - expr: sum(node_memory_Cached) interval: '' intervalFactor: 2 legendFormat: memory cached metric: memo refId: C step: 10 target: '' - expr: sum(node_memory_MemFree) interval: '' intervalFactor: 2 legendFormat: memory free metric: memo refId: D step: 10 target: '' thresholds: [] timeFrom: timeShift: title: Memory usage tooltip: msResolution: false shared: true sort: 0 value_type: individual type: graph xaxis: buckets: mode: time name: show: true values: [] yaxes: - format: bytes label: logBase: 1 max:
min: '0' show: true - format: short label: logBase: 1 max: min: show: true - cacheTimeout: colorBackground: false colorValue: false colors: - rgba(50, 172, 45, 0.97) - rgba(237, 129, 40, 0.89) - rgba(245, 54, 54, 0.9) datasource: prometheus editable: true error: false format: percent gauge: maxValue: 100 minValue: 0 show: true thresholdLabels: false thresholdMarkers: true id: 5 interval: links: [] mappingType: 1 mappingTypes: - name: value to text value: 1 - name: range to text value: 2 maxDataPoints: 100 nullPointMode: connected nullText: postfix: '' postfixFontSize: 50% prefix: '' prefixFontSize: 50% rangeMaps: - from: 'null' text: N/A to: 'null' span: 3 sparkline: fillColor: rgba(31, 118, 189, 0.18) full: false lineColor: rgb(31, 120, 193) show: false tableColumn: '' targets: - expr: "((sum(node_memory_MemTotal) - sum(node_memory_MemFree) - sum(node_memory_Buffers) - sum(node_memory_Cached)) / sum(node_memory_MemTotal)) * 100" intervalFactor: 2 metric: '' refId: A step: 60 target: '' thresholds: 80, 90 title: Memory usage type: singlestat valueFontSize: 80% valueMaps: - op: "=" text: N/A value: 'null' valueName: avg repeat: repeatIteration: repeatRowId: showTitle: false title: New row titleSize: h6 - collapse: false height: 246 panels: - alerting: {} aliasColors: {} bars: false dashLength: 10 dashes: false datasource: prometheus editable: true error: false fill: 1 grid: {} id: 6 legend: avg: false current: false max: false min: false show: true total: false values: false lines: true linewidth: 2 links: [] nullPointMode: connected percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: - alias: read yaxis: 1 - alias: '{instance="172.17.0.1:9100"}' yaxis: 2 - alias: io time yaxis: 2 spaceLength: 10 span: 9 stack: false steppedLine: false targets: - expr: sum(rate(node_disk_bytes_read[5m])) hide: false intervalFactor: 4 legendFormat: read refId: A step: 20 target: '' - expr: sum(rate(node_disk_bytes_written[5m])) intervalFactor: 4
legendFormat: written refId: B step: 20 - expr: sum(rate(node_disk_io_time_ms[5m])) intervalFactor: 4 legendFormat: io time refId: C step: 20 thresholds: [] timeFrom: timeShift: title: Disk I/O tooltip: msResolution: false shared: true sort: 0 value_type: cumulative type: graph xaxis: buckets: mode: time name: show: true values: [] yaxes: - format: bytes label: logBase: 1 max: min: show: true - format: ms label: logBase: 1 max: min: show: true - cacheTimeout: colorBackground: false colorValue: false colors: - rgba(50, 172, 45, 0.97) - rgba(237, 129, 40, 0.89) - rgba(245, 54, 54, 0.9) datasource: prometheus editable: true error: false format: percentunit gauge: maxValue: 1 minValue: 0 show: true thresholdLabels: false thresholdMarkers: true id: 12 interval: links: [] mappingType: 1 mappingTypes: - name: value to text value: 1 - name: range to text value: 2 maxDataPoints: 100 nullPointMode: connected nullText: postfix: '' postfixFontSize: 50% prefix: '' prefixFontSize: 50% rangeMaps: - from: 'null' text: N/A to: 'null' span: 3 sparkline: fillColor: rgba(31, 118, 189, 0.18) full: false lineColor: rgb(31, 120, 193) show: false tableColumn: '' targets: - expr: (sum(node_filesystem_size{device!="rootfs"}) - sum(node_filesystem_free{device!="rootfs"})) / sum(node_filesystem_size{device!="rootfs"}) intervalFactor: 2 refId: A step: 60 target: '' thresholds: 0.75, 0.9 title: Disk space usage type: singlestat valueFontSize: 80% valueMaps: - op: "=" text: N/A value: 'null' valueName: current repeat: repeatIteration: repeatRowId: showTitle: false title: New row titleSize: h6 - collapse: false height: 250px panels: - alerting: {} aliasColors: {} bars: false dashLength: 10 dashes: false datasource: prometheus editable: true error: false fill: 1 grid: {} id: 8 legend: avg: false current: false max: false min: false show: true total: false values: false lines: true linewidth: 2 links: [] nullPointMode: connected percentage: false pointradius: 5 points: false renderer: flot
seriesOverrides: - alias: 'transmitted ' yaxis: 2 spaceLength: 10 span: 6 stack: false steppedLine: false targets: - expr: sum(rate(node_network_receive_bytes{device!~"lo"}[5m])) hide: false intervalFactor: 2 legendFormat: '' refId: A step: 10 target: '' thresholds: [] timeFrom: timeShift: title: Network received tooltip: msResolution: false shared: true sort: 0 value_type: cumulative type: graph xaxis: buckets: mode: time name: show: true values: [] yaxes: - format: bytes label: logBase: 1 max: min: show: true - format: bytes label: logBase: 1 max: min: show: true - alerting: {} aliasColors: {} bars: false dashLength: 10 dashes: false datasource: prometheus editable: true error: false fill: 1 grid: {} id: 10 legend: avg: false current: false max: false min: false show: true total: false values: false lines: true linewidth: 2 links: [] nullPointMode: connected percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: - alias: 'transmitted ' yaxis: 2 spaceLength: 10 span: 6 stack: false steppedLine: false targets: - expr: sum(rate(node_network_transmit_bytes{device!~"lo"}[5m])) hide: false intervalFactor: 2 legendFormat: '' refId: B step: 10 target: '' thresholds: [] timeFrom: timeShift: title: Network transmitted tooltip: msResolution: false shared: true sort: 0 value_type: cumulative type: graph xaxis: buckets: mode: time name: show: true values: [] yaxes: - format: bytes label: logBase: 1 max: min: show: true - format: bytes label: logBase: 1 max: min: show: true repeat: repeatIteration: repeatRowId: showTitle: false title: New row titleSize: h6 - collapse: false height: 276 panels: - aliasColors: {} bars: false dashLength: 10 dashes: false datasource: prometheus fill: 1 id: 11 legend: avg: false current: false max: false min: false show: true total: false values: false lines: true linewidth: 1 links: [] nullPointMode: 'null' percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: [] spaceLength: 10 span: 9 stack: false
steppedLine: false targets: - expr: sum(kube_pod_info) format: time_series intervalFactor: 2 legendFormat: Current number of Pods refId: A step: 10 - expr: sum(kube_node_status_capacity_pods) format: time_series intervalFactor: 2 legendFormat: Maximum capacity of pods refId: B step: 10 thresholds: [] timeFrom: timeShift: title: Cluster Pod Utilization tooltip: shared: true sort: 0 value_type: individual type: graph xaxis: buckets: mode: time name: show: true values: [] yaxes: - format: short label: logBase: 1 max: min: show: true - format: short label: logBase: 1 max: min: show: true - cacheTimeout: colorBackground: false colorValue: false colors: - rgba(50, 172, 45, 0.97) - rgba(237, 129, 40, 0.89) - rgba(245, 54, 54, 0.9) datasource: prometheus editable: true error: false format: percent gauge: maxValue: 100 minValue: 0 show: true thresholdLabels: false thresholdMarkers: true id: 7 interval: links: [] mappingType: 1 mappingTypes: - name: value to text value: 1 - name: range to text value: 2 maxDataPoints: 100 nullPointMode: connected nullText: postfix: '' postfixFontSize: 50% prefix: '' prefixFontSize: 50% rangeMaps: - from: 'null' text: N/A to: 'null' span: 3 sparkline: fillColor: rgba(31, 118, 189, 0.18) full: false lineColor: rgb(31, 120, 193) show: false tableColumn: '' targets: - expr: 100 - (sum(kube_node_status_capacity_pods) - sum(kube_pod_info)) / sum(kube_node_status_capacity_pods) * 100 format: time_series intervalFactor: 2 legendFormat: '' refId: A step: 60 target: '' thresholds: '80,90' title: Pod Utilization type: singlestat valueFontSize: 80% valueMaps: - op: "=" text: N/A value: 'null' valueName: current repeat: repeatIteration: repeatRowId: showTitle: false title: Dashboard Row titleSize: h6 schemaVersion: 14 style: dark tags: [] templating: list: [] time: from: now-1h to: now timepicker: refresh_intervals: - 5s - 10s - 30s - 1m - 5m - 15m - 30m - 1h - 2h - 1d time_options: - 5m - 15m - 1h - 6h - 12h - 24h - 2d - 7d - 30d timezone: browser title:
Kubernetes Capacity Planning version: 4 inputs: - name: prometheus pluginId: prometheus type: datasource value: prometheus overwrite: true kubernetes_cluster_health: __inputs: - name: prometheus label: prometheus description: '' type: datasource pluginId: prometheus pluginName: Prometheus __requires: - type: grafana id: grafana name: Grafana version: 4.4.1 - type: datasource id: prometheus name: Prometheus version: 1.0.0 - type: panel id: singlestat name: Singlestat version: '' annotations: list: [] editable: true gnetId: graphTooltip: 0 hideControls: false id: links: [] rows: - collapse: false height: 254 panels: - cacheTimeout: colorBackground: false colorValue: true colors: - rgba(50, 172, 45, 0.97) - rgba(237, 129, 40, 0.89) - rgba(245, 54, 54, 0.9) datasource: prometheus format: none gauge: maxValue: 100 minValue: 0 show: false thresholdLabels: false thresholdMarkers: true id: 1 interval: links: [] mappingType: 1 mappingTypes:
- name: value to text value: 1 - name: range to text value: 2 maxDataPoints: 100 nullPointMode: connected nullText: postfix: '' postfixFontSize: 50% prefix: '' prefixFontSize: 50% rangeMaps: - from: 'null' text: N/A to: 'null' span: 3 sparkline: fillColor: rgba(31, 118, 189, 0.18) full: false lineColor: rgb(31, 120, 193) show: false tableColumn: '' targets: - expr: sum(ALERTS{alertstate="firing",alertname!="DeadMansSwitch"}) format: time_series intervalFactor: 2 legendFormat: '' refId: A step: 600 thresholds: '3,5' title: Alerts Firing type: singlestat valueFontSize: 80% valueMaps: - op: "=" text: '0' value: 'null' valueName: current - cacheTimeout: colorBackground: false colorValue: true colors: - rgba(50, 172, 45, 0.97) - rgba(237, 129, 40, 0.89) - rgba(245, 54, 54, 0.9) datasource: prometheus decimals: format: none gauge: maxValue: 100 minValue: 0 show: false thresholdLabels: false thresholdMarkers: true id: 3 interval: links: [] mappingType: 1 mappingTypes: - name: value to text value: 1 - name: range to text value: 2 maxDataPoints: 100 nullPointMode: connected nullText: postfix: '' postfixFontSize: 50% prefix: '' prefixFontSize: 50% rangeMaps: - from: 'null' text: N/A to: 'null' span: 3 sparkline: fillColor: rgba(31, 118, 189, 0.18) full: false lineColor: rgb(31, 120, 193) show: false tableColumn: '' targets: - expr: sum(ALERTS{alertstate="pending",alertname!="DeadMansSwitch"}) format: time_series intervalFactor: 2 legendFormat: '' refId: A step: 600 thresholds: '3,5' title: Alerts Pending type: singlestat valueFontSize: 80% valueMaps: - op: "=" text: '0' value: 'null' valueName: current - cacheTimeout: colorBackground: false colorValue: true colors: - rgba(50, 172, 45, 0.97) - rgba(237, 129, 40, 0.89) - rgba(245, 54, 54, 0.9) datasource: prometheus decimals: format: none gauge: maxValue: 100 minValue: 0 show: false thresholdLabels: false thresholdMarkers: true id: 4 interval: links: [] mappingType: 1 mappingTypes: - name: value to text value: 1 - name: range 
to text value: 2 maxDataPoints: 100 nullPointMode: connected nullText: postfix: '' postfixFontSize: 50% prefix: '' prefixFontSize: 50% rangeMaps: - from: 'null' text: N/A to: 'null' span: 3 sparkline: fillColor: rgba(31, 118, 189, 0.18) full: false lineColor: rgb(31, 120, 193) show: false tableColumn: '' targets: - expr: count(increase(kube_pod_container_status_restarts[1h]) > 5) format: time_series intervalFactor: 2 legendFormat: '' refId: A step: 600 thresholds: '1,3' title: Crashlooping Pods type: singlestat valueFontSize: 80% valueMaps: - op: "=" text: '0' value: 'null' valueName: current repeat: repeatIteration: repeatRowId: showTitle: false title: Dashboard Row titleSize: h6 - collapse: false height: 250 panels: - cacheTimeout: colorBackground: false colorValue: true colors: - rgba(50, 172, 45, 0.97) - rgba(237, 129, 40, 0.89) - rgba(245, 54, 54, 0.9) datasource: prometheus decimals: format: none gauge: maxValue: 100 minValue: 0 show: false thresholdLabels: false thresholdMarkers: true id: 5 interval: links: [] mappingType: 1 mappingTypes: - name: value to text value: 1 - name: range to text value: 2 maxDataPoints: 100 nullPointMode: connected nullText: postfix: '' postfixFontSize: 50% prefix: '' prefixFontSize: 50% rangeMaps: - from: 'null' text: N/A to: 'null' span: 3 sparkline: fillColor: rgba(31, 118, 189, 0.18) full: false lineColor: rgb(31, 120, 193) show: false tableColumn: '' targets: - expr: sum(kube_node_status_condition{condition="Ready",status!="true"}) format: time_series intervalFactor: 2 legendFormat: '' refId: A step: 600 thresholds: '1,3' title: Node Not Ready type: singlestat valueFontSize: 80% valueMaps: - op: "=" text: N/A value: 'null' valueName: current - cacheTimeout: colorBackground: false colorValue: true colors: - rgba(50, 172, 45, 0.97) - rgba(237, 129, 40, 0.89) - rgba(245, 54, 54, 0.9) datasource: prometheus decimals: format: none gauge: maxValue: 100 minValue: 0 show: false thresholdLabels: false thresholdMarkers: true id: 6 
interval: links: [] mappingType: 1 mappingTypes: - name: value to text value: 1 - name: range to text value: 2 maxDataPoints: 100 nullPointMode: connected nullText: postfix: '' postfixFontSize: 50% prefix: '' prefixFontSize: 50% rangeMaps: - from: 'null' text: N/A to: 'null' span: 3 sparkline: fillColor: rgba(31, 118, 189, 0.18) full: false lineColor: rgb(31, 120, 193) show: false tableColumn: '' targets: - expr: sum(kube_node_status_condition{condition="DiskPressure",status="true"}) format: time_series intervalFactor: 2 legendFormat: '' refId: A step: 600 thresholds: '1,3' title: Node Disk Pressure type: singlestat valueFontSize: 80% valueMaps: - op: "=" text: N/A value: 'null' valueName: current - cacheTimeout: colorBackground: false colorValue: true colors: - rgba(50, 172, 45, 0.97) - rgba(237, 129, 40, 0.89) - rgba(245, 54, 54, 0.9) datasource: prometheus decimals: format: none gauge: maxValue: 100 minValue: 0 show: false thresholdLabels: false thresholdMarkers: true id: 7 interval: links: [] mappingType: 1 mappingTypes: - name: value to text value: 1 - name: range to text value: 2 maxDataPoints: 100 nullPointMode: connected nullText: postfix: '' postfixFontSize: 50% prefix: '' prefixFontSize: 50% rangeMaps: - from: 'null' text: N/A to: 'null' span: 3 sparkline: fillColor: rgba(31, 118, 189, 0.18) full: false lineColor: rgb(31, 120, 193) show: false tableColumn: '' targets: - expr: sum(kube_node_status_condition{condition="MemoryPressure",status="true"}) format: time_series intervalFactor: 2 legendFormat: '' refId: A step: 600 thresholds: '1,3' title: Node Memory Pressure type: singlestat valueFontSize: 80% valueMaps: - op: "=" text: N/A value: 'null' valueName: current - cacheTimeout: colorBackground: false colorValue: true colors: - rgba(50, 172, 45, 0.97) - rgba(237, 129, 40, 0.89) - rgba(245, 54, 54, 0.9) datasource: prometheus decimals: format: none gauge: maxValue: 100 minValue: 0 show: false thresholdLabels: false thresholdMarkers: true id: 8 interval: 
links: [] mappingType: 1 mappingTypes: - name: value to text value: 1 - name: range to text value: 2 maxDataPoints: 100 nullPointMode: connected nullText: postfix: '' postfixFontSize: 50% prefix: '' prefixFontSize: 50% rangeMaps: - from: 'null' text: N/A to: 'null' span: 3 sparkline: fillColor: rgba(31, 118, 189, 0.18) full: false lineColor: rgb(31, 120, 193) show: false tableColumn: '' targets: - expr: sum(kube_node_spec_unschedulable) format: time_series intervalFactor: 2 legendFormat: '' refId: A step: 600 thresholds: '1,3' title: Nodes Unschedulable type: singlestat valueFontSize: 80% valueMaps: - op: "=" text: N/A value: 'null' valueName: current repeat: repeatIteration: repeatRowId: showTitle: false title: Dashboard Row titleSize: h6 schemaVersion: 14 style: dark tags: [] templating: list: [] time: from: now-6h to: now timepicker: refresh_intervals: - 5s - 10s - 30s - 1m - 5m - 15m - 30m - 1h - 2h - 1d time_options: - 5m - 15m - 1h - 6h - 12h - 24h - 2d - 7d - 30d timezone: '' title: Kubernetes Cluster Health version: 9 inputs: - name: prometheus pluginId: prometheus type: datasource value: prometheus overwrite: true kubernetes_cluster_status: __inputs: - name: prometheus label: prometheus description: '' type: datasource pluginId: prometheus pluginName: Prometheus __requires: - type: grafana id: grafana name: Grafana version: 4.4.1 - type: datasource id: prometheus name: Prometheus version: 1.0.0 - type: panel id: singlestat name: Singlestat version: '' annotations: list: [] editable: true gnetId: graphTooltip: 0 hideControls: false id: links: [] rows: - collapse: false height: 129 panels: - cacheTimeout: colorBackground: false colorValue: true colors: - rgba(50, 172, 45, 0.97) - rgba(237, 129, 40, 0.89) - rgba(245, 54, 54, 0.9) datasource: prometheus format: none gauge: maxValue: 100 minValue: 0 show: false thresholdLabels: false thresholdMarkers: true id: 5 interval: links: [] mappingType: 1 mappingTypes: - name: value to text value: 1 - name: range to 
text value: 2 maxDataPoints: 100 nullPointMode: connected nullText: postfix: '' postfixFontSize: 50% prefix: '' prefixFontSize: 50% rangeMaps: - from: 'null' text: N/A to: 'null' span: 6 sparkline: fillColor: rgba(31, 118, 189, 0.18) full: false lineColor: rgb(31, 120, 193) show: false tableColumn: '' targets: - expr: sum(up{job=~"apiserver|kube-scheduler|kube-controller-manager"} == 0) format: time_series intervalFactor: 2 legendFormat: '' refId: A step: 600 thresholds: '1,3' title: Control Plane UP type: singlestat valueFontSize: 80% valueMaps: - op: "=" text: UP value: 'null' valueName: total - cacheTimeout: colorBackground: false colorValue: true colors: - rgba(50, 172, 45, 0.97) - rgba(237, 129, 40, 0.89) - rgba(245, 54, 54, 0.9) datasource: prometheus format: none gauge: maxValue: 100 minValue: 0 show: false thresholdLabels: false thresholdMarkers: true id: 6 interval: links: [] mappingType: 1 mappingTypes: - name: value to text value: 1 - name: range to text value: 2 maxDataPoints: 100 nullPointMode: connected nullText: postfix: '' postfixFontSize: 50% prefix: '' prefixFontSize: 50% rangeMaps: - from: 'null' text: N/A to: 'null' span: 6 sparkline: fillColor: rgba(31, 118, 189, 0.18) full: false lineColor: rgb(31, 120, 193) show: false tableColumn: '' targets: - expr: sum(ALERTS{alertstate="firing",alertname!="DeadMansSwitch"}) format: time_series intervalFactor: 2 legendFormat: '' refId: A step: 600 thresholds: '3,5' title: Alerts Firing type: singlestat valueFontSize: 80% valueMaps: - op: "=" text: '0' value: 'null' valueName: current repeat: repeatIteration: repeatRowId: showTitle: true title: Cluster Health titleSize: h6 - collapse: false height: 168 panels: - cacheTimeout: colorBackground: false colorValue: false colors: - rgba(245, 54, 54, 0.9) - rgba(237, 129, 40, 0.89) - rgba(50, 172, 45, 0.97) datasource: prometheus decimals: format: percent gauge: maxValue: 100 minValue: 0 show: true thresholdLabels: false thresholdMarkers: true id: 1 interval: 
links: [] mappingType: 1 mappingTypes: - name: value to text value: 1 - name: range to text value: 2 maxDataPoints: 100 nullPointMode: connected nullText: postfix: '' postfixFontSize: 50% prefix: '' prefixFontSize: 50% rangeMaps: - from: 'null' text: N/A to: 'null' span: 3 sparkline: fillColor: rgba(31, 118, 189, 0.18) full: false lineColor: rgb(31, 120, 193) show: false tableColumn: '' targets: - expr: (sum(up{job="apiserver"} == 1) / count(up{job="apiserver"})) * 100 format: time_series interval: '' intervalFactor: 2 legendFormat: '' refId: A step: 600 thresholds: '50,80' title: API Servers UP type: singlestat valueFontSize: 80% valueMaps: - op: "=" text: N/A value: 'null' valueName: current - cacheTimeout: colorBackground: false colorValue: false colors: - rgba(245, 54, 54, 0.9) - rgba(237, 129, 40, 0.89) - rgba(50, 172, 45, 0.97) datasource: prometheus decimals: format: percent gauge: maxValue: 100 minValue: 0 show: true thresholdLabels: false thresholdMarkers: true id: 2 interval: links: [] mappingType: 1 mappingTypes: - name: value to text value: 1 - name: range to text value: 2 maxDataPoints: 100 nullPointMode: connected nullText: postfix: '' postfixFontSize: 50% prefix: '' prefixFontSize: 50% rangeMaps: - from: 'null' text: N/A to: 'null' span: 3 sparkline: fillColor: rgba(31, 118, 189, 0.18) full: false lineColor: rgb(31, 120, 193) show: false tableColumn: '' targets: - expr: (sum(up{job="kube-controller-manager-discovery"} == 1) / count(up{job="kube-controller-manager-discovery"})) * 100 format: time_series interval: '' intervalFactor: 2 legendFormat: '' refId: A step: 600 thresholds: '50,80' title: Controller Managers UP type: singlestat valueFontSize: 80% valueMaps: - op: "=" text: N/A value: 'null' valueName: current - cacheTimeout: colorBackground: false colorValue: false colors: - rgba(245, 54, 54, 0.9) - rgba(237, 129, 40, 0.89) - rgba(50, 172, 45, 0.97) datasource: prometheus decimals: format: percent gauge: maxValue: 100 minValue: 0 show: true 
thresholdLabels: false thresholdMarkers: true id: 3 interval: links: [] mappingType: 1 mappingTypes: - name: value to text value: 1 - name: range to text value: 2 maxDataPoints: 100 nullPointMode: connected nullText: postfix: '' postfixFontSize: 50% prefix: '' prefixFontSize: 50% rangeMaps: - from: 'null' text: N/A to: 'null' span: 3 sparkline: fillColor: rgba(31, 118, 189, 0.18) full: false lineColor: rgb(31, 120, 193) show: false tableColumn: '' targets: - expr: (sum(up{job="kube-scheduler-discovery"} == 1) / count(up{job="kube-scheduler-discovery"})) * 100 format: time_series interval: '' intervalFactor: 2 legendFormat: '' refId: A step: 600 thresholds: '50,80' title: Schedulers UP type: singlestat valueFontSize: 80% valueMaps: - op: "=" text: N/A value: 'null' valueName: current - cacheTimeout: colorBackground: false colorValue: true colors: - rgba(50, 172, 45, 0.97) - rgba(237, 129, 40, 0.89) - rgba(245, 54, 54, 0.9) datasource: prometheus decimals: format: none gauge: maxValue: 100 minValue: 0 show: false thresholdLabels: false thresholdMarkers: true hideTimeOverride: false id: 4 interval: links: [] mappingType: 1 mappingTypes: - name: value to text value: 1 - name: range to text value: 2 maxDataPoints: 100 nullPointMode: connected nullText: postfix: '' postfixFontSize: 50% prefix: '' prefixFontSize: 50% rangeMaps: - from: 'null' text: N/A to: 'null' span: 3 sparkline: fillColor: rgba(31, 118, 189, 0.18) full: false lineColor: rgb(31, 120, 193) show: false tableColumn: '' targets: - expr: count(increase(kube_pod_container_status_restarts{namespace=~"kube-system|tectonic-system"}[1h]) > 5) format: time_series interval: '' intervalFactor: 2 legendFormat: '' refId: A step: 600 thresholds: '1,3' title: Crashlooping Control Plane Pods type: singlestat valueFontSize: 80% valueMaps: - op: "=" text: '0' value: 'null' valueName: current repeat: repeatIteration: repeatRowId: showTitle: true title: Control Plane Status titleSize: h6 - collapse: false height: 158 panels: 
- cacheTimeout: colorBackground: false colorValue: false colors: - rgba(50, 172, 45, 0.97) - rgba(237, 129, 40, 0.89) - rgba(245, 54, 54, 0.9) datasource: prometheus format: percent gauge: maxValue: 100 minValue: 0 show: true thresholdLabels: false thresholdMarkers: true id: 8 interval: links: [] mappingType: 1 mappingTypes: - name: value to text value: 1 - name: range to text value: 2 maxDataPoints: 100 nullPointMode: connected nullText: postfix: '' postfixFontSize: 50% prefix: '' prefixFontSize: 50% rangeMaps: - from: 'null' text: N/A to: 'null' span: 3 sparkline: fillColor: rgba(31, 118, 189, 0.18) full: false lineColor: rgb(31, 120, 193) show: false tableColumn: '' targets: - expr: sum(100 - (avg by (instance) (rate(node_cpu{job="node-exporter",mode="idle"}[5m])) * 100)) / count(count by (instance) (node_cpu{job="node-exporter",mode="idle"})) format: time_series intervalFactor: 2 legendFormat: '' refId: A step: 600 thresholds: '80,90' title: CPU Utilization type: singlestat valueFontSize: 80% valueMaps: - op: "=" text: N/A value: 'null' valueName: avg - cacheTimeout: colorBackground: false colorValue: false colors: - rgba(50, 172, 45, 0.97) - rgba(237, 129, 40, 0.89) - rgba(245, 54, 54, 0.9) datasource: prometheus format: percent gauge: maxValue: 100 minValue: 0 show: true thresholdLabels: false thresholdMarkers: true id: 7 interval: links: [] mappingType: 1 mappingTypes: - name: value to text value: 1 - name: range to text value: 2 maxDataPoints: 100 nullPointMode: connected nullText: postfix: '' postfixFontSize: 50% prefix: '' prefixFontSize: 50% rangeMaps: - from: 'null' text: N/A to: 'null' span: 3 sparkline: fillColor: rgba(31, 118, 189, 0.18) full: false lineColor: rgb(31, 120, 193) show: false tableColumn: '' targets: - expr: "((sum(node_memory_MemTotal) - sum(node_memory_MemFree) - sum(node_memory_Buffers) - sum(node_memory_Cached)) / sum(node_memory_MemTotal)) * 100" format: time_series intervalFactor: 2 legendFormat: '' refId: A step: 600 thresholds: '80,90' title: Memory
Utilization type: singlestat valueFontSize: 80% valueMaps: - op: "=" text: N/A value: 'null' valueName: avg - cacheTimeout: colorBackground: false colorValue: false colors: - rgba(50, 172, 45, 0.97) - rgba(237, 129, 40, 0.89) - rgba(245, 54, 54, 0.9) datasource: prometheus format: percent gauge: maxValue: 100 minValue: 0 show: true thresholdLabels: false thresholdMarkers: true id: 9 interval: links: [] mappingType: 1 mappingTypes: - name: value to text value: 1 - name: range to text value: 2 maxDataPoints: 100 nullPointMode: connected nullText: postfix: '' postfixFontSize: 50% prefix: '' prefixFontSize: 50% rangeMaps: - from: 'null' text: N/A to: 'null' span: 3 sparkline: fillColor: rgba(31, 118, 189, 0.18) full: false lineColor: rgb(31, 120, 193) show: false tableColumn: '' targets: - expr: ((sum(node_filesystem_size{device!="rootfs"}) - sum(node_filesystem_free{device!="rootfs"})) / sum(node_filesystem_size{device!="rootfs"})) * 100 format: time_series intervalFactor: 2 legendFormat: '' refId: A step: 600 thresholds: '80,90' title: Filesystem Utilization type: singlestat valueFontSize: 80% valueMaps: - op: "=" text: N/A value: 'null' valueName: avg - cacheTimeout: colorBackground: false colorValue: false colors: - rgba(50, 172, 45, 0.97) - rgba(237, 129, 40, 0.89) - rgba(245, 54, 54, 0.9) datasource: prometheus format: percent gauge: maxValue: 100 minValue: 0 show: true thresholdLabels: false thresholdMarkers: true id: 10 interval: links: [] mappingType: 1 mappingTypes: - name: value to text value: 1 - name: range to text value: 2 maxDataPoints: 100 nullPointMode: connected nullText: postfix: '' postfixFontSize: 50% prefix: '' prefixFontSize: 50% rangeMaps: - from: 'null' text: N/A to: 'null' span: 3 sparkline: fillColor: rgba(31, 118, 189, 0.18) full: false lineColor: rgb(31, 120, 193) show: false tableColumn: '' targets: - expr: 100 - (sum(kube_node_status_capacity_pods) - sum(kube_pod_info)) / sum(kube_node_status_capacity_pods) * 100 format: time_series
intervalFactor: 2 legendFormat: '' refId: A step: 600 thresholds: '80,90' title: Pod Utilization type: singlestat valueFontSize: 80% valueMaps: - op: "=" text: N/A value: 'null' valueName: avg repeat: repeatIteration: repeatRowId: showTitle: true title: Capacity Planning titleSize: h6 schemaVersion: 14 style: dark tags: [] templating: list: [] time: from: now-6h to: now timepicker: refresh_intervals: - 5s - 10s - 30s - 1m - 5m - 15m - 30m - 1h - 2h - 1d time_options: - 5m - 15m - 1h - 6h - 12h - 24h - 2d - 7d - 30d timezone: '' title: Kubernetes Cluster Status version: 3 inputs: - name: prometheus pluginId: prometheus type: datasource value: prometheus overwrite: true kubernetes_control_plane: __inputs: - name: prometheus label: prometheus description: '' type: datasource pluginId: prometheus pluginName: Prometheus __requires: - type: grafana id: grafana name: Grafana version: 4.4.1 - type: panel id: graph name: Graph version: '' - type: datasource id: prometheus name: Prometheus version: 1.0.0 - type: panel id: singlestat name: Singlestat version: '' annotations: list: [] editable: true gnetId: graphTooltip: 0 hideControls: false id: links: [] rows: - collapse: false height: 250px panels: - cacheTimeout: colorBackground: false colorValue: false colors: - rgba(245, 54, 54, 0.9) - rgba(237, 129, 40, 0.89) - rgba(50, 172, 45, 0.97) datasource: prometheus format: percent gauge: maxValue: 100 minValue: 0 show: true thresholdLabels: false thresholdMarkers: true id: 1 interval: links: [] mappingType: 1 mappingTypes: - name: value to text value: 1 - name: range to text value: 2 maxDataPoints: 100 nullPointMode: connected nullText: postfix: '' postfixFontSize: 50% prefix: '' prefixFontSize: 50% rangeMaps: - from: 'null' text: N/A to: 'null' span: 3 sparkline: fillColor: rgba(31, 118, 189, 0.18) full: false lineColor: rgb(31, 120, 193) show: false tableColumn: '' targets: - expr: (sum(up{job="apiserver"} == 1) / count(up{job="apiserver"})) * 100 format: time_series
intervalFactor: 2 refId: A step: 600 thresholds: '50,80' title: API Servers UP type: singlestat valueFontSize: 80% valueMaps: - op: "=" text: N/A value: 'null' valueName: avg - cacheTimeout: colorBackground: false colorValue: false colors: - rgba(245, 54, 54, 0.9) - rgba(237, 129, 40, 0.89) - rgba(50, 172, 45, 0.97) datasource: prometheus format: percent gauge: maxValue: 100 minValue: 0 show: true thresholdLabels: false thresholdMarkers: true id: 2 interval: links: [] mappingType: 1 mappingTypes: - name: value to text value: 1 - name: range to text value: 2 maxDataPoints: 100 nullPointMode: connected nullText: postfix: '' postfixFontSize: 50% prefix: '' prefixFontSize: 50% rangeMaps: - from: 'null' text: N/A to: 'null' span: 3 sparkline: fillColor: rgba(31, 118, 189, 0.18) full: false lineColor: rgb(31, 120, 193) show: false tableColumn: '' targets: - expr: (sum(up{job="kube-controller-manager-discovery"} == 1) / count(up{job="kube-controller-manager-discovery"})) * 100 format: time_series intervalFactor: 2 refId: A step: 600 thresholds: '50,80' title: Controller Managers UP type: singlestat valueFontSize: 80% valueMaps: - op: "=" text: N/A value: 'null' valueName: avg - cacheTimeout: colorBackground: false colorValue: false colors: - rgba(245, 54, 54, 0.9) - rgba(237, 129, 40, 0.89) - rgba(50, 172, 45, 0.97) datasource: prometheus format: percent gauge: maxValue: 100 minValue: 0 show: true thresholdLabels: false thresholdMarkers: true id: 3 interval: links: [] mappingType: 1 mappingTypes: - name: value to text value: 1 - name: range to text value: 2 maxDataPoints: 100 nullPointMode: connected nullText: postfix: '' postfixFontSize: 50% prefix: '' prefixFontSize: 50% rangeMaps: - from: 'null' text: N/A to: 'null' span: 3 sparkline: fillColor: rgba(31, 118, 189, 0.18) full: false lineColor: rgb(31, 120, 193) show: false tableColumn: '' targets: - expr: (sum(up{job="kube-scheduler-discovery"} == 1) / count(up{job="kube-scheduler-discovery"})) * 100 format: time_series
intervalFactor: 2 refId: A step: 600 thresholds: '50,80' title: Schedulers UP type: singlestat valueFontSize: 80% valueMaps: - op: "=" text: N/A value: 'null' valueName: avg - cacheTimeout: colorBackground: false colorValue: false colors: - rgba(50, 172, 45, 0.97) - rgba(237, 129, 40, 0.89) - rgba(245, 54, 54, 0.9) datasource: prometheus format: percent gauge: maxValue: 100 minValue: 0 show: true thresholdLabels: false thresholdMarkers: true id: 4 interval: links: [] mappingType: 1 mappingTypes: - name: value to text value: 1 - name: range to text value: 2 maxDataPoints: 100 nullPointMode: connected nullText: postfix: '' postfixFontSize: 50% prefix: '' prefixFontSize: 50% rangeMaps: - from: 'null' text: N/A to: 'null' span: 3 sparkline: fillColor: rgba(31, 118, 189, 0.18) full: false lineColor: rgb(31, 120, 193) show: false tableColumn: '' targets: - expr: max(sum by(instance) (rate(apiserver_request_count{code=~"5.."}[5m])) / sum by(instance) (rate(apiserver_request_count[5m]))) * 100 format: time_series intervalFactor: 2 legendFormat: '' refId: A step: 600 thresholds: '5,10' title: API Server Request Error Rate type: singlestat valueFontSize: 80% valueMaps: - op: "=" text: '0' value: 'null' valueName: avg repeat: repeatIteration: repeatRowId: showTitle: false title: Dashboard Row titleSize: h6 - collapse: false height: 250 panels: - aliasColors: {} bars: false dashLength: 10 dashes: false datasource: prometheus fill: 1 id: 7 legend: avg: false current: false max: false min: false show: true total: false values: false lines: true linewidth: 1 links: [] nullPointMode: 'null' percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: [] spaceLength: 10 span: 12 stack: false steppedLine: false targets: - expr: sum by(verb) (rate(apiserver_latency_seconds:quantile[5m]) >= 0) format: time_series intervalFactor: 2 legendFormat: '' refId: A step: 30 thresholds: [] timeFrom: timeShift: title: API Server Request Latency tooltip: shared: true sort: 0 
value_type: individual type: graph xaxis: buckets: mode: time name: show: true values: [] yaxes: - format: short label: logBase: 1 max: min: show: true - format: short label: logBase: 1 max: min: show: true repeat: repeatIteration: repeatRowId: showTitle: false title: Dashboard Row titleSize: h6 - collapse: false height: 250 panels: - aliasColors: {} bars: false dashLength: 10 dashes: false datasource: prometheus fill: 1 id: 5 legend: avg: false current: false max: false min: false show: true total: false values: false lines: true linewidth: 1 links: [] nullPointMode: 'null' percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: [] spaceLength: 10 span: 6 stack: false steppedLine: false targets: - expr: cluster:scheduler_e2e_scheduling_latency_seconds:quantile format: time_series intervalFactor: 2 refId: A step: 60 thresholds: [] timeFrom: timeShift: title: End to end scheduling latency tooltip: shared: true sort: 0 value_type: individual type: graph xaxis: buckets: mode: time name: show: true values: [] yaxes: - format: short label: logBase: 1 max: min: show: true - format: dtdurations label: logBase: 1 max: min: show: true - aliasColors: {} bars: false dashLength: 10 dashes: false datasource: prometheus fill: 1 id: 6 legend: avg: false current: false max: false min: false show: true total: false values: false lines: true linewidth: 1 links: [] nullPointMode: 'null' percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: [] spaceLength: 10 span: 6 stack: false steppedLine: false targets: - expr: sum by(instance) (rate(apiserver_request_count{code!~"2.."}[5m])) format: time_series intervalFactor: 2 legendFormat: Error Rate refId: A step: 60 - expr: sum by(instance) (rate(apiserver_request_count[5m])) format: time_series intervalFactor: 2 legendFormat: Request Rate refId: B step: 60 thresholds: [] timeFrom: timeShift: title: API Server Request Rates tooltip: shared: true sort: 0 value_type: individual type: graph xaxis: 
buckets: mode: time name: show: true values: [] yaxes: - format: short label: logBase: 1 max: min: show: true - format: short label: logBase: 1 max: min: show: true repeat: repeatIteration: repeatRowId: showTitle: false title: Dashboard Row titleSize: h6 schemaVersion: 14 style: dark tags: [] templating: list: [] time: from: now-6h to: now timepicker: refresh_intervals: - 5s - 10s - 30s - 1m - 5m - 15m - 30m - 1h - 2h - 1d time_options: - 5m - 15m - 1h - 6h - 12h - 24h - 2d - 7d - 30d timezone: '' title: Kubernetes Control Plane Status version: 3 inputs: - name: prometheus pluginId: prometheus type: datasource value: prometheus overwrite: true nodes: __inputs: - name: prometheus label: prometheus description: '' type: datasource pluginId: prometheus pluginName: Prometheus __requires: - type: grafana id: grafana name: Grafana version: 4.4.1 - type: panel id: graph name: Graph version: '' - type: datasource id: prometheus name: Prometheus version: 1.0.0 - type: panel id: singlestat name: Singlestat version: '' annotations: list: [] description: Dashboard to get an overview of one server editable: true gnetId: 22 graphTooltip: 0 hideControls: false id: links: [] refresh: false rows: - collapse: false height: 250px panels: - alerting: {} aliasColors: {} bars: false dashLength: 10 dashes: false datasource: prometheus editable: true error: false fill: 1 grid: {} id: 3 legend: avg: false current: false max: false min: false show: true total: false values: false lines: true linewidth: 2 links: [] nullPointMode: connected percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: [] spaceLength: 10 span: 6 stack: false steppedLine: false targets: - expr: 100 - (avg by (cpu) (irate(node_cpu{mode="idle", instance="$server"}[5m])) * 100) hide: false intervalFactor: 10 legendFormat: "{{cpu}}" refId: A step: 50 thresholds: [] timeFrom: timeShift: title: Idle cpu tooltip: msResolution: false shared: true sort: 0 value_type: cumulative type: graph xaxis: 
buckets: mode: time name: show: true values: [] yaxes: - format: percent label: cpu usage logBase: 1 max: 100 min: 0 show: true - format: short label: logBase: 1 max: min: show: true - alerting: {} aliasColors: {} bars: false dashLength: 10 dashes: false datasource: prometheus editable: true error: false fill: 1 grid: {} id: 9 legend: avg: false current: false max: false min: false show: true total: false values: false lines: true linewidth: 2 links: [] nullPointMode: connected percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: [] spaceLength: 10 span: 6 stack: false steppedLine: false targets: - expr: node_load1{instance="$server"} intervalFactor: 4 legendFormat: load 1m refId: A step: 20 target: '' - expr: node_load5{instance="$server"} intervalFactor: 4 legendFormat: load 5m refId: B step: 20 target: '' - expr: node_load15{instance="$server"} intervalFactor: 4 legendFormat: load 15m refId: C step: 20 target: '' thresholds: [] timeFrom: timeShift: title: System load tooltip: msResolution: false shared: true sort: 0 value_type: cumulative type: graph xaxis: buckets: mode: time name: show: true values: [] yaxes: - format: percentunit label: logBase: 1 max: min: show: true - format: short label: logBase: 1 max: min: show: true repeat: repeatIteration: repeatRowId: showTitle: false title: New row titleSize: h6 - collapse: false height: 250px panels: - alerting: {} aliasColors: {} bars: false dashLength: 10 dashes: false datasource: prometheus editable: true error: false fill: 1 grid: {} id: 4 legend: alignAsTable: false avg: false current: false hideEmpty: false hideZero: false max: false min: false rightSide: false show: true total: false values: false lines: true linewidth: 2 links: [] nullPointMode: connected percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: - alias: node_memory_SwapFree{instance="172.17.0.1:9100",job="prometheus"} yaxis: 2 spaceLength: 10 span: 9 stack: true steppedLine: false targets: - 
expr: node_memory_MemTotal{instance="$server"} - node_memory_MemFree{instance="$server"} - node_memory_Buffers{instance="$server"} - node_memory_Cached{instance="$server"} hide: false interval: '' intervalFactor: 2 legendFormat: memory used metric: '' refId: C step: 10 - expr: node_memory_Buffers{instance="$server"} interval: '' intervalFactor: 2 legendFormat: memory buffers metric: '' refId: E step: 10 - expr: node_memory_Cached{instance="$server"} intervalFactor: 2 legendFormat: memory cached metric: '' refId: F step: 10 - expr: node_memory_MemFree{instance="$server"} intervalFactor: 2 legendFormat: memory free metric: '' refId: D step: 10 thresholds: [] timeFrom: timeShift: title: Memory usage tooltip: msResolution: false shared: true sort: 0 value_type: individual type: graph xaxis: buckets: mode: time name: show: true values: [] yaxes: - format: bytes label: logBase: 1 max: min: '0' show: true - format: short label: logBase: 1 max: min: show: true - cacheTimeout: colorBackground: false colorValue: false colors: - rgba(50, 172, 45, 0.97) - rgba(237, 129, 40, 0.89) - rgba(245, 54, 54, 0.9) datasource: prometheus editable: true error: false format: percent gauge: maxValue: 100 minValue: 0 show: true thresholdLabels: false thresholdMarkers: true id: 5 interval: links: [] mappingType: 1 mappingTypes: - name: value to text value: 1 - name: range to text value: 2 maxDataPoints: 100 nullPointMode: connected nullText: postfix: '' postfixFontSize: 50% prefix: '' prefixFontSize: 50% rangeMaps: - from: 'null' text: N/A to: 'null' span: 3 sparkline: fillColor: rgba(31, 118, 189, 0.18) full: false lineColor: rgb(31, 120, 193) show: false tableColumn: '' targets: - expr: ((node_memory_MemTotal{instance="$server"} - node_memory_MemFree{instance="$server"} - node_memory_Buffers{instance="$server"} - node_memory_Cached{instance="$server"}) / node_memory_MemTotal{instance="$server"}) * 100 intervalFactor: 2 refId: A step: 60 target: '' thresholds: 80, 90 title: Memory usage 
type: singlestat valueFontSize: 80% valueMaps: - op: "=" text: N/A value: 'null' valueName: avg repeat: repeatIteration: repeatRowId: showTitle: false title: New row titleSize: h6 - collapse: false height: 250px panels: - alerting: {} aliasColors: {} bars: false dashLength: 10 dashes: false datasource: prometheus editable: true error: false fill: 1 grid: {} id: 6 legend: avg: false current: false max: false min: false show: true total: false values: false lines: true linewidth: 2 links: [] nullPointMode: connected percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: - alias: read yaxis: 1 - alias: '{instance="172.17.0.1:9100"}' yaxis: 2 - alias: io time yaxis: 2 spaceLength: 10 span: 9 stack: false steppedLine: false targets: - expr: sum by (instance) (rate(node_disk_bytes_read{instance="$server"}[2m])) hide: false intervalFactor: 4 legendFormat: read refId: A step: 20 target: '' - expr: sum by (instance) (rate(node_disk_bytes_written{instance="$server"}[2m])) intervalFactor: 4 legendFormat: written refId: B step: 20 - expr: sum by (instance) (rate(node_disk_io_time_ms{instance="$server"}[2m])) intervalFactor: 4 legendFormat: io time refId: C step: 20 thresholds: [] timeFrom: timeShift: title: Disk I/O tooltip: msResolution: false shared: true sort: 0 value_type: cumulative type: graph xaxis: buckets: mode: time name: show: true values: [] yaxes: - format: bytes label: logBase: 1 max: min: show: true - format: ms label: logBase: 1 max: min: show: true - cacheTimeout: colorBackground: false colorValue: false colors: - rgba(50, 172, 45, 0.97) - rgba(237, 129, 40, 0.89) - rgba(245, 54, 54, 0.9) datasource: prometheus editable: true error: false format: percentunit gauge: maxValue: 1 minValue: 0 show: true thresholdLabels: false thresholdMarkers: true id: 7 interval: links: [] mappingType: 1 mappingTypes: - name: value to text value: 1 - name: range to text value: 2 maxDataPoints: 100 nullPointMode: connected nullText: postfix: '' 
postfixFontSize: 50% prefix: '' prefixFontSize: 50% rangeMaps: - from: 'null' text: N/A to: 'null' span: 3 sparkline: fillColor: rgba(31, 118, 189, 0.18) full: false lineColor: rgb(31, 120, 193) show: false tableColumn: '' targets: - expr: (sum(node_filesystem_size{device!="rootfs",instance="$server"}) - sum(node_filesystem_free{device!="rootfs",instance="$server"})) / sum(node_filesystem_size{device!="rootfs",instance="$server"}) intervalFactor: 2 refId: A step: 60 target: '' thresholds: 0.75, 0.9 title: Disk space usage type: singlestat valueFontSize: 80% valueMaps: - op: "=" text: N/A value: 'null' valueName: current repeat: repeatIteration: repeatRowId: showTitle: false title: New row titleSize: h6 - collapse: false height: 250px panels: - alerting: {} aliasColors: {} bars: false dashLength: 10 dashes: false datasource: prometheus editable: true error: false fill: 1 grid: {} id: 8 legend: avg: false current: false max: false min: false show: true total: false values: false lines: true linewidth: 2 links: [] nullPointMode: connected percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: - alias: 'transmitted ' yaxis: 2 spaceLength: 10 span: 6 stack: false steppedLine: false targets: - expr: rate(node_network_receive_bytes{instance="$server",device!~"lo"}[5m]) hide: false intervalFactor: 2 legendFormat: "{{device}}" refId: A step: 10 target: '' thresholds: [] timeFrom: timeShift: title: Network received tooltip: msResolution: false shared: true sort: 0 value_type: cumulative type: graph xaxis: buckets: mode: time name: show: true values: [] yaxes: - format: bytes label: logBase: 1 max: min: show: true - format: bytes label: logBase: 1 max: min: show: true - alerting: {} aliasColors: {} bars: false dashLength: 10 dashes: false datasource: prometheus editable: true error: false fill: 1 grid: {} id: 10 legend: avg: false current: false max: false min: false show: true total: false values: false lines: true linewidth: 2 links: [] 
nullPointMode: connected percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: - alias: 'transmitted ' yaxis: 2 spaceLength: 10 span: 6 stack: false steppedLine: false targets: - expr: rate(node_network_transmit_bytes{instance="$server",device!~"lo"}[5m]) hide: false intervalFactor: 2 legendFormat: "{{device}}" refId: B step: 10 target: '' thresholds: [] timeFrom: timeShift: title: Network transmitted tooltip: msResolution: false shared: true sort: 0 value_type: cumulative type: graph xaxis: buckets: mode: time name: show: true values: [] yaxes: - format: bytes label: logBase: 1 max: min: show: true - format: bytes label: logBase: 1 max: min: show: true repeat: repeatIteration: repeatRowId: showTitle: false title: New row titleSize: h6 schemaVersion: 14 style: dark tags: [] templating: list: - allValue: current: {} datasource: prometheus hide: 0 includeAll: false label: multi: false name: server options: [] query: label_values(node_boot_time, instance) refresh: 1 regex: '' sort: 0 tagValuesQuery: '' tags: [] tagsQuery: '' type: query useTags: false time: from: now-1h to: now timepicker: refresh_intervals: - 5s - 10s - 30s - 1m - 5m - 15m - 30m - 1h - 2h - 1d time_options: - 5m - 15m - 1h - 6h - 12h - 24h - 2d - 7d - 30d timezone: browser title: Nodes version: 2 inputs: - name: prometheus pluginId: prometheus type: datasource value: prometheus overwrite: true openstack_control_plane: __inputs: - name: prometheus label: prometheus description: '' type: datasource pluginId: prometheus pluginName: Prometheus __requires: - type: grafana id: grafana name: Grafana version: 4.5.2 - type: panel id: graph name: Graph version: '' - type: datasource id: prometheus name: Prometheus version: 1.0.0 - type: panel id: singlestat name: Singlestat version: '' - type: panel id: text name: Text version: '' annotations: list: [] editable: true gnetId: graphTooltip: 1 hideControls: false id: links: [] refresh: 1m rows: - collapse: false height: 250px panels: - 
cacheTimeout: colorBackground: true colorValue: false colors: - rgba(200, 54, 35, 0.88) - rgba(118, 245, 40, 0.73) - rgba(225, 177, 40, 0.59) datasource: prometheus editable: true error: false format: none gauge: maxValue: 100 minValue: 0 show: false thresholdLabels: false thresholdMarkers: true id: 24 interval: "> 60s" links: - dashboard: Keystone name: Drilldown dashboard title: Keystone type: dashboard mappingType: 1 mappingTypes: - name: value to text value: 1 - name: range to text value: 2 maxDataPoints: 100 nullPointMode: connected nullText: postfix: '' postfixFontSize: 50% prefix: '' prefixFontSize: 50% rangeMaps: - from: 'null' text: N/A to: 'null' span: 1 sparkline: fillColor: rgba(31, 118, 189, 0.18) full: false lineColor: rgb(31, 120, 193) show: false tableColumn: '' targets: - column: value condition: '' dsType: influxdb expr: check_keystone_api{job="openstack-metrics", region="$region"} fill: '' format: time_series function: last groupBy: - params: - "$interval" type: time - params: - 'null' type: fill groupByTags: [] groupby_field: '' interval: '' intervalFactor: 2 policy: default rawQuery: false refId: A resultFormat: time_series step: 120 thresholds: '1,2' title: Keystone type: singlestat valueFontSize: 50% valueMaps: - op: "=" text: no data value: 'null' - op: "=" text: CRIT value: '0' - op: "=" text: OK value: '1' - op: "=" text: UNKW value: '2' valueName: current - cacheTimeout: colorBackground: true colorValue: false colors: - rgba(200, 54, 35, 0.88) - rgba(118, 245, 40, 0.73) - rgba(225, 177, 40, 0.59) datasource: prometheus editable: true error: false format: none gauge: maxValue: 100 minValue: 0 show: false thresholdLabels: false thresholdMarkers: true id: 23 interval: "> 60s" links: - dashboard: Glance name: Drilldown dashboard title: Glance type: dashboard mappingType: 1 mappingTypes: - name: value to text value: 1 - name: range to text value: 2 maxDataPoints: 100 nullPointMode: connected nullText: postfix: '' postfixFontSize: 50% prefix: 
'' prefixFontSize: 50% rangeMaps: - from: 'null' text: N/A to: 'null' span: 1 sparkline: fillColor: rgba(31, 118, 189, 0.18) full: false lineColor: rgb(31, 120, 193) show: false tableColumn: '' targets: - column: value condition: '' dsType: influxdb expr: check_glance_api{job="openstack-metrics", region="$region"} fill: '' format: time_series function: last groupBy: - params: - "$interval" type: time - params: - 'null' type: fill groupByTags: [] groupby_field: '' interval: '' intervalFactor: 2 policy: default rawQuery: false refId: A resultFormat: time_series step: 120 thresholds: '1,2' title: Glance type: singlestat valueFontSize: 50% valueMaps: - op: "=" text: no data value: 'null' - op: "=" text: CRIT value: '0' - op: "=" text: OK value: '1' - op: "=" text: UNKW value: '2' valueName: current - cacheTimeout: colorBackground: true colorValue: false colors: - rgba(202, 58, 40, 0.86) - rgba(118, 245, 40, 0.73) - rgba(225, 177, 40, 0.59) datasource: prometheus editable: true error: false format: none gauge: maxValue: 100 minValue: 0 show: false thresholdLabels: false thresholdMarkers: true id: 22 interval: "> 60s" links: - dashboard: Heat name: Drilldown dashboard title: Heat type: dashboard mappingType: 1 mappingTypes: - name: value to text value: 1 - name: range to text value: 2 maxDataPoints: 100 nullPointMode: connected nullText: postfix: '' postfixFontSize: 50% prefix: '' prefixFontSize: 50% rangeMaps: - from: 'null' text: N/A to: 'null' span: 1 sparkline: fillColor: rgba(31, 118, 189, 0.18) full: false lineColor: rgb(31, 120, 193) show: false tableColumn: '' targets: - column: value condition: '' dsType: influxdb expr: check_heat_api{job="openstack-metrics", region="$region"} fill: '' format: time_series function: last groupBy: - params: - "$interval" type: time - params: - 'null' type: fill groupByTags: [] groupby_field: '' interval: '' intervalFactor: 2 policy: default rawQuery: false refId: A resultFormat: time_series step: 120 thresholds: '1,2' title: Heat 
type: singlestat valueFontSize: 50% valueMaps: - op: "=" text: no data value: 'null' - op: "=" text: CRIT value: '0' - op: "=" text: OK value: '1' - op: "=" text: UNKW value: '2' valueName: current - cacheTimeout: colorBackground: true colorValue: false colors: - rgba(200, 54, 35, 0.88) - rgba(118, 245, 40, 0.73) - rgba(225, 177, 40, 0.59) datasource: prometheus editable: true error: false format: none gauge: maxValue: 100 minValue: 0 show: false thresholdLabels: false thresholdMarkers: true id: 21 interval: "> 60s" links: - dashboard: Neutron name: Drilldown dashboard title: Neutron type: dashboard mappingType: 1 mappingTypes: - name: value to text value: 1 - name: range to text value: 2 maxDataPoints: 100 nullPointMode: connected nullText: postfix: '' postfixFontSize: 50% prefix: '' prefixFontSize: 50% rangeMaps: - from: 'null' text: N/A to: 'null' span: 1 sparkline: fillColor: rgba(31, 118, 189, 0.18) full: false lineColor: rgb(31, 120, 193) show: false tableColumn: '' targets: - column: value condition: '' dsType: influxdb expr: check_neutron_api{job="openstack-metrics", region="$region"} fill: '' format: time_series function: last groupBy: - params: - "$interval" type: time - params: - 'null' type: fill groupByTags: [] groupby_field: '' interval: '' intervalFactor: 2 policy: default rawQuery: false refId: A resultFormat: time_series step: 120 thresholds: '1,2' title: Neutron type: singlestat valueFontSize: 50% valueMaps: - op: "=" text: no data value: 'null' - op: "=" text: CRIT value: '0' - op: "=" text: OK value: '1' - op: "=" text: UNKW value: '2' valueName: current - cacheTimeout: colorBackground: true colorValue: false colors: - rgba(208, 53, 34, 0.82) - rgba(118, 245, 40, 0.73) - rgba(225, 177, 40, 0.59) datasource: prometheus editable: true error: false format: none gauge: maxValue: 100 minValue: 0 show: false thresholdLabels: false thresholdMarkers: true id: 5 interval: "> 60s" links: - dashboard: Nova name: Drilldown dashboard title: Nova type: 
dashboard mappingType: 1 mappingTypes: - name: value to text value: 1 - name: range to text value: 2 maxDataPoints: 100 nullPointMode: connected nullText: postfix: '' postfixFontSize: 50% prefix: '' prefixFontSize: 50% rangeMaps: - from: 'null' text: N/A to: 'null' span: 1 sparkline: fillColor: rgba(31, 118, 189, 0.18) full: false lineColor: rgb(31, 120, 193) show: false tableColumn: '' targets: - column: value condition: '' dsType: influxdb expr: check_nova_api{job="openstack-metrics", region="$region"} fill: '' format: time_series function: last groupBy: - params: - "$interval" type: time - params: - 'null' type: fill groupByTags: [] groupby_field: '' interval: '' intervalFactor: 2 policy: default rawQuery: false refId: A resultFormat: time_series step: 120 thresholds: '1,2' title: Nova type: singlestat valueFontSize: 50% valueMaps: - op: "=" text: no data value: 'null' - op: "=" text: CRIT value: '0' - op: "=" text: OK value: '1' - op: "=" text: UNKW value: '2' valueName: current - cacheTimeout: colorBackground: true colorValue: false colors: - rgba(200, 54, 35, 0.88) - rgba(118, 245, 40, 0.73) - rgba(225, 177, 40, 0.59) datasource: prometheus editable: true error: false format: none gauge: maxValue: 100 minValue: 0 show: false thresholdLabels: false thresholdMarkers: true id: 25 interval: "> 60s" links: - dashboard: Ceph name: Drilldown dashboard title: Ceph type: dashboard mappingType: 1 mappingTypes: - name: value to text value: 1 - name: range to text value: 2 maxDataPoints: 100 nullPointMode: connected nullText: postfix: '' postfixFontSize: 50% prefix: '' prefixFontSize: 50% rangeMaps: - from: 'null' text: N/A to: 'null' span: 1 sparkline: fillColor: rgba(31, 118, 189, 0.18) full: false lineColor: rgb(31, 120, 193) show: false tableColumn: '' targets: - column: value condition: '' dsType: influxdb expr: check_swift_api{job="openstack-metrics", region="$region"} fill: '' format: time_series function: last groupBy: - params: - "$interval" type: time - params: 
- 'null' type: fill groupByTags: [] groupby_field: '' interval: '' intervalFactor: 2 policy: default rawQuery: false refId: A resultFormat: time_series step: 120 thresholds: '1,2' title: Ceph type: singlestat valueFontSize: 50% valueMaps: - op: "=" text: no data value: 'null' - op: "=" text: CRIT value: '0' - op: "=" text: OK value: '1' - op: "=" text: UNKW value: '2' valueName: current - content: '' editable: true error: false id: 20 links: [] mode: markdown span: 1 style: {} title: '' type: text - cacheTimeout: colorBackground: true colorValue: false colors: - rgba(71, 212, 59, 0.4) - rgba(245, 150, 40, 0.73) - rgba(225, 40, 40, 0.59) datasource: prometheus editable: true error: false format: short gauge: maxValue: 100 minValue: 0 show: false thresholdLabels: false thresholdMarkers: true id: 16 interval: ">60s" links: - dashboard: RabbitMQ name: Drilldown dashboard title: RabbitMQ type: dashboard mappingType: 1 mappingTypes: - name: value to text value: 1 - name: range to text value: 2 maxDataPoints: 100 nullPointMode: connected nullText: postfix: '' postfixFontSize: 50% prefix: '' prefixFontSize: 50% rangeMaps: - from: 'null' text: N/A to: 'null' span: 1 sparkline: fillColor: rgba(31, 118, 189, 0.18) full: false lineColor: rgb(31, 120, 193) show: false tableColumn: '' targets: - column: value dsType: influxdb expr: '' fill: '' format: time_series function: last groupBy: - params: - "$interval" type: time - params: - 'null' type: fill groupByTags: [] interval: '' intervalFactor: 2 policy: default rawQuery: false refId: A resultFormat: time_series thresholds: '' title: RabbitMQ type: singlestat valueFontSize: 50% valueMaps: - op: "=" text: no data value: 'null' - op: "=" text: OKAY value: '0' - op: "=" text: WARN value: '1' - op: "=" text: UNKW value: '2' - op: "=" text: CRIT value: '3' - op: "=" text: DOWN value: '4' valueName: current - cacheTimeout: colorBackground: true colorValue: false colors: - rgba(71, 212, 59, 0.4) - rgba(245, 150, 40, 0.73) - rgba(225, 
40, 40, 0.59) datasource: prometheus editable: true error: false format: short gauge: maxValue: 100 minValue: 0 show: false thresholdLabels: false thresholdMarkers: true id: 15 interval: ">60s" links: - dashboard: MySQL name: Drilldown dashboard title: MySQL type: dashboard mappingType: 1 mappingTypes: - name: value to text value: 1 - name: range to text value: 2 maxDataPoints: 100 nullPointMode: connected nullText: postfix: '' postfixFontSize: 50% prefix: '' prefixFontSize: 50% rangeMaps: - from: 'null' text: N/A to: 'null' span: 1 sparkline: fillColor: rgba(31, 118, 189, 0.18) full: false lineColor: rgb(31, 120, 193) show: false tableColumn: '' targets: - column: value dsType: influxdb fill: '' function: last groupBy: - params: - "$interval" type: time - params: - 'null' type: fill groupByTags: [] interval: '' policy: default rawQuery: false refId: A resultFormat: time_series thresholds: '' title: MySQL type: singlestat valueFontSize: 50% valueMaps: - op: "=" text: no data value: 'null' - op: "=" text: OKAY value: '0' - op: "=" text: WARN value: '1' - op: "=" text: UNKW value: '2' - op: "=" text: CRIT value: '3' - op: "=" text: DOWN value: '4' valueName: current - cacheTimeout: colorBackground: true colorValue: false colors: - rgba(71, 212, 59, 0.4) - rgba(245, 150, 40, 0.73) - rgba(225, 40, 40, 0.59) datasource: prometheus editable: true error: false format: short gauge: maxValue: 100 minValue: 0 show: false thresholdLabels: false thresholdMarkers: true id: 18 interval: ">60s" links: - dashUri: db/apache dashboard: Apache name: Drilldown dashboard title: Apache type: dashboard mappingType: 1 mappingTypes: - name: value to text value: 1 - name: range to text value: 2 maxDataPoints: 100 nullPointMode: connected nullText: postfix: '' postfixFontSize: 50% prefix: '' prefixFontSize: 50% rangeMaps: - from: 'null' text: N/A to: 'null' span: 1 sparkline: fillColor: rgba(31, 118, 189, 0.18) full: false lineColor: rgb(31, 120, 193) show: false tableColumn: '' targets: - 
column: value dsType: influxdb fill: '' function: last groupBy: - params: - "$interval" type: time - params: - 'null' type: fill groupByTags: [] interval: '' policy: default rawQuery: false refId: A resultFormat: time_series thresholds: '' title: Apache type: singlestat valueFontSize: 50% valueMaps: - op: "=" text: no data value: 'null' - op: "=" text: OKAY value: '0' - op: "=" text: WARN value: '1' - op: "=" text: UNKW value: '2' - op: "=" text: CRIT value: '3' - op: "=" text: DOWN value: '4' valueName: current - cacheTimeout: colorBackground: true colorValue: false colors: - rgba(71, 212, 59, 0.4) - rgba(245, 150, 40, 0.73) - rgba(225, 40, 40, 0.59) datasource: prometheus editable: true error: false format: short gauge: maxValue: 100 minValue: 0 show: false thresholdLabels: false thresholdMarkers: true id: 10 interval: ">60s" links: - dashUri: db/haproxy dashboard: HAProxy name: Drilldown dashboard title: HAProxy type: dashboard mappingType: 1 mappingTypes: - name: value to text value: 1 - name: range to text value: 2 maxDataPoints: 100 nullPointMode: connected nullText: postfix: '' postfixFontSize: 50% prefix: '' prefixFontSize: 50% rangeMaps: - from: 'null' text: N/A to: 'null' span: 1 sparkline: fillColor: rgba(31, 118, 189, 0.18) full: false lineColor: rgb(31, 120, 193) show: false tableColumn: '' targets: - column: value dsType: influxdb fill: '' function: last groupBy: - params: - "$interval" type: time - params: - 'null' type: fill groupByTags: [] interval: '' policy: default rawQuery: false refId: A resultFormat: time_series thresholds: '' title: haproxy type: singlestat valueFontSize: 50% valueMaps: - op: "=" text: no data value: 'null' - op: "=" text: OKAY value: '0' - op: "=" text: WARN value: '1' - op: "=" text: UNKW value: '2' - op: "=" text: CRIT value: '3' - op: "=" text: DOWN value: '4' valueName: current - cacheTimeout: colorBackground: true colorValue: false colors: - rgba(71, 212, 59, 0.4) - rgba(245, 150, 40, 0.73) - rgba(225, 40, 40, 0.59) 
datasource: prometheus editable: true error: false format: short gauge: maxValue: 100 minValue: 0 show: false thresholdLabels: false thresholdMarkers: true id: 17 interval: ">60s" links: - dashUri: db/memcached dashboard: Memcached name: Drilldown dashboard title: Memcached type: dashboard mappingType: 1 mappingTypes: - name: value to text value: 1 - name: range to text value: 2 maxDataPoints: 100 nullPointMode: connected nullText: postfix: '' postfixFontSize: 50% prefix: '' prefixFontSize: 50% rangeMaps: - from: 'null' text: N/A to: 'null' span: 1 sparkline: fillColor: rgba(31, 118, 189, 0.18) full: false lineColor: rgb(31, 120, 193) show: false tableColumn: '' targets: - column: value dsType: influxdb fill: '' function: last groupBy: - params: - "$interval" type: time - params: - 'null' type: fill groupByTags: [] interval: '' policy: default rawQuery: false refId: A resultFormat: time_series thresholds: '' title: memcached type: singlestat valueFontSize: 50% valueMaps: - op: "=" text: no data value: 'null' - op: "=" text: OKAY value: '0' - op: "=" text: WARN value: '1' - op: "=" text: UNKW value: '2' - op: "=" text: CRIT value: '3' - op: "=" text: DOWN value: '4' valueName: current repeat: repeatIteration: repeatRowId: showTitle: true title: OpenStack Services titleSize: h6 - collapse: false height: 250px panels: - aliasColors: {} bars: false dashLength: 10 dashes: false datasource: prometheus editable: true error: false fill: 0 grid: {} id: 11 interval: "> 60s" legend: avg: false current: false max: false min: false show: false total: false values: false lines: true linewidth: 1 links: [] nullPointMode: connected percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: [] spaceLength: 10 span: 4 stack: false steppedLine: false targets: - alias: free column: value dsType: influxdb expr: total_used_vcpus{job="openstack-metrics", region="$region"} + total_free_vcpus{job="openstack-metrics", region="$region"} format: time_series function: min 
groupBy: - params: - "$interval" type: time - params: - '0' type: fill groupByTags: [] intervalFactor: 2 policy: default rawQuery: false refId: A resultFormat: time_series step: 120 - alias: used column: value dsType: influxdb expr: total_used_vcpus{job="openstack-metrics", region="$region"} format: time_series function: max groupBy: - params: - "$interval" type: time - params: - '0' type: fill groupByTags: [] intervalFactor: 2 policy: default rawQuery: false refId: B resultFormat: time_series step: 120 thresholds: [] timeFrom: timeShift: title: VCPUs (total vs used) tooltip: msResolution: false shared: true sort: 0 value_type: cumulative type: graph xaxis: buckets: mode: time name: show: true values: [] yaxes: - format: short logBase: 1 max: min: 0 show: true - format: short logBase: 1 max: min: show: true - aliasColors: {} bars: false dashLength: 10 dashes: false datasource: prometheus editable: true error: false fill: 0 grid: {} id: 12 interval: "> 60s" legend: avg: false current: false max: false min: false show: false total: false values: false lines: true linewidth: 1 links: [] nullPointMode: connected percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: [] spaceLength: 10 span: 4 stack: false steppedLine: false targets: - alias: free column: value dsType: influxdb expr: total_used_ram_MB{job="openstack-metrics", region="$region"} + total_free_ram_MB{job="openstack-metrics", region="$region"} format: time_series function: mean groupBy: - params: - "$interval" type: time - params: - '0' type: fill groupByTags: [] intervalFactor: 2 policy: default rawQuery: false refId: A resultFormat: time_series step: 120 - alias: used column: value dsType: influxdb expr: total_used_ram_MB{job="openstack-metrics", region="$region"} format: time_series function: mean groupBy: - params: - "$interval" type: time - params: - '0' type: fill groupByTags: [] interval: '' intervalFactor: 2 policy: default rawQuery: false refId: B resultFormat: time_series 
step: 120 thresholds: [] timeFrom: timeShift: title: RAM (total vs used) tooltip: msResolution: false shared: true sort: 0 value_type: cumulative type: graph xaxis: buckets: mode: time name: show: true values: [] yaxes: - format: mbytes label: '' logBase: 1 max: min: 0 show: true - format: short logBase: 1 max: min: show: true - aliasColors: {} bars: false dashLength: 10 dashes: false datasource: prometheus editable: true error: false fill: 0 grid: {} id: 13 interval: "> 60s" legend: avg: false current: false max: false min: false show: false total: false values: false lines: true linewidth: 1 links: [] nullPointMode: connected percentage: false pointradius: 5 points: false renderer: flot seriesOverrides: [] spaceLength: 10 span: 4 stack: false steppedLine: false targets: - alias: free column: value dsType: influxdb expr: total_used_disk_GB{job="openstack-metrics", region="$region"} + total_free_disk_GB{job="openstack-metrics", region="$region"} format: time_series function: mean groupBy: - params: - "$interval" type: time - params: - '0' type: fill groupByTags: [] intervalFactor: 2 policy: default rawQuery: false refId: A resultFormat: time_series step: 120 - alias: used column: value dsType: influxdb expr: total_used_disk_GB{job="openstack-metrics", region="$region"} format: time_series function: mean groupBy: - params: - "$interval" type: time - params: - '0' type: fill groupByTags: [] intervalFactor: 2 policy: default rawQuery: false refId: B resultFormat: time_series step: 120 thresholds: [] timeFrom: timeShift: title: Disk (used vs total) tooltip: msResolution: false shared: true sort: 0 value_type: cumulative type: graph xaxis: buckets: mode: time name: show: true values: [] yaxes: - format: gbytes logBase: 1 max: min: 0 show: true - format: short logBase: 1 max: min: show: true repeat: repeatIteration: repeatRowId: showTitle: true title: Virtual compute resources titleSize: h6 schemaVersion: 14 style: dark tags: [] templating: enable: true list: - allValue: 
current: {} datasource: prometheus hide: 0 includeAll: false label: multi: false name: region options: [] query: label_values(openstack_exporter_cache_refresh_duration_seconds, region) refresh: 1 regex: '' sort: 0 tagValuesQuery: '' tags: [] tagsQuery: '' type: query useTags: false time: from: now-1h to: now timepicker: collapse: false enable: true notice: false now: true refresh_intervals: - 5s - 10s - 30s - 1m - 5m - 15m - 30m - 1h - 2h - 1d status: Stable time_options: - 5m - 15m - 1h - 6h - 12h - 24h - 2d - 7d - 30d type: timepicker timezone: browser title: Openstack Main1 version: 2