diff --git a/doc/source/methodologies/index.rst b/doc/source/methodologies/index.rst index 2e13050..d69cf65 100644 --- a/doc/source/methodologies/index.rst +++ b/doc/source/methodologies/index.rst @@ -7,7 +7,8 @@ Methodologies ======================= .. toctree:: - :maxdepth: 2 + :maxdepth: 4 tools hyper-scale + monitoring/index diff --git a/doc/source/methodologies/monitoring/configs/ccp/ccp.yaml b/doc/source/methodologies/monitoring/configs/ccp/ccp.yaml new file mode 100644 index 0000000..ca7aa81 --- /dev/null +++ b/doc/source/methodologies/monitoring/configs/ccp/ccp.yaml @@ -0,0 +1,15 @@ +builder: + push: true + no_cache: false +registry: + address: "172.20.8.35:5000/env-1" +repositories: + skip_empty: True +kubernetes: + server: http://172.20.9.234:8080 +--- +!include +- versions.yaml +- topology.yaml +- configs.yaml +- repos.yaml diff --git a/doc/source/methodologies/monitoring/configs/ccp/configs.yaml b/doc/source/methodologies/monitoring/configs/ccp/configs.yaml new file mode 100644 index 0000000..cf12e35 --- /dev/null +++ b/doc/source/methodologies/monitoring/configs/ccp/configs.yaml @@ -0,0 +1,38 @@ +configs: + private_interface: p1p1.602 + public_interface: p1p1.602 + ingress: + enabled: true + glance: + bootstrap: + enable: true +# nova: +# allocation_ratio: +# cpu: 16.0 + neutron: + physnets: + - name: "physnet1" + bridge_name: "br-ex" + interface: "p1p1.649" + flat: true + vlan_range: false + bootstrap: + internal: + enable: true + external: + enable: true + net_name: ext-net + subnet_name: ext-subnet + physnet: physnet1 + network: 10.144.0.0/12 + gateway: 10.144.0.1 + nameserver: 10.144.0.1 + pool: + start: 10.144.1.0 + end: 10.159.255.250 + keystone: + debug: true + heat: + debug: true + memcached: + ram: 30720 diff --git a/doc/source/methodologies/monitoring/configs/ccp/deploy-ccp.sh b/doc/source/methodologies/monitoring/configs/ccp/deploy-ccp.sh new file mode 100755 index 0000000..fe3cfee --- /dev/null +++ 
b/doc/source/methodologies/monitoring/configs/ccp/deploy-ccp.sh @@ -0,0 +1,81 @@ +#!/bin/bash +# Usage: deploy-ccp.sh <env-number> +# Copies ./env-<N>/configs to node 172.20.8.6<N>, installs fuel-ccp there, trusts the ingress certificate, then runs ccp fetch/build/deploy and polls "ccp status" until it reports "ok" or DEPLOY_TIMEOUT seconds of sleep have elapsed. +set -ex +if [ -z "$1" ]; then + echo "Please set number of env as argument" + exit 1 +fi + +DEPLOY_TIMEOUT=1200 +export SSH_USER="root" +export SSH_PASS="r00tme" +cd "$(dirname "$(realpath "$0")")" + +NODE1="172.20.8.6${1}" + +SSH_OPTS="-q -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no" +SSH_CMD="sshpass -p ${SSH_PASS} ssh ${SSH_OPTS} ${SSH_USER}@${NODE1}" +SCP_CMD="sshpass -p ${SSH_PASS} scp ${SSH_OPTS}" + +if [ ! -d ./env-${1} ]; then + echo "Yaml files for env-${1} is not found" + echo "Please, create and commit deployment/ccp/rackspace/env-${1}/configs with correct yaml files" + echo "Main file should be deployment/ccp/rackspace/env-${1}/configs/ccp.yaml" + exit 1 +fi + + +$SCP_CMD ./env-${1}/configs/ccp.yaml ${SSH_USER}@${NODE1}:/root/.ccp.yaml +for i in $(ls -1 ./env-${1}/configs/ | grep -v ccp.yaml ); do + $SCP_CMD ./env-${1}/configs/${i} ${SSH_USER}@${NODE1}:/root/ +done + +$SSH_CMD "rm -rf /root/fuel-ccp; cd /root; git clone https://git.openstack.org/openstack/fuel-ccp" +$SSH_CMD "apt-get -y install python-pip" +$SSH_CMD "/usr/bin/pip install --upgrade pip" +$SSH_CMD "/usr/bin/pip install /root/fuel-ccp/" + +CCP_STATUS=$($SSH_CMD "/usr/local/bin/ccp status") +if [ -n "$CCP_STATUS" ]; then + echo "Active deployment was found" + echo "$CCP_STATUS" + echo "Please execute 'ccp cleanup' and 'rm -rf /var/lib/mysql/*' on the ${NODE1} manually" + exit 1 +fi + +$SSH_CMD "echo '172.20.8.6${1} cloudformation.ccp.external console.ccp.external identity.ccp.external object-store.ccp.external compute.ccp.external orchestration.ccp.external network.ccp.external image.ccp.external volume.ccp.external horizon.ccp.external' >> /etc/hosts" +# $SSH_CMD kubectl delete configmaps traefik-conf -n kube-system +# $SSH_CMD kubectl delete service traefik -n kube-system +# $SSH_CMD kubectl delete secret traefik-cert -n kube-system +# $SSH_CMD kubectl delete deployment traefik -n 
kube-system +$SSH_CMD "/root/fuel-ccp/tools/ingress/deploy-ingress-controller.sh -i 172.20.8.6${1}" || echo "Already configured" +$SSH_CMD "echo 172.20.8.6${1} \$(ccp domains list -f value) >> /etc/hosts" +$SSH_CMD "openssl s_client -status -connect identity.ccp.external:8443 < /dev/null 2>&1 | awk 'BEGIN {pr=0;} /-----BEGIN CERTIFICATE-----/ {pr=1;} {if (pr) print;} /-----END CERTIFICATE-----/ {exit;}' >> /usr/local/lib/python2.7/dist-packages/requests/cacert.pem" +$SSH_CMD "openssl s_client -status -connect identity.ccp.external:8443 < /dev/null 2>&1 | awk 'BEGIN {pr=0;} /-----BEGIN CERTIFICATE-----/ {pr=1;} {if (pr) print;} /-----END CERTIFICATE-----/ {exit;}' > /usr/share/ca-certificates/ingress.crt" +$SSH_CMD "cp /usr/share/ca-certificates/ingress.crt /usr/local/share/ca-certificates/" +$SSH_CMD "update-ca-certificates" +# The substitution must be quoted: if ssh prints nothing, an unquoted test becomes "[ != 0 ]", which errors out and silently skips the failure branch. +if [ "$($SSH_CMD "curl -s 'https://identity.ccp.external:8443/' > /dev/null; echo \$?")" != 0 ] +then + echo "keystone is unreachable check https://identity.ccp.external:8443" + exit 1 +fi + +#$SSH_CMD "/root/fuel-ccp/tools/registry/deploy-registry.sh" && +$SSH_CMD "/usr/local/bin/ccp fetch" +$SSH_CMD "/usr/local/bin/ccp build" +$SSH_CMD "/usr/local/bin/ccp deploy" + +DEPLOY_TIME=0 +while [ "$($SSH_CMD '/usr/local/bin/ccp status -s -f value' 2>/dev/null)" != "ok" ] +do + sleep 5 + DEPLOY_TIME=$((${DEPLOY_TIME} + 5)) + if [ $DEPLOY_TIME -ge $DEPLOY_TIMEOUT ]; then + echo "Deployment timeout" + exit 1 + fi +done + +$SSH_CMD "/usr/local/bin/ccp status" diff --git a/doc/source/methodologies/monitoring/configs/ccp/openrc-ccp b/doc/source/methodologies/monitoring/configs/ccp/openrc-ccp new file mode 100644 index 0000000..775d8fa --- /dev/null +++ b/doc/source/methodologies/monitoring/configs/ccp/openrc-ccp @@ -0,0 +1,7 @@ +export OS_PROJECT_DOMAIN_NAME=default +export OS_USER_DOMAIN_NAME=default +export OS_PROJECT_NAME=admin +export OS_USERNAME=admin +export OS_PASSWORD=password +export OS_IDENTITY_API_VERSION=3 +export 
OS_AUTH_URL=https://identity.ccp.external:8443/v3 \ No newline at end of file diff --git a/doc/source/methodologies/monitoring/configs/ccp/repos.yaml b/doc/source/methodologies/monitoring/configs/ccp/repos.yaml new file mode 100644 index 0000000..d6138d9 --- /dev/null +++ b/doc/source/methodologies/monitoring/configs/ccp/repos.yaml @@ -0,0 +1,44 @@ +repositories: + repos: + - git_url: https://git.openstack.org/openstack/fuel-ccp-ceph + name: fuel-ccp-ceph + - git_url: https://git.openstack.org/openstack/fuel-ccp-cinder + name: fuel-ccp-cinder + - git_url: https://git.openstack.org/openstack/fuel-ccp-debian-base + name: fuel-ccp-debian-base + - git_url: https://git.openstack.org/openstack/fuel-ccp-entrypoint + name: fuel-ccp-entrypoint + - git_url: https://git.openstack.org/openstack/fuel-ccp-etcd + name: fuel-ccp-etcd + - git_url: https://git.openstack.org/openstack/fuel-ccp-glance + name: fuel-ccp-glance + - git_url: https://git.openstack.org/openstack/fuel-ccp-heat + name: fuel-ccp-heat + - git_url: https://git.openstack.org/openstack/fuel-ccp-horizon + name: fuel-ccp-horizon +# - git_url: https://git.openstack.org/openstack/fuel-ccp-ironic +# name: fuel-ccp-ironic + - git_url: https://git.openstack.org/openstack/fuel-ccp-keystone + name: fuel-ccp-keystone +# - git_url: https://git.openstack.org/openstack/fuel-ccp-mariadb +# name: fuel-ccp-mariadb + - git_url: https://git.openstack.org/openstack/fuel-ccp-galera + name: fuel-ccp-galera + - git_url: https://git.openstack.org/openstack/fuel-ccp-memcached + name: fuel-ccp-memcached +# - git_url: https://git.openstack.org/openstack/fuel-ccp-murano +# name: fuel-ccp-murano + - git_url: https://git.openstack.org/openstack/fuel-ccp-neutron + name: fuel-ccp-neutron + - git_url: https://git.openstack.org/openstack/fuel-ccp-nova + name: fuel-ccp-nova + - git_url: https://git.openstack.org/openstack/fuel-ccp-openstack-base + name: fuel-ccp-openstack-base + - git_url: https://git.openstack.org/openstack/fuel-ccp-rabbitmq + 
name: fuel-ccp-rabbitmq +# - git_url: https://git.openstack.org/openstack/fuel-ccp-sahara +# name: fuel-ccp-sahara +# - git_url: https://git.openstack.org/openstack/fuel-ccp-searchlight +# name: fuel-ccp-searchlight +# - git_url: https://git.openstack.org/openstack/fuel-ccp-stacklight +# name: fuel-ccp-stacklight diff --git a/doc/source/methodologies/monitoring/configs/ccp/topology.yaml b/doc/source/methodologies/monitoring/configs/ccp/topology.yaml new file mode 100644 index 0000000..f22cb0f --- /dev/null +++ b/doc/source/methodologies/monitoring/configs/ccp/topology.yaml @@ -0,0 +1,77 @@ +nodes: +# node[1-3]: Kubernetes + node([4-6])$: # 4-6 + roles: + - controller + - openvswitch + node[7-9]$: # 7-9 + roles: + - rabbitmq + node10$: # 10 + roles: + - galera + node11$: # 11 + roles: + - heat + node(1[2-9])$: # 12-19 + roles: + - compute + - openvswitch + node[2-9][0-9]$: # 20-99 + roles: + - compute + - openvswitch + node(1[0-9][0-9])$: # 100-199 + roles: + - compute + - openvswitch + node200$: + roles: + - backup +replicas: + glance-api: 1 + glance-registry: 1 + keystone: 3 + nova-api: 3 + nova-scheduler: 3 + nova-conductor: 3 + neutron-server: 3 + neutron-metadata-agent: 3 + horizon: 3 + heat-api: 1 + heat-api-cfn: 1 + heat-engine: 1 +roles: + galera: + - galera + rabbitmq: + - rabbitmq + controller: + - etcd + - glance-api + - glance-registry + - horizon + - keystone + - memcached + - neutron-dhcp-agent + - neutron-l3-agent + - neutron-metadata-agent + - neutron-server + - nova-api + - nova-conductor + - nova-consoleauth + - nova-novncproxy + - nova-scheduler + compute: + - nova-compute + - nova-libvirt + openvswitch: + - neutron-openvswitch-agent + - openvswitch-db + - openvswitch-vswitchd + backup: + - backup + heat: + - heat-api + - heat-api-cfn + - heat-engine diff --git a/doc/source/methodologies/monitoring/configs/ccp/versions.yaml b/doc/source/methodologies/monitoring/configs/ccp/versions.yaml new file mode 100644 index 0000000..add8173 --- /dev/null +++ 
b/doc/source/methodologies/monitoring/configs/ccp/versions.yaml @@ -0,0 +1,71 @@ +images: + tag: newton +# image_specs: +# keystone: +# tag: newton + +# horizon: +# tag: newton + +# nova-upgrade: +# tag: newton +# nova-api: +# tag: newton +# nova-conductor: +# tag: newton +# nova-consoleauth: +# tag: newton +# nova-novncproxy: +# tag: newton +# nova-scheduler: +# tag: newton +# nova-compute: +# tag: newton +# nova-libvirt: +# tag: newton + +# neutron-dhcp-agent: +# tag: newton +# neutron-l3-agent: +# tag: newton +# neutron-metadata-agent: +# tag: newton +# neutron-server: +# tag: newton +# neutron-openvswitch-agent: +# tag: newton + +# glance-api: +# tag: newton +# glance-registry: +# tag: newton +# glance-upgrade: +# tag: newton +sources: + openstack/cinder: + git_ref: stable/newton + git_url: https://github.com/openstack/cinder.git + openstack/glance: + git_ref: stable/newton + git_url: https://github.com/openstack/glance.git + openstack/heat: + git_ref: stable/newton + git_url: https://github.com/openstack/heat.git + openstack/horizon: + git_ref: stable/newton + git_url: https://github.com/openstack/horizon.git + openstack/keystone: + git_ref: stable/newton + git_url: https://github.com/openstack/keystone.git + openstack/neutron: + git_ref: stable/newton + git_url: https://github.com/openstack/neutron.git + openstack/nova: + git_ref: stable/newton + git_url: https://github.com/openstack/nova.git + openstack/requirements: + git_ref: stable/newton + git_url: https://git.openstack.org/openstack/requirements.git + openstack/sahara-dashboard: + git_ref: stable/newton + git_url: https://git.openstack.org/openstack/sahara-dashboard.git diff --git a/doc/source/methodologies/monitoring/configs/dashboards/ETCD.json b/doc/source/methodologies/monitoring/configs/dashboards/ETCD.json new file mode 100644 index 0000000..4f1d4f0 --- /dev/null +++ b/doc/source/methodologies/monitoring/configs/dashboards/ETCD.json @@ -0,0 +1,2086 @@ +{ + "__inputs": [ + { + "name": 
"DS_PROMETHEUS-SYSTEMS", + "label": "prometheus-systems", + "description": "", + "type": "datasource", + "pluginId": "prometheus", + "pluginName": "Prometheus" + } + ], + "__requires": [ + { + "type": "panel", + "id": "singlestat", + "name": "Singlestat", + "version": "" + }, + { + "type": "panel", + "id": "graph", + "name": "Graph", + "version": "" + }, + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "4.0.1" + }, + { + "type": "datasource", + "id": "prometheus", + "name": "Prometheus", + "version": "1.0.0" + } + ], + "id": null, + "title": "ETCD", + "tags": [ + "intel" + ], + "style": "dark", + "timezone": "browser", + "editable": true, + "sharedCrosshair": false, + "hideControls": false, + "time": { + "from": "now-15m", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "templating": { + "list": [ + { + "allValue": null, + "current": {}, + "datasource": "${DS_PROMETHEUS-SYSTEMS}", + "hide": 0, + "includeAll": false, + "label": null, + "multi": false, + "name": "env", + "options": [], + "query": "label_values(env)", + "refresh": 1, + "regex": "", + "sort": 0, + "tagValuesQuery": null, + "tagsQuery": null, + "type": "query" + } + ] + }, + "annotations": { + "list": [] + }, + "schemaVersion": 13, + "version": 14, + "links": [], + "gnetId": null, + "rows": [ + { + "title": "Dashboard Row", + "panels": [ + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_PROMETHEUS-SYSTEMS}", + "editable": true, + "error": false, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 4, + "interval": 
null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "targets": [ + { + "expr": "etcd_general_stats_members_count{env=\"$env\"}", + "intervalFactor": 2, + "refId": "A", + "step": 20 + } + ], + "thresholds": "", + "title": "Members total", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS-SYSTEMS}", + "editable": true, + "error": false, + "fill": 1, + "id": 3, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 5, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "etcd_general_stats_dataset_size{env=\"$env\"}", + "intervalFactor": 2, + "legendFormat": "{{host}}", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Size of data set", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 
null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS-SYSTEMS}", + "editable": true, + "error": false, + "fill": 1, + "id": 5, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 5, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "etcd_general_stats_total_keys_count{env=\"$env\"}", + "intervalFactor": 2, + "legendFormat": "{{host}}", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Number of keys in a cluster", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "showTitle": false, + "titleSize": "h6", + "height": 250, + "repeat": null, + "repeatRowId": null, + "repeatIteration": null, + "collapse": false + }, + { + "title": "Leader", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS-SYSTEMS}", + "editable": true, + "error": false, + "fill": 1, + "id": 6, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 
5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "etcd_leader_stats_sendBandwidthRate{env=\"$env\"}", + "intervalFactor": 2, + "legendFormat": "{{host}}", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Leader send bandwidth rate", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS-SYSTEMS}", + "editable": true, + "error": false, + "fill": 1, + "id": 7, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "etcd_leader_stats_sendAppendRequestCnt{env=\"$env\"}", + "intervalFactor": 2, + "legendFormat": "{{host}}", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Leader send append request", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": 
"short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS-SYSTEMS}", + "editable": true, + "error": false, + "fill": 1, + "id": 8, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "etcd_leader_stats_sendPkgRate{env=\"$env\"}", + "intervalFactor": 2, + "legendFormat": "{{host}}", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Leader send package rate", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "showTitle": true, + "titleSize": "h6", + "height": 250, + "repeat": null, + "repeatRowId": null, + "repeatIteration": null, + "collapse": false + }, + { + "title": "Followers", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS-SYSTEMS}", + "editable": true, + "error": false, + "fill": 1, + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + 
"seriesOverrides": [], + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "etcd_follower_stats_recvBandwidthRate{env=\"$env\"}", + "intervalFactor": 2, + "legendFormat": "{{host}}", + "metric": "", + "refId": "B", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Followers receive bandwidth rate", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS-SYSTEMS}", + "editable": true, + "error": false, + "fill": 1, + "id": 1, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "etcd_follower_stats_recvAppendRequestCnt{env=\"$env\"}", + "intervalFactor": 2, + "legendFormat": "{{host}}", + "metric": "etcd_follower_stats_latency_from_leader_avg", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Followers append request", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + 
"show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS-SYSTEMS}", + "editable": true, + "error": false, + "fill": 1, + "id": 10, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "etcd_follower_stats_recvPkgRate{env=\"$env\"}", + "intervalFactor": 2, + "legendFormat": "{{host}}", + "metric": "etcd_follower_stats_latency_from_leader_avg", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Followers received packets", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS-SYSTEMS}", + "editable": true, + "error": false, + "fill": 1, + "id": 9, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": 
"etcd_follower_stats_latency_from_leader_avg{env=\"$env\"}", + "intervalFactor": 2, + "legendFormat": "{{host}}", + "metric": "etcd_follower_stats_latency_from_leader_avg", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Avg. latency to followers", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "showTitle": true, + "titleSize": "h6", + "height": "250px", + "repeat": null, + "repeatRowId": null, + "repeatIteration": null, + "collapse": false + }, + { + "title": "Store", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS-SYSTEMS}", + "editable": true, + "error": false, + "fill": 1, + "id": 11, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "etcd_store_stats_getsSuccess{env=\"$env\"}", + "intervalFactor": 2, + "legendFormat": "{{host}}", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Get success", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + 
"logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS-SYSTEMS}", + "editable": true, + "error": false, + "fill": 1, + "id": 12, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "etcd_store_stats_getsFail{env=\"$env\"}", + "intervalFactor": 2, + "legendFormat": "{{host}}", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Get fail", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS-SYSTEMS}", + "editable": true, + "error": false, + "fill": 1, + "id": 13, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": 
"etcd_store_stats_setsSuccess{env=\"$env\"}", + "intervalFactor": 2, + "legendFormat": "{{host}}", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Set success", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS-SYSTEMS}", + "editable": true, + "error": false, + "fill": 1, + "id": 14, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "etcd_store_stats_setsFail{env=\"$env\"}", + "intervalFactor": 2, + "legendFormat": "{{host}}", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Set fail", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS-SYSTEMS}", + "editable": true, + 
"error": false, + "fill": 1, + "id": 15, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "etcd_store_stats_deleteSuccess{env=\"$env\"}", + "intervalFactor": 2, + "legendFormat": "{{host}}", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Delete success", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS-SYSTEMS}", + "editable": true, + "error": false, + "fill": 1, + "id": 16, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "etcd_store_stats_deleteFail{env=\"$env\"}", + "intervalFactor": 2, + "legendFormat": "{{host}}", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Delete fail", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": 
"individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS-SYSTEMS}", + "editable": true, + "error": false, + "fill": 1, + "id": 17, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "etcd_store_stats_updateSuccess{env=\"$env\"}", + "intervalFactor": 2, + "legendFormat": "{{host}}", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Update success", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS-SYSTEMS}", + "editable": true, + "error": false, + "fill": 1, + "id": 18, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "percentage": false, + 
"pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "etcd_store_stats_updateFail{env=\"$env\"}", + "intervalFactor": 2, + "legendFormat": "{{host}}", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Update fail", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS-SYSTEMS}", + "editable": true, + "error": false, + "fill": 1, + "id": 19, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "etcd_store_stats_createSuccess{env=\"$env\"}", + "intervalFactor": 2, + "legendFormat": "{{host}}", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Create success", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + 
"logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS-SYSTEMS}", + "editable": true, + "error": false, + "fill": 1, + "id": 20, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "etcd_store_stats_createFail{env=\"$env\"}", + "intervalFactor": 2, + "legendFormat": "{{host}}", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Create fail", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS-SYSTEMS}", + "editable": true, + "error": false, + "fill": 1, + "id": 21, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "etcd_store_stats_compareAndSwapSuccess{env=\"$env\"}", + "intervalFactor": 2, + "legendFormat": "{{host}}", + "refId": "A", + "step": 2 
+ } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Compare and swap success", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS-SYSTEMS}", + "editable": true, + "error": false, + "fill": 1, + "id": 22, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "etcd_store_stats_compareAndSwapFail{env=\"$env\"}", + "intervalFactor": 2, + "legendFormat": "{{host}}", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Compare and swap fail", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS-SYSTEMS}", + "editable": true, + "error": false, + "fill": 1, + "id": 23, + "legend": { + "avg": false, + "current": false, + 
"max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "etcd_store_stats_compareAndDeleteSuccess{env=\"$env\"}", + "intervalFactor": 2, + "legendFormat": "{{host}}", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Compare and delete success", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS-SYSTEMS}", + "editable": true, + "error": false, + "fill": 1, + "id": 24, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "etcd_store_stats_compareAndDeleteFail{env=\"$env\"}", + "intervalFactor": 2, + "legendFormat": "{{host}}", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Compare and delete fail", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { 
+ "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS-SYSTEMS}", + "editable": true, + "error": false, + "fill": 1, + "id": 25, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "etcd_store_stats_expireCount{env=\"$env\"}", + "intervalFactor": 2, + "legendFormat": "{{host}}", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Expire count", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS-SYSTEMS}", + "editable": true, + "error": false, + "fill": 1, + "id": 26, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + 
"seriesOverrides": [], + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "etcd_store_stats_watchers{env=\"$env\"}", + "intervalFactor": 2, + "legendFormat": "{{host}}", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Watchers", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "showTitle": true, + "titleSize": "h6", + "height": 250, + "repeat": null, + "repeatRowId": null, + "repeatIteration": null, + "collapse": false + } + ] +} \ No newline at end of file diff --git a/doc/source/methodologies/monitoring/configs/dashboards/Kibana_dashboard.json b/doc/source/methodologies/monitoring/configs/dashboards/Kibana_dashboard.json new file mode 100644 index 0000000..87b6830 --- /dev/null +++ b/doc/source/methodologies/monitoring/configs/dashboards/Kibana_dashboard.json @@ -0,0 +1,103 @@ +[ + { + "_id": "Response-Time-Dashboard", + "_type": "dashboard", + "_source": { + "title": "Response Time Dashboard", + "hits": 0, + "description": "", + "panelsJSON": 
"[{\"id\":\"Env-1-Response-Time\",\"type\":\"visualization\",\"panelIndex\":1,\"size_x\":3,\"size_y\":2,\"col\":1,\"row\":1},{\"id\":\"Env-2-Response-Time\",\"type\":\"visualization\",\"panelIndex\":2,\"size_x\":3,\"size_y\":2,\"col\":4,\"row\":1},{\"id\":\"Env-3-Response-Time\",\"type\":\"visualization\",\"panelIndex\":3,\"size_x\":3,\"size_y\":2,\"col\":7,\"row\":1},{\"id\":\"Env-4-Response-Time\",\"type\":\"visualization\",\"panelIndex\":4,\"size_x\":3,\"size_y\":2,\"col\":1,\"row\":3},{\"id\":\"Env-5-Response-Time\",\"type\":\"visualization\",\"panelIndex\":5,\"size_x\":3,\"size_y\":2,\"col\":4,\"row\":3},{\"id\":\"Env-6-Response-Time\",\"type\":\"visualization\",\"panelIndex\":6,\"size_x\":3,\"size_y\":2,\"col\":7,\"row\":3}]", + "optionsJSON": "{\"darkTheme\":true}", + "uiStateJSON": "{}", + "version": 1, + "timeRestore": false, + "kibanaSavedObjectMeta": { + "searchSourceJSON": "{\"filter\":[{\"query\":{\"query_string\":{\"query\":\"*\",\"analyze_wildcard\":true}}}]}" + } + } + }, + { + "_id": "Env-1-Response-Time", + "_type": "visualization", + "_source": { + "title": "Env-1 Response Time", + "visState": "{\"title\":\"New Visualization\",\"type\":\"line\",\"params\":{\"shareYAxis\":true,\"addTooltip\":true,\"addLegend\":true,\"showCircles\":true,\"smoothLines\":false,\"interpolate\":\"linear\",\"scale\":\"linear\",\"drawLinesBetweenPoints\":true,\"radiusRatio\":9,\"times\":[],\"addTimeMarker\":false,\"defaultYExtents\":false,\"setYExtents\":false,\"yAxis\":{}},\"aggs\":[{\"id\":\"1\",\"type\":\"avg\",\"schema\":\"metric\",\"params\":{\"field\":\"ResponseTime\",\"customLabel\":\"Avg Response Time ms\"}},{\"id\":\"2\",\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"Timestamp\",\"interval\":\"auto\",\"customInterval\":\"2h\",\"min_doc_count\":1,\"extended_bounds\":{}}}],\"listeners\":{}}", + "uiStateJSON": "{}", + "description": "", + "version": 1, + "kibanaSavedObjectMeta": { + "searchSourceJSON": 
"{\"index\":\"env-*-heka*\",\"query\":{\"query_string\":{\"query\":\"Environment: \\\"env-1\\\"\",\"analyze_wildcard\":true}},\"filter\":[]}" + } + } + }, + { + "_id": "Env-4-Response-Time", + "_type": "visualization", + "_source": { + "title": "Env-4 Response Time", + "visState": "{\"title\":\"Env-3 Response Time\",\"type\":\"line\",\"params\":{\"addLegend\":true,\"addTimeMarker\":false,\"addTooltip\":true,\"defaultYExtents\":false,\"drawLinesBetweenPoints\":true,\"interpolate\":\"linear\",\"radiusRatio\":9,\"scale\":\"linear\",\"setYExtents\":false,\"shareYAxis\":true,\"showCircles\":true,\"smoothLines\":false,\"times\":[],\"yAxis\":{}},\"aggs\":[{\"id\":\"1\",\"type\":\"avg\",\"schema\":\"metric\",\"params\":{\"field\":\"ResponseTime\",\"customLabel\":\"Avg Response Time ms\"}},{\"id\":\"2\",\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"Timestamp\",\"interval\":\"auto\",\"customInterval\":\"2h\",\"min_doc_count\":1,\"extended_bounds\":{}}}],\"listeners\":{}}", + "uiStateJSON": "{}", + "description": "", + "version": 1, + "kibanaSavedObjectMeta": { + "searchSourceJSON": "{\"index\":\"env-*-heka*\",\"query\":{\"query_string\":{\"query\":\"Environment: \\\"env-4\\\"\",\"analyze_wildcard\":true}},\"filter\":[]}" + } + } + }, + { + "_id": "Env-5-Response-Time", + "_type": "visualization", + "_source": { + "title": "Env-5 Response Time", + "visState": "{\"title\":\"Env-4 Response Time\",\"type\":\"line\",\"params\":{\"addLegend\":true,\"addTimeMarker\":false,\"addTooltip\":true,\"defaultYExtents\":false,\"drawLinesBetweenPoints\":true,\"interpolate\":\"linear\",\"radiusRatio\":9,\"scale\":\"linear\",\"setYExtents\":false,\"shareYAxis\":true,\"showCircles\":true,\"smoothLines\":false,\"times\":[],\"yAxis\":{}},\"aggs\":[{\"id\":\"1\",\"type\":\"avg\",\"schema\":\"metric\",\"params\":{\"field\":\"ResponseTime\",\"customLabel\":\"Avg Response Time 
ms\"}},{\"id\":\"2\",\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"Timestamp\",\"interval\":\"auto\",\"customInterval\":\"2h\",\"min_doc_count\":1,\"extended_bounds\":{}}}],\"listeners\":{}}", + "uiStateJSON": "{}", + "description": "", + "version": 1, + "kibanaSavedObjectMeta": { + "searchSourceJSON": "{\"index\":\"env-*-heka*\",\"query\":{\"query_string\":{\"query\":\"Environment: \\\"env-5\\\"\",\"analyze_wildcard\":true}},\"filter\":[]}" + } + } + }, + { + "_id": "Env-6-Response-Time", + "_type": "visualization", + "_source": { + "title": "Env-6 Response Time", + "visState": "{\"title\":\"Env-5 Response Time\",\"type\":\"line\",\"params\":{\"addLegend\":true,\"addTimeMarker\":false,\"addTooltip\":true,\"defaultYExtents\":false,\"drawLinesBetweenPoints\":true,\"interpolate\":\"linear\",\"radiusRatio\":9,\"scale\":\"linear\",\"setYExtents\":false,\"shareYAxis\":true,\"showCircles\":true,\"smoothLines\":false,\"times\":[],\"yAxis\":{}},\"aggs\":[{\"id\":\"1\",\"type\":\"avg\",\"schema\":\"metric\",\"params\":{\"field\":\"ResponseTime\",\"customLabel\":\"Avg Response Time ms\"}},{\"id\":\"2\",\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"Timestamp\",\"interval\":\"auto\",\"customInterval\":\"2h\",\"min_doc_count\":1,\"extended_bounds\":{}}}],\"listeners\":{}}", + "uiStateJSON": "{}", + "description": "", + "version": 1, + "kibanaSavedObjectMeta": { + "searchSourceJSON": "{\"index\":\"env-*-heka*\",\"query\":{\"query_string\":{\"query\":\"Environment: \\\"env-6\\\"\",\"analyze_wildcard\":true}},\"filter\":[]}" + } + } + }, + { + "_id": "Env-3-Response-Time", + "_type": "visualization", + "_source": { + "title": "Env-3 Response Time", + "visState": "{\"aggs\":[{\"id\":\"1\",\"params\":{\"customLabel\":\"Avg Response Time 
ms\",\"field\":\"ResponseTime\"},\"schema\":\"metric\",\"type\":\"avg\"},{\"id\":\"2\",\"params\":{\"customInterval\":\"2h\",\"extended_bounds\":{},\"field\":\"Timestamp\",\"interval\":\"auto\",\"min_doc_count\":1},\"schema\":\"segment\",\"type\":\"date_histogram\"}],\"listeners\":{},\"params\":{\"addLegend\":true,\"addTimeMarker\":false,\"addTooltip\":true,\"defaultYExtents\":false,\"drawLinesBetweenPoints\":true,\"interpolate\":\"linear\",\"radiusRatio\":9,\"scale\":\"linear\",\"setYExtents\":false,\"shareYAxis\":true,\"showCircles\":true,\"smoothLines\":false,\"times\":[],\"yAxis\":{}},\"title\":\"Env-2 Response Time\",\"type\":\"line\"}", + "uiStateJSON": "{}", + "description": "", + "version": 1, + "kibanaSavedObjectMeta": { + "searchSourceJSON": "{\"index\":\"env-*-heka*\",\"query\":{\"query_string\":{\"query\":\"Environment: \\\"env-3\\\"\",\"analyze_wildcard\":true}},\"filter\":[]}" + } + } + }, + { + "_id": "Env-2-Response-Time", + "_type": "visualization", + "_source": { + "title": "Env-2 Response Time", + "visState": "{\"aggs\":[{\"id\":\"1\",\"params\":{\"customLabel\":\"Avg Response Time ms\",\"field\":\"ResponseTime\"},\"schema\":\"metric\",\"type\":\"avg\"},{\"id\":\"2\",\"params\":{\"customInterval\":\"2h\",\"extended_bounds\":{},\"field\":\"Timestamp\",\"interval\":\"auto\",\"min_doc_count\":1},\"schema\":\"segment\",\"type\":\"date_histogram\"}],\"listeners\":{},\"params\":{\"addLegend\":true,\"addTimeMarker\":false,\"addTooltip\":true,\"defaultYExtents\":false,\"drawLinesBetweenPoints\":true,\"interpolate\":\"linear\",\"radiusRatio\":9,\"scale\":\"linear\",\"setYExtents\":false,\"shareYAxis\":true,\"showCircles\":true,\"smoothLines\":false,\"times\":[],\"yAxis\":{}},\"title\":\"Env-1 Response Time\",\"type\":\"line\"}", + "uiStateJSON": "{}", + "description": "", + "version": 1, + "kibanaSavedObjectMeta": { + "searchSourceJSON": "{\"index\":\"env-*-heka*\",\"query\":{\"query_string\":{\"query\":\"Environment: 
\\\"env-2\\\"\",\"analyze_wildcard\":true}},\"filter\":[]}" + } + } + } +] \ No newline at end of file diff --git a/doc/source/methodologies/monitoring/configs/dashboards/Kubernetes_statistics.json b/doc/source/methodologies/monitoring/configs/dashboards/Kubernetes_statistics.json new file mode 100644 index 0000000..f918ee0 --- /dev/null +++ b/doc/source/methodologies/monitoring/configs/dashboards/Kubernetes_statistics.json @@ -0,0 +1,3242 @@ +{ + "__inputs": [ + { + "name": "DS_PROMETHEUS-SYSTEMS", + "label": "prometheus-systems", + "description": "", + "type": "datasource", + "pluginId": "prometheus", + "pluginName": "Prometheus" + }, + { + "name": "DS_PROMETHEUS-KUBER", + "label": "prometheus-kuber", + "description": "", + "type": "datasource", + "pluginId": "prometheus", + "pluginName": "Prometheus" + } + ], + "__requires": [ + { + "type": "panel", + "id": "singlestat", + "name": "Singlestat", + "version": "" + }, + { + "type": "panel", + "id": "grafana-piechart-panel", + "name": "Pie Chart", + "version": "1.1.4" + }, + { + "type": "panel", + "id": "graph", + "name": "Graph", + "version": "" + }, + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "4.0.1" + }, + { + "type": "datasource", + "id": "prometheus", + "name": "Prometheus", + "version": "1.0.0" + } + ], + "id": null, + "title": "Kubernetes statistics", + "tags": [ + "intel" + ], + "style": "dark", + "timezone": "browser", + "editable": true, + "sharedCrosshair": false, + "hideControls": false, + "time": { + "from": "now-15m", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "templating": { + "list": [ + { + "allValue": null, + "current": {}, + "datasource": "${DS_PROMETHEUS-KUBER}", + "hide": 0, + "includeAll": false, + "label": null, + "multi": false, + "name": "env", + 
"options": [], + "query": "label_values(env)", + "refresh": 1, + "regex": "", + "sort": 0, + "tagValuesQuery": null, + "tagsQuery": null, + "type": "query" + }, + { + "allValue": null, + "current": {}, + "datasource": "${DS_PROMETHEUS-KUBER}", + "hide": 0, + "includeAll": false, + "label": "Container", + "multi": false, + "name": "pod", + "options": [], + "query": "query_result(sum by (container_name) (container_cpu_system_seconds_total{env=\"$env\", container_name != \"POD\"} and container_cpu_system_seconds_total{env=\"$env\", container_name != \"\"}))", + "refresh": 1, + "regex": "/.*\"(.*)\".*/", + "sort": 0, + "tagValuesQuery": null, + "tagsQuery": null, + "type": "query", + "useTags": false + }, + { + "allValue": ".*", + "current": {}, + "datasource": "${DS_PROMETHEUS-KUBER}", + "hide": 0, + "includeAll": false, + "label": "Node", + "multi": true, + "name": "node", + "options": [], + "query": "query_result(sum by (instance) (container_cpu_system_seconds_total{env=\"$env\", container_name =\"$pod\"}))", + "refresh": 1, + "regex": "/.*\"(.*)\".*/", + "sort": 0, + "tagValuesQuery": null, + "tagsQuery": null, + "type": "query", + "useTags": false + } + ] + }, + "annotations": { + "list": [] + }, + "refresh": false, + "schemaVersion": 13, + "version": 16, + "links": [], + "gnetId": null, + "rows": [ + { + "title": "Total cluster status", + "panels": [ + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_PROMETHEUS-SYSTEMS}", + "editable": true, + "error": false, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 36, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, 
+ "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "targets": [ + { + "expr": "k8s_nodes_number_of_nodes_total{env=\"$env\"}", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 20 + } + ], + "thresholds": "", + "title": "Total nodes", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_PROMETHEUS-SYSTEMS}", + "editable": true, + "error": false, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 37, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "targets": [ + { + "expr": "k8s_nodes_number_of_unsched{env=\"$env\"}", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 20 + } + ], + "thresholds": "", + "title": "Total unschedulable nodes", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [
+ { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_PROMETHEUS-SYSTEMS}", + "editable": true, + "error": false, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 38, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "targets": [ + { + "expr": "k8s_nodes_states_number_of_status_True{env=\"$env\"}", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 20 + } + ], + "thresholds": "", + "title": "Total nodes in True status", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_PROMETHEUS-SYSTEMS}", + "editable": true, + "error": false, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 39, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", 
+ "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "targets": [ + { + "expr": "k8s_namespaces_number_of_namespaces_total{env=\"$env\"}", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 20 + } + ], + "thresholds": "", + "title": "Total namespaces", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_PROMETHEUS-SYSTEMS}", + "editable": true, + "error": false, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 40, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "targets": [ + { + "expr": "k8s_pods_number_of_pods_total{env=\"$env\"}", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 20 + } + ], + 
"thresholds": "", + "title": "Total PODs", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_PROMETHEUS-SYSTEMS}", + "editable": true, + "error": false, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 41, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "targets": [ + { + "expr": "k8s_pods_states_number_of_pods_state_Running{env=\"$env\"}", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 20 + } + ], + "thresholds": "", + "title": "Running PODs", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_PROMETHEUS-SYSTEMS}", + "editable": true, + "error": false, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 52, + 
"interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "targets": [ + { + "expr": "k8s_number_of_scheduler_instances{env=\"$env\"}", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 20 + } + ], + "thresholds": "", + "title": "Number of scheduler instances", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_PROMETHEUS-SYSTEMS}", + "editable": true, + "error": false, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 42, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "targets": [ + { + "expr": 
"k8s_pods_states_number_of_pods_state_Succeeded{env=\"$env\"}", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 20 + } + ], + "thresholds": "", + "title": "Succeeded PODs", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_PROMETHEUS-SYSTEMS}", + "editable": true, + "error": false, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 43, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "targets": [ + { + "expr": "k8s_number_of_API_instances{env=\"$env\"}", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 20 + } + ], + "thresholds": "", + "title": "Number of API instances", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_PROMETHEUS-SYSTEMS}", + "editable": true, + "error": false, + "format": 
"none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 44, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "targets": [ + { + "expr": "k8s_number_of_controllers{env=\"$env\"}", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 20 + } + ], + "thresholds": "", + "title": "Number of controllers", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_PROMETHEUS-SYSTEMS}", + "editable": true, + "error": false, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 45, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", 
+ "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "targets": [ + { + "expr": "k8s_services_number_of_services_total{env=\"$env\"}", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 20 + } + ], + "thresholds": "", + "title": "Total number of services", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_PROMETHEUS-SYSTEMS}", + "editable": true, + "error": false, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 53, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "targets": [ + { + "expr": "k8s_services_number_of_endpoints_total{env=\"$env\"}", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 20 + } + ], + "thresholds": "", + "title": "Total number of endpoints", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "aliasColors": {}, + "cacheTimeout": null, + "combine": { + "label": "{{pod_node}}", + "threshold": 0 + }, + "datasource": "${DS_PROMETHEUS-SYSTEMS}", +
"editable": true, + "error": false, + "fontSize": "80%", + "format": "short", + "id": 48, + "interval": null, + "legend": { + "percentage": true, + "show": true, + "values": true + }, + "legendType": "Right side", + "links": [], + "maxDataPoints": 3, + "nullPointMode": "connected", + "pieType": "pie", + "span": 4, + "strokeWidth": 1, + "targets": [ + { + "expr": "k8s_pods_per_namespace{env=\"$env\"}", + "intervalFactor": 2, + "legendFormat": "{{ns}}", + "refId": "A", + "step": 600 + } + ], + "title": "Pods per namespace", + "type": "grafana-piechart-panel", + "valueName": "current" + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS-SYSTEMS}", + "editable": true, + "error": false, + "fill": 1, + "id": 49, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 8, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "k8s_pods_per_namespace{env=\"$env\"}", + "intervalFactor": 2, + "legendFormat": "{{ns}}", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Pods per namespace", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "cacheTimeout": null, + "combine": { + "label": "{{pod_node}}", + "threshold": 0 + }, + "datasource": "${DS_PROMETHEUS-SYSTEMS}", + "editable": true, 
+ "error": false, + "fontSize": "80%", + "format": "short", + "id": 47, + "interval": null, + "legend": { + "percentage": true, + "show": false, + "values": true + }, + "legendType": "Right side", + "links": [], + "maxDataPoints": 3, + "nullPointMode": "connected", + "pieType": "pie", + "span": 4, + "strokeWidth": 1, + "targets": [ + { + "expr": "k8s_pods_per_node{env=\"$env\"}", + "intervalFactor": 2, + "legendFormat": "{{pod_node}}", + "refId": "A", + "step": 600 + } + ], + "title": "Pods per node", + "type": "grafana-piechart-panel", + "valueName": "current" + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS-SYSTEMS}", + "editable": true, + "error": false, + "fill": 1, + "id": 46, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 8, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "k8s_pods_per_node{env=\"$env\"}", + "intervalFactor": 2, + "legendFormat": "{{pod_node}}", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Pods per node", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS-SYSTEMS}", + "editable": true, + "error": false, + "fill": 1, + "id": 51, + "legend": { + "avg": false, + "current": false, + 
"max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "k8s_services_endpoints_number{env=\"$env\"}", + "intervalFactor": 2, + "legendFormat": "{{service}}", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Endpoints per service", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "showTitle": true, + "titleSize": "h6", + "height": "250", + "repeat": null, + "repeatRowId": null, + "repeatIteration": null, + "collapse": false + }, + { + "title": "Per PODs statistics", + "panels": [ + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_PROMETHEUS-KUBER}", + "editable": true, + "error": false, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 32, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", +
"prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "targets": [ + { + "expr": "count (container_cpu_system_seconds_total{container_name=\"$pod\", pod_name=~\"$pod-.+\", cpu=\"\" , env=\"$env\"})", + "intervalFactor": 2, + "refId": "A", + "step": 20 + } + ], + "thresholds": "", + "title": "Total PODs", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS-KUBER}", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 33, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "count (container_cpu_system_seconds_total{container_name!=\"POD\", container_name=\"$pod\", env=\"$env\"})", + "intervalFactor": 2, + "legendFormat": "{{container_name}}", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Total PODs", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { +
"aliasColors": {}, + "cacheTimeout": null, + "combine": { + "label": "Others", + "threshold": 0 + }, + "datasource": "${DS_PROMETHEUS-KUBER}", + "editable": true, + "error": false, + "fontSize": "80%", + "format": "s", + "id": 21, + "interval": null, + "legend": { + "percentage": false, + "show": true, + "values": false + }, + "legendType": "Right side", + "links": [], + "maxDataPoints": 3, + "nullPointMode": "connected", + "pieType": "pie", + "span": 2, + "strokeWidth": 1, + "targets": [ + { + "expr": "container_cpu_system_seconds_total{container_name=\"$pod\", pod_name=~\"$pod-.+\", cpu=\"\", instance=~\"$node\", env=\"$env\"}", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "refId": "A", + "step": 600 + } + ], + "title": "Container cpu system seconds total per node", + "type": "grafana-piechart-panel", + "valueName": "current" + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS-KUBER}", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 23, + "legend": { + "avg": false, + "current": false, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 5, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (instance) (container_cpu_system_seconds_total{container_name=\"$pod\", pod_name=~\"$pod-.+\", cpu=\"\", instance=~\"$node\", env=\"$env\"})", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Container cpu system seconds total per node", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": 
null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS-KUBER}", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 24, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 5, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(container_cpu_system_seconds_total{container_name=\"$pod\", pod_name=~\"$pod-.+\", cpu=\"\", instance=~\"$node\", env=\"$env\"}[5m])", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Average 5m rate of container cpu system seconds total per node", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS-KUBER}", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 34, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + 
"links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": " count by (container_name) (container_cpu_system_seconds_total{container_name!=\"\", env=\"$env\"})", + "hide": false, + "intervalFactor": 2, + "legendFormat": "{{container_name}}", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Total PODs", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "showTitle": true, + "titleSize": "h6", + "height": "250px", + "repeat": null, + "repeatRowId": null, + "repeatIteration": null, + "collapse": false + }, + { + "title": "FS usage", + "panels": [ + { + "aliasColors": {}, + "cacheTimeout": null, + "combine": { + "label": "Others", + "threshold": 0 + }, + "datasource": "${DS_PROMETHEUS-KUBER}", + "editable": true, + "error": false, + "fontSize": "80%", + "format": "bytes", + "id": 22, + "interval": null, + "legend": { + "percentage": false, + "show": true, + "values": false + }, + "legendType": "Right side", + "links": [], + "maxDataPoints": 3, + "nullPointMode": "connected", + "pieType": "pie", + "span": 2, + "strokeWidth": 1, + "targets": [ + { + "expr": "sum by (instance) (container_fs_usage_bytes{container_name=\"$pod\", pod_name=~\"$pod.+\", instance=~\"$node\", env=\"$env\"})", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "refId": "A", + "step": 600 + } + ], + "title": "Container fs usage summary per node", + "type":
"grafana-piechart-panel", + "valueName": "current" + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS-KUBER}", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 8, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 5, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (instance) (container_fs_usage_bytes{container_name=\"$pod\", pod_name=~\"$pod.+\", instance=~\"$node\", env=\"$env\"})", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "metric": "container_cpu_usage_seconds_total", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Containers fs current usage for $pod", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": "", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS-KUBER}", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 25, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": 
"flot", + "seriesOverrides": [], + "span": 5, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (instance) (rate(container_fs_usage_bytes{container_name=\"$pod\", pod_name=~\"$pod.+\", instance=~\"$node\", env=\"$env\"}[5m]))", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "metric": "container_cpu_usage_seconds_total", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Average 5m rate of containers fs current usage for $pod", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS-KUBER}", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 7, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (instance) (container_fs_io_current{container_name=\"$pod\", pod_name=~\"$pod.+\", instance=~\"$node\", env=\"$env\"})", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "metric": "container_cpu_usage_seconds_total", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Containers fs io current for $pod", + "tooltip": { + 
"msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS-KUBER}", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 26, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (instance) (rate(container_fs_io_current{container_name=\"$pod\", pod_name=~\"$pod.+\", instance=~\"$node\", env=\"$env\"}[5m]))", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "metric": "container_cpu_usage_seconds_total", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Average 5m rate of containers fs io current for $pod", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "showTitle": true, + "titleSize": "h6", + "height": "250px", + "repeat": null, + "repeatRowId": 
null, + "repeatIteration": null, + "collapse": false + }, + { + "title": "Memory statistics", + "panels": [ + { + "aliasColors": {}, + "cacheTimeout": null, + "combine": { + "label": "Others", + "threshold": 0 + }, + "datasource": "${DS_PROMETHEUS-KUBER}", + "editable": true, + "error": false, + "fontSize": "80%", + "format": "short", + "id": 29, + "interval": null, + "legend": { + "show": true, + "values": false + }, + "legendType": "Right side", + "links": [], + "maxDataPoints": 3, + "nullPointMode": "connected", + "pieType": "pie", + "span": 2, + "strokeWidth": 1, + "targets": [ + { + "expr": "sum by (instance) (container_memory_cache{container_name=\"$pod\", pod_name=~\"$pod.+\", instance=~\"$node\", env=\"$env\"})", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "refId": "A", + "step": 600 + } + ], + "title": "Containers memory cache for $pod", + "type": "grafana-piechart-panel", + "valueName": "current" + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS-KUBER}", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 9, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 5, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (instance) (container_memory_cache{container_name=\"$pod\", pod_name=~\"$pod.+\", instance=~\"$node\", env=\"$env\"})", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "metric": "container_cpu_usage_seconds_total", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Containers memory cache for $pod", + "tooltip": { + "msResolution": false, + "shared": true, +
"sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS-KUBER}", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 28, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 5, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (instance) (rate(container_memory_cache{container_name=\"$pod\", pod_name=~\"$pod.+\", instance=~\"$node\", env=\"$env\"}[5m]))", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "metric": "container_cpu_usage_seconds_total", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Average 5m rate of containers memory cache for $pod", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS-KUBER}", + "editable": true, + "error": false, + "fill": 1, + "grid": 
{}, + "id": 10, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (instance) (container_memory_failures_total{container_name=\"$pod\", pod_name=~\"$pod.+\", type=\"pgfault\", scope=\"hierarchy\", instance=~\"$node\", env=\"$env\"})", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "metric": "container_cpu_usage_seconds_total", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Cumulative count of memory allocation failures for $pod", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS-KUBER}", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 11, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (instance) 
(rate(container_memory_failures_total{container_name=\"$pod\", pod_name=~\"$pod.+\", type=\"pgfault\", scope=\"container\", instance=~\"$node\", env=\"$env\"}[5m]))", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "metric": "container_cpu_usage_seconds_total", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Average 5m rate of cumulative count of memory allocation failures for $pod", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS-KUBER}", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 14, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (instance) (container_memory_usage_bytes{container_name=\"$pod\", pod_name=~\"$pod.+\", instance=~\"$node\", env=\"$env\"})", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "metric": "container_cpu_usage_seconds_total", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Current memory usage for $pod", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": 
"cumulative" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": "", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS-KUBER}", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 12, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (instance) (container_memory_rss{container_name=\"$pod\", pod_name=~\"$pod.+\", instance=~\"$node\", env=\"$env\"})", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "metric": "container_cpu_usage_seconds_total", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Size of RSS for $pod", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": "", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "cacheTimeout": null, + "combine": { + "label": "Others", + "threshold": 0 + }, + "datasource": "${DS_PROMETHEUS-KUBER}", + "editable": true, + "error": false, + "fontSize": "80%", + "format": 
"short", + "id": 31, + "interval": null, + "legend": { + "show": true, + "values": true + }, + "legendType": "Right side", + "links": [], + "maxDataPoints": 3, + "nullPointMode": "connected", + "pieType": "pie", + "span": 3, + "strokeWidth": 1, + "targets": [ + { + "expr": "sum by (instance) (container_memory_swap{container_name=\"$pod\", pod_name=~\"$pod.+\", instance=~\"$node\", env=\"$env\"})", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "refId": "A", + "step": 600 + } + ], + "title": "Container swap usage for $pod", + "type": "grafana-piechart-panel", + "valueName": "current" + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS-KUBER}", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 13, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 5, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (instance) (container_memory_swap{container_name=\"$pod\", pod_name=~\"$pod.+\", instance=~\"$node\", env=\"$env\"})", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "metric": "container_cpu_usage_seconds_total", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Container swap usage for $pod", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": "", + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, 
+ "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS-KUBER}", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 30, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (instance) (rate(container_memory_swap{container_name=\"$pod\", pod_name=~\"$pod.+\", instance=~\"$node\", env=\"$env\"}[5m]))", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "metric": "container_cpu_usage_seconds_total", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Container swap usage for $pod", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": "", + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "showTitle": true, + "titleSize": "h6", + "height": "250px", + "repeat": null, + "repeatRowId": null, + "repeatIteration": null, + "collapse": false + }, + { + "title": "Network statistcis", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS-KUBER}", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 16, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, 
+ "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(container_network_receive_bytes_total{pod_name=~\"$pod.+\", instance=~\"$node\", env=\"$env\"}[10m])) by (instance)", + "intervalFactor": 2, + "legendFormat": "Input {{instance}}", + "refId": "A", + "step": 2 + }, + { + "expr": "sum(rate(container_network_transmit_bytes_total{pod_name=~\"$pod.+\", instance=~\"$node\", env=\"$env\"}[10m])) by (instance)", + "intervalFactor": 2, + "legendFormat": "Output {{instance}}", + "refId": "B", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Average 5m rate of containers network bytes total per node", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS-KUBER}", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 18, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(container_network_receive_packets_total{pod_name=~\"$pod.+\", instance=~\"$node\", env=\"$env\"}[5m])) by 
(instance)", + "intervalFactor": 2, + "legendFormat": "Received {{instance}}", + "refId": "A", + "step": 2 + }, + { + "expr": "sum(rate(container_network_transmit_packets_total{pod_name=~\"$pod.+\", instance=~\"$node\", env=\"$env\"}[5m])) by (instance)", + "intervalFactor": 2, + "legendFormat": "Transmited {{instance}}", + "refId": "B", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Average 5m rate of containers network packets total per node", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "pps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS-KUBER}", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 19, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(container_network_receive_errors_total{pod_name=~\"$pod.+\", instance=~\"$node\", env=\"$env\"}[5m])) by (instance)", + "intervalFactor": 2, + "legendFormat": "Received errors {{instance}}", + "refId": "A", + "step": 2 + }, + { + "expr": "sum(rate(container_network_transmit_errors_total{pod_name=~\"$pod.+\", instance=~\"$node\", env=\"$env\"}[5m])) by (instance)", + "intervalFactor": 2, + "legendFormat": "Transmited errors {{instance}}", + "refId": "B", + "step": 2 + } + ], + 
"thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Average 5m rate of containers network errors total per node", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS-KUBER}", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 20, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(container_network_recieve_packets_dropped_total{pod_name=~\"$pod.+\", instance=~\"$node\", env=\"$env\"}[5m])) by (instance)", + "intervalFactor": 2, + "legendFormat": "Input drops {{instance}}", + "refId": "A", + "step": 2 + }, + { + "expr": "sum(rate(container_network_transmit_packets_dropped_total{pod_name=~\"$pod.+\", instance=~\"$node\", env=\"$env\"}[5m])) by (instance)", + "intervalFactor": 2, + "legendFormat": "Output drops {{instance}}", + "refId": "B", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Average 5m rate of containers network dropped packets total per node", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + 
"yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "showTitle": true, + "titleSize": "h6", + "height": "250px", + "repeat": null, + "repeatRowId": null, + "repeatIteration": null, + "collapse": false + } + ] +} \ No newline at end of file diff --git a/doc/source/methodologies/monitoring/configs/dashboards/OpenStack.json b/doc/source/methodologies/monitoring/configs/dashboards/OpenStack.json new file mode 100644 index 0000000..87bc10b --- /dev/null +++ b/doc/source/methodologies/monitoring/configs/dashboards/OpenStack.json @@ -0,0 +1,2962 @@ +{ + "__inputs": [ + { + "name": "DS_PROMETHEUS-SYSTEMS", + "label": "prometheus-systems", + "description": "", + "type": "datasource", + "pluginId": "prometheus", + "pluginName": "Prometheus" + }, + { + "name": "DS_PROMETHEUS-KUBER", + "label": "prometheus-kuber", + "description": "", + "type": "datasource", + "pluginId": "prometheus", + "pluginName": "Prometheus" + } + ], + "__requires": [ + { + "type": "panel", + "id": "singlestat", + "name": "Singlestat", + "version": "" + }, + { + "type": "panel", + "id": "graph", + "name": "Graph", + "version": "" + }, + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "4.0.1" + }, + { + "type": "datasource", + "id": "prometheus", + "name": "Prometheus", + "version": "1.0.0" + } + ], + "id": null, + "title": "Open Stack", + "tags": [ + "intel" + ], + "style": "dark", + "timezone": "browser", + "editable": true, + "sharedCrosshair": false, + "hideControls": false, + "time": { + "from": "now-5m", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "templating": { + "list": [ + { + 
"allValue": null, + "current": {}, + "datasource": "${DS_PROMETHEUS-SYSTEMS}", + "hide": 0, + "includeAll": false, + "label": null, + "multi": false, + "name": "env", + "options": [], + "query": "label_values(env)", + "refresh": 1, + "regex": "", + "sort": 0, + "tagValuesQuery": null, + "tagsQuery": null, + "type": "query" + }, + { + "allValue": ".+", + "current": {}, + "datasource": "${DS_PROMETHEUS-SYSTEMS}", + "hide": 0, + "includeAll": true, + "label": "Hypervisor", + "multi": true, + "name": "hypervisor", + "options": [], + "query": "label_values(openstack_hypervisor_running_vms, hypervisor)", + "refresh": 1, + "regex": "", + "sort": 3, + "tagValuesQuery": null, + "tagsQuery": null, + "type": "query" + }, + { + "allValue": ".+", + "current": {}, + "datasource": "${DS_PROMETHEUS-SYSTEMS}", + "hide": 0, + "includeAll": true, + "label": "Project", + "multi": false, + "name": "project", + "options": [], + "query": "label_values(openstack_server_state_active, project)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, + "type": "query" + } + ] + }, + "annotations": { + "list": [] + }, + "schemaVersion": 13, + "version": 8, + "links": [], + "gnetId": null, + "rows": [ + { + "title": "New row", + "panels": [ + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_PROMETHEUS-SYSTEMS}", + "editable": true, + "error": false, + "format": "none", + "gauge": { + "maxValue": 1000, + "minValue": 0, + "show": false, + "thresholdLabels": true, + "thresholdMarkers": true + }, + "id": 32, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + 
"prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "targets": [ + { + "expr": "count (system_uptime{env=\"$env\"})", + "intervalFactor": 2, + "metric": "prometheus_evaluator_duration_seconds", + "refId": "A", + "step": 4 + } + ], + "thresholds": "999", + "title": "Nodes online", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS-SYSTEMS}", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 33, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "count (system_uptime{env=\"$env\"})", + "intervalFactor": 2, + "legendFormat": "nodes", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Nodes online", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": 10, + "min": 0, + "show": true + }, + { + "format": "short", + "label": "", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "showTitle": false, + "titleSize": "h6", + "height": "250px", + "repeat": null, + "repeatRowId": null, + 
"repeatIteration": null, + "collapse": false + }, + { + "title": "New row", + "panels": [ + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_PROMETHEUS-KUBER}", + "editable": true, + "error": false, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 18, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "targets": [ + { + "expr": "count (count by (instance) (container_cpu_system_seconds_total{env=\"$env\", container_name=~\"neutron-server|nova-api|nova-conductor|nova-consoleauth|nova-novncproxy|nova-scheduler|neutron-metadata-agent|etcd|glance-api|glance-registry|horizon|keystone|memcached|neutron-dhcp-agent|neutron-l3-agent|neutron-metadata-agent\", container_name!=\"\"}))", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 4 + } + ], + "thresholds": "", + "title": "Total contollers nodes", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS-KUBER}", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 20, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": 
false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": " count by (container_name) (container_cpu_system_seconds_total{env=\"$env\", container_name=~\"neutron-server|nova-api|nova-conductor|nova-consoleauth|nova-novncproxy|nova-scheduler|neutron-metadata-agent|etcd|glance-api|glance-registry|horizon|keystone|memcached|neutron-dhcp-agent|neutron-l3-agent|neutron-metadata-agent\", container_name!=\"\"})", + "intervalFactor": 2, + "legendFormat": "{{container_name}}", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Controllers containers", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_PROMETHEUS-KUBER}", + "editable": true, + "error": false, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 19, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + 
"nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "targets": [ + { + "expr": "count (count by (instance) (container_cpu_system_seconds_total{env=\"$env\", container_name=~\"nova-compute|nova-libvirit\", container_name!=\"\"}))", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 4 + } + ], + "thresholds": "", + "title": "Total computes nodes", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS-KUBER}", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 21, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": " count by (container_name) (container_cpu_system_seconds_total{env=\"$env\", container_name=~\"nova-compute|nova-libvirt\", container_name!=\"\"})", + "intervalFactor": 2, + "legendFormat": "{{container_name}}", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Compute containers", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + 
"logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_PROMETHEUS-KUBER}", + "editable": true, + "error": false, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 22, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "targets": [ + { + "expr": "count (container_cpu_system_seconds_total{env=\"$env\", container_name=~\"(mariadb|galera|mysqls)\", container_name!=\"\"})", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 4 + } + ], + "thresholds": "", + "title": "Total DB nodes", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS-KUBER}", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 23, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + 
"percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": " count by (container_name) (container_cpu_system_seconds_total{env=\"$env\", container_name=~\"(galera|mariadb|mysqld)\", container_name!=\"\"})", + "intervalFactor": 2, + "legendFormat": "{{container_name}}", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "DB containers", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_PROMETHEUS-KUBER}", + "editable": true, + "error": false, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 24, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "targets": [ + { + "expr": "count 
(count by (instance) (container_cpu_system_seconds_total{env=\"$env\", container_name=~\"rabbitmq\", container_name!=\"\"}))", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 4 + } + ], + "thresholds": "", + "title": "Total MQ nodes", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS-KUBER}", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 25, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": " count by (container_name) (container_cpu_system_seconds_total{env=\"$env\", container_name=~\"rabbitmq\", container_name!=\"\"})", + "intervalFactor": 2, + "legendFormat": "{{container_name}}", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "MQ containers", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_PROMETHEUS-KUBER}", + "editable": 
true, + "error": false, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 26, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "targets": [ + { + "expr": "count (count by (instance) (container_cpu_system_seconds_total{env=\"$env\", container_name=~\"neutron-openvswitch-agent|openvswitch-db|openvswitch-vswitchd\", container_name!=\"\"}))", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 4 + } + ], + "thresholds": "", + "title": "Total OVS nodes", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS-KUBER}", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 27, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": " count by (container_name) (container_cpu_system_seconds_total{env=\"$env\", container_name=~\"neutron-openvswitch-agent|openvswitch-db|openvswitch-vswitchd\", 
container_name!=\"\"})", + "intervalFactor": 2, + "legendFormat": "{{container_name}}", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "OVS containers", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_PROMETHEUS-KUBER}", + "editable": true, + "error": false, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 28, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "targets": [ + { + "expr": "count (count by (instance) (container_cpu_system_seconds_total{env=\"$env\", container_name=~\"elasticsearch|kibana\", container_name!=\"\"}))", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 4 + } + ], + "thresholds": "", + "title": "Total stacklight backend nodes", + "type": "singlestat", + 
"valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS-KUBER}", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 29, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": " count by (container_name) (container_cpu_system_seconds_total{env=\"$env\", container_name=~\"elasticsearch|kibana\", container_name!=\"\"})", + "intervalFactor": 2, + "legendFormat": "{{container_name}}", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Stacklight containers", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_PROMETHEUS-KUBER}", + "editable": true, + "error": false, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 30, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + 
"name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "targets": [ + { + "expr": "count (count by (instance) (container_cpu_system_seconds_total{env=\"$env\", container_name=~\"heka\", container_name!=\"\"}))", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 4 + } + ], + "thresholds": "", + "title": "Total stacklight collector nodes", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS-KUBER}", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 31, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": " count by (container_name) (container_cpu_system_seconds_total{env=\"$env\", container_name=~\"heka\", container_name!=\"\"})", + "intervalFactor": 2, + "legendFormat": "{{container_name}}", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Stacklight collector containers", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis":
{ + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "showTitle": false, + "titleSize": "h6", + "height": "250px", + "repeat": null, + "repeatRowId": null, + "repeatIteration": null, + "collapse": false + }, + { + "title": "VMs statistics", + "panels": [ + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_PROMETHEUS-SYSTEMS}", + "editable": true, + "error": false, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 2, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "80%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "targets": [ + { + "expr": "count (openstack_hypervisor_running_vms{env=\"$env\" })", + "intervalFactor": 2, + "refId": "A", + "step": 4 + } + ], + "thresholds": "", + "title": "Total hypervisors", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 
129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_PROMETHEUS-SYSTEMS}", + "editable": true, + "error": false, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 15, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "VMs:", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "targets": [ + { + "expr": "openstack_hypervisor_total_running_vms{env=\"$env\"}", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 4 + } + ], + "thresholds": "", + "title": "Total VMs", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS-SYSTEMS}", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 1, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "openstack_hypervisor_running_vms{env=\"$env\", hypervisor=~\"$hypervisor\"}", + "intervalFactor": 2, + "legendFormat": "{{hypervisor}}", + "refId": "A", + "step": 2 + } + ], + "thresholds": 
[], + "timeFrom": null, + "timeShift": null, + "title": "Running VMs per node", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "showTitle": true, + "titleSize": "h6", + "height": "250px", + "repeat": null, + "repeatRowId": null, + "repeatIteration": null, + "collapse": false + }, + { + "title": "Memory", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS-SYSTEMS}", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 3, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "openstack_hypervisor_memory_used{env=\"$env\", hypervisor=~\"$hypervisor\"}", + "intervalFactor": 2, + "legendFormat": "{{hypervisor}}", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Memory used", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + 
] + } + ], + "showTitle": true, + "titleSize": "h6", + "height": "250px", + "repeat": null, + "repeatRowId": null, + "repeatIteration": null, + "collapse": false + }, + { + "title": "VCPU", + "panels": [ + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": true, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "${DS_PROMETHEUS-SYSTEMS}", + "editable": true, + "error": false, + "format": "none", + "gauge": { + "maxValue": 600, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 4, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "targets": [ + { + "expr": "openstack_hypervisor_total_vcpus{env=\"$env\"}", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 4 + } + ], + "thresholds": "1000", + "title": "Total vCPUs", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_PROMETHEUS-SYSTEMS}", + "editable": true, + "error": false, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 6, + "interval": 
null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "targets": [ + { + "expr": "openstack_hypervisor_total_vcpus_used{env=\"$env\"}", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 4 + } + ], + "thresholds": "", + "title": "Total vCPUs used", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS-SYSTEMS}", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 5, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "openstack_hypervisor_vcpus_used {env=\"$env\", hypervisor=~\"$hypervisor\"}", + "intervalFactor": 2, + "legendFormat": "Used VCPUs {{hypervisor}}", + "refId": "A", + "step": 2 + }, + { + "expr": "openstack_hypervisor_vcpus{env=\"$env\", hypervisor=~\"$hypervisor\"}", + "intervalFactor": 2, + "legendFormat": "Total VCPUs {{hypervisor}}", + "refId": "B", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "VCPUS", + "tooltip": { + "msResolution": 
false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "showTitle": true, + "titleSize": "h6", + "height": "250px", + "repeat": null, + "repeatRowId": null, + "repeatIteration": null, + "collapse": false + }, + { + "title": "Projects", + "panels": [ + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_PROMETHEUS-SYSTEMS}", + "editable": true, + "error": false, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 7, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "targets": [ + { + "expr": "openstack_identity_total_projects{env=\"$env\"}", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 4 + } + ], + "thresholds": "", + "title": "Total projects", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "aliasColors": {}, + 
"bars": false, + "datasource": "${DS_PROMETHEUS-SYSTEMS}", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 8, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "openstack_identity_total_projects{env=\"$env\"}", + "intervalFactor": 2, + "legendFormat": "Total projects", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Total projects", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "showTitle": true, + "titleSize": "h6", + "height": "250px", + "repeat": null, + "repeatRowId": null, + "repeatIteration": null, + "collapse": false + }, + { + "title": "Servers", + "panels": [ + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_PROMETHEUS-SYSTEMS}", + "editable": true, + "error": false, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 9, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to 
text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "targets": [ + { + "expr": "openstack_server_state_total_active{env=\"$env\"}", + "intervalFactor": 2, + "refId": "A", + "step": 4 + } + ], + "thresholds": "", + "title": "Total active VMs", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS-SYSTEMS}", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 10, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "openstack_server_state_active{env=\"$env\", project=~\"$project\"}", + "intervalFactor": 2, + "legendFormat": "Project {{project}}", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Active VMs per project", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + 
"logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "showTitle": true, + "titleSize": "h6", + "height": "250px", + "repeat": null, + "repeatRowId": null, + "repeatIteration": null, + "collapse": false + }, + { + "title": "VCPUs per project", + "panels": [ + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_PROMETHEUS-SYSTEMS}", + "editable": true, + "error": false, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 16, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": null, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "targets": [ + { + "expr": "openstack_server_stats_total_vcpus{env=\"$env\"}", + "intervalFactor": 2, + "refId": "A", + "step": 4 + } + ], + "thresholds": "", + "title": "Total VCPUs", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS-SYSTEMS}", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 17, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + 
"minSpan": null, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "openstack_server_stats_vcpus{env=\"$env\"}", + "intervalFactor": 2, + "legendFormat": "Project {{project}}", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "VCPUs per project", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "showTitle": true, + "titleSize": "h6", + "height": "250px", + "repeat": null, + "repeatRowId": null, + "repeatIteration": null, + "collapse": false + }, + { + "title": "Disks per project", + "panels": [ + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_PROMETHEUS-SYSTEMS}", + "decimals": 0, + "editable": true, + "error": false, + "format": "bytes", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 11, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + 
"span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "targets": [ + { + "expr": "openstack_server_stats_total_disk{env=\"$env\"}", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 4 + } + ], + "thresholds": "", + "title": "Total used disks", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS-SYSTEMS}", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 12, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "openstack_server_stats_disk{env=\"$env\"}", + "intervalFactor": 2, + "legendFormat": "{{project}}", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Disks per project", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "showTitle": true, + "titleSize": "h6", + "height": "250px", + "repeat": null, + "repeatRowId": null, + "repeatIteration": null, + "collapse": false + }, + { + "title": "RAM per project", + "panels": [ + { + "cacheTimeout": 
null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_PROMETHEUS-SYSTEMS}", + "editable": true, + "error": false, + "format": "bytes", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 13, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "targets": [ + { + "expr": "openstack_server_stats_total_ram{env=\"$env\"}", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 4 + } + ], + "thresholds": "", + "title": "Total used RAM", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS-SYSTEMS}", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 14, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "openstack_server_stats_ram{env=\"$env\", project=~\"$project\"}", + 
"intervalFactor": 2, + "legendFormat": "Project {{project}}", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Used RAM per project", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "showTitle": true, + "titleSize": "h6", + "height": "250px", + "repeat": null, + "repeatRowId": null, + "repeatIteration": null, + "collapse": false + }, + { + "title": "New row", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS-SYSTEMS}", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 35, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (container) (os_api_response_time_processingtime{env=\"$env\"}) / count by (container) (os_api_response_time_processingtime{env=\"$env\"})", + "intervalFactor": 2, + "legendFormat": "{{container}}", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Average service response time", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { +
"format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "showTitle": false, + "titleSize": "h6", + "height": "250px", + "repeat": null, + "repeatRowId": null, + "repeatIteration": null, + "collapse": false + }, + { + "title": "New row", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS-SYSTEMS}", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 36, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "vm_spawn_avg_time_timediffinsec{env=\"$env\"}", + "intervalFactor": 2, + "legendFormat": "spawn time", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "VM spawn average time", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "showTitle": false, + "titleSize": "h6", + "height": "250px", + "repeat": null, + "repeatRowId": null, + "repeatIteration": null, + "collapse": false + } + ] +} \ No newline at end of file diff --git a/doc/source/methodologies/monitoring/configs/dashboards/Systems_nodes_statistics.json 
b/doc/source/methodologies/monitoring/configs/dashboards/Systems_nodes_statistics.json new file mode 100644 index 0000000..6c7946c --- /dev/null +++ b/doc/source/methodologies/monitoring/configs/dashboards/Systems_nodes_statistics.json @@ -0,0 +1,2999 @@ +{ + "__inputs": [ + { + "name": "DS_PROMETHEUS-SYSTEMS", + "label": "prometheus-systems", + "description": "", + "type": "datasource", + "pluginId": "prometheus", + "pluginName": "Prometheus" + } + ], + "__requires": [ + { + "type": "panel", + "id": "graph", + "name": "Graph", + "version": "" + }, + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "4.0.1" + }, + { + "type": "datasource", + "id": "prometheus", + "name": "Prometheus", + "version": "1.0.0" + } + ], + "id": null, + "title": "Systems nodes statistics", + "tags": [ + "intel" + ], + "style": "dark", + "timezone": "browser", + "editable": true, + "sharedCrosshair": false, + "hideControls": false, + "time": { + "from": "now-5m", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "templating": { + "list": [ + { + "allValue": null, + "current": {}, + "datasource": "${DS_PROMETHEUS-SYSTEMS}", + "hide": 0, + "includeAll": false, + "label": null, + "multi": false, + "name": "env", + "options": [], + "query": "label_values(env)", + "refresh": 1, + "regex": "", + "sort": 0, + "tagValuesQuery": null, + "tagsQuery": null, + "type": "query" + }, + { + "allValue": ".*", + "current": {}, + "datasource": "${DS_PROMETHEUS-SYSTEMS}", + "hide": 0, + "includeAll": false, + "label": null, + "multi": true, + "name": "node", + "options": [], + "query": "query_result(sum by (host) (system_uptime{env=\"$env\"}))", + "refresh": 1, + "regex": "/.*\"(node.*|.*comp.*)\".*/", + "sort": 0, + "tagValuesQuery": null, + "tagsQuery": null, + "type": "query" + 
}, + { + "allValue": ".*", + "current": {}, + "datasource": "${DS_PROMETHEUS-SYSTEMS}", + "hide": 0, + "includeAll": true, + "label": null, + "multi": false, + "name": "disk", + "options": [], + "query": "query_result(sum by (name) (diskio_read_bytes{env=\"$env\", host=\"$node\"}))", + "refresh": 1, + "regex": "/.*\"(.*)\".*/", + "sort": 0, + "tagValuesQuery": null, + "tagsQuery": null, + "type": "query" + }, + { + "allValue": ".*", + "current": {}, + "datasource": "${DS_PROMETHEUS-SYSTEMS}", + "hide": 0, + "includeAll": true, + "label": null, + "multi": false, + "name": "interface", + "options": [], + "query": "query_result(sum by (interface) (net_bytes_recv{env=\"$env\", host=\"$node\"}))", + "refresh": 1, + "regex": "/.*\"(.*)\".*/", + "sort": 0, + "tagValuesQuery": null, + "tagsQuery": null, + "type": "query" + }, + { + "allValue": ".*", + "current": {}, + "datasource": "${DS_PROMETHEUS-SYSTEMS}", + "hide": 0, + "includeAll": false, + "label": null, + "multi": false, + "name": "proc_name_for_numa", + "options": [], + "query": "query_result(sum by (name) (system_numa_memory_per_pid_memory_heap{env=\"$env\", host=~\"$node\"}))", + "refresh": 1, + "regex": "/.*\"(.*)\".*/", + "sort": 0, + "tagValuesQuery": null, + "tagsQuery": null, + "type": "query" + }, + { + "allValue": ".*", + "current": {}, + "datasource": "${DS_PROMETHEUS-SYSTEMS}", + "hide": 0, + "includeAll": false, + "label": null, + "multi": false, + "name": "proc_name_for_cpu_stat", + "options": [], + "query": "query_result(sum by (process) (system_per_process_cpu_usage_system{env=\"$env\", host=~\"$node\"}))", + "refresh": 1, + "regex": "/.*\"(.*)\".*/", + "sort": 0, + "tagValuesQuery": null, + "tagsQuery": null, + "type": "query" + } + ] + }, + "annotations": { + "list": [] + }, + "refresh": false, + "schemaVersion": 13, + "version": 28, + "links": [], + "gnetId": null, + "rows": [ + { + "title": "Load average", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "datasource": 
"${DS_PROMETHEUS-SYSTEMS}", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "system_load5{env=\"$env\", host=~\"$node\"} ", + "intervalFactor": 2, + "legendFormat": "{{host}}", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Node load average 5m", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS-SYSTEMS}", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 3, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "system_load15{env=\"$env\", host=~\"$node\"} ", + "intervalFactor": 2, + "legendFormat": "{{host}}", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Node load 
average 15m", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS-SYSTEMS}", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 1, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "system_load1{env=\"$env\", host=~\"$node\"} ", + "intervalFactor": 2, + "legendFormat": "{{host}}", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Node load average 1m", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "showTitle": true, + "titleSize": "h6", + "height": "250px", + "repeat": null, + "repeatRowId": null, + "repeatIteration": null, + "collapse": false + }, + { + "title": "Processes statistics", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "datasource": 
"${DS_PROMETHEUS-SYSTEMS}", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 4, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "processes_running{env=\"$env\",host=~\"$node\"} ", + "intervalFactor": 2, + "legendFormat": "{{host}}", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Process running", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS-SYSTEMS}", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 5, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "processes_stopped{env=\"$env\", host=~\"$node\"} ", + "intervalFactor": 2, + "legendFormat": "{{host}}", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Process 
stopped", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS-SYSTEMS}", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 6, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "processes_paging{env=\"$env\", host=~\"$node\"} ", + "intervalFactor": 2, + "legendFormat": "{{host}}", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Process waiting", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS-SYSTEMS}", + "editable": true, + "error": false, + "fill": 1, + "id": 32, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": 
true, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "system_openstack_list_cinder{env=\"$env\",host=~\"$node\"}", + "intervalFactor": 2, + "legendFormat": "Cinder process {{host}}", + "metric": "system_openstack_list_cinder", + "refId": "A", + "step": 2 + }, + { + "expr": "system_openstack_list_mariadb{env=\"$env\",host=~\"$node\"}", + "intervalFactor": 2, + "legendFormat": "Mariadb process {{host}}", + "metric": "system_openstack_list_cinder", + "refId": "B", + "step": 2 + }, + { + "expr": "system_openstack_list_rabbitmq{env=\"$env\",host=~\"$node\"}", + "intervalFactor": 2, + "legendFormat": "Rabbitmq process {{host}}", + "metric": "system_openstack_list_cinder", + "refId": "C", + "step": 2 + }, + { + "expr": "system_openstack_list_keystone{env=\"$env\",host=~\"$node\"}", + "intervalFactor": 2, + "legendFormat": "Keystone process {{host}}", + "metric": "system_openstack_list_cinder", + "refId": "D", + "step": 2 + }, + { + "expr": "system_openstack_list_glance{env=\"$env\",host=~\"$node\"}", + "intervalFactor": 2, + "legendFormat": "Glance process {{host}}", + "metric": "system_openstack_list_cinder", + "refId": "E", + "step": 2 + }, + { + "expr": "system_openstack_list_nova{env=\"$env\",host=~\"$node\"}", + "intervalFactor": 2, + "legendFormat": "Nova process {{host}}", + "metric": "system_openstack_list_cinder", + "refId": "F", + "step": 2 + }, + { + "expr": "system_openstack_list_neutron{env=\"$env\",host=~\"$node\"}", + "intervalFactor": 2, + "legendFormat": "Neutron process {{host}}", + "metric": "system_openstack_list_cinder", + "refId": "G", + "step": 2 + }, + { + "expr": "system_openstack_list_openvswitch{env=\"$env\",host=~\"$node\"}", + "intervalFactor": 2, + "legendFormat": "Openvswitch process {{host}}", + "metric": 
"system_openstack_list_cinder", + "refId": "H", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "OpenStack processes", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "showTitle": true, + "titleSize": "h6", + "height": "250px", + "repeat": null, + "repeatRowId": null, + "repeatIteration": null, + "collapse": false + }, + { + "title": "CPU usage", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS-SYSTEMS}", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 7, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "cpu_usage_steal{env=\"$env\", cpu=\"cpu-total\", host=~\"$node\"}", + "intervalFactor": 2, + "legendFormat": "{{host}}", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "CPU steal", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + 
"logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS-SYSTEMS}", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 8, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "cpu_usage_iowait{env=\"$env\", cpu=\"cpu-total\", host=~\"$node\"}", + "intervalFactor": 2, + "legendFormat": "{{host}}", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "CPU wait", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS-SYSTEMS}", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 9, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "cpu_usage_user{env=\"$env\", cpu=\"cpu-total\", host=~\"$node\"}", + "intervalFactor": 2, 
+ "legendFormat": "{{host}}", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "CPU user", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS-SYSTEMS}", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 10, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "cpu_usage_system{env=\"$env\", cpu=\"cpu-total\", host=~\"$node\"}", + "intervalFactor": 2, + "legendFormat": "{{host}}", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "CPU system", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS-SYSTEMS}", + "editable": true, + "error": false, + "fill": 1, + 
"grid": {}, + "id": 11, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "cpu_usage_softirq{env=\"$env\", cpu=\"cpu-total\", host=~\"$node\"}", + "intervalFactor": 2, + "legendFormat": "{{host}}", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "CPU soft interrupts", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS-SYSTEMS}", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 12, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "cpu_usage_irq{env=\"$env\", cpu=\"cpu-total\", host=~\"$node\"}", + "intervalFactor": 2, + "legendFormat": "{{host}}", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "CPU interrupts", + "tooltip": { + "msResolution": false, 
+ "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS-SYSTEMS}", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 13, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "cpu_usage_nice{env=\"$env\", cpu=\"cpu-total\", host=~\"$node\"}", + "intervalFactor": 2, + "legendFormat": "{{host}}", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "CPU nice", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS-SYSTEMS}", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 14, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + 
"links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "cpu_usage_idle{env=\"$env\", cpu=\"cpu-total\", host=~\"$node\"}", + "intervalFactor": 2, + "legendFormat": "{{host}}", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "CPU Idle", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS-SYSTEMS}", + "editable": true, + "error": false, + "fill": 1, + "id": 33, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "system_per_process_cpu_usage_system{env=\"$env\", host=\"$node\", process=\"$proc_name_for_cpu_stat\"}", + "intervalFactor": 2, + "legendFormat": "System CPU usage for {{process}}", + "refId": "A", + "step": 2 + }, + { + "expr": "system_per_process_cpu_usage_user{env=\"$env\", host=\"$node\", process=\"$proc_name_for_cpu_stat\"}", + "intervalFactor": 2, + "legendFormat": "User CPU usage for {{process}}", + "refId": "B", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": 
"CPU usage per process", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "percent", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "showTitle": true, + "titleSize": "h6", + "height": "250px", + "repeat": null, + "repeatRowId": null, + "repeatIteration": null, + "collapse": false + }, + { + "title": "Memory usage", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS-SYSTEMS}", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 15, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "mem_cached{env=\"$env\", host=~\"$node\"}", + "intervalFactor": 2, + "legendFormat": "{{host}}", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Mem cached", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS-SYSTEMS}", + 
"editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 16, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "mem_buffered{env=\"$env\", host=~\"$node\"}", + "intervalFactor": 2, + "legendFormat": "{{host}}", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Mem buffered", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS-SYSTEMS}", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 17, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "mem_free{env=\"$env\", host=~\"$node\"}", + "intervalFactor": 2, + "legendFormat": "{{host}}", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Mem free", + "tooltip": { + "msResolution": false, + "shared": true, + 
"sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS-SYSTEMS}", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 18, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "mem_used{env=\"$env\", host=~\"$node\"}", + "intervalFactor": 2, + "legendFormat": "{{host}}", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Mem used", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS-SYSTEMS}", + "editable": true, + "error": false, + "fill": 1, + "id": 31, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "percentage": 
false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "system_memory_bandwidth{env=\"$env\",host=~\"$node\"}", + "intervalFactor": 2, + "legendFormat": "Env: {{env}} hosts: {{host}}", + "metric": "system_memory_bandwidth", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Memory bandwidth", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "decmbytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "showTitle": true, + "titleSize": "h6", + "height": "250px", + "repeat": null, + "repeatRowId": null, + "repeatIteration": null, + "collapse": false + }, + { + "title": "Disk statistics", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS-SYSTEMS}", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 19, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(diskio_reads{env=\"$env\", name=~\"$disk\", host=~\"$node\"}[5m])", + "intervalFactor": 2, + "legendFormat": "Read {{host}} {{name}}", + "refId": "A", + "step": 2 + }, + { + "expr": "rate(diskio_writes{env=\"$env\", name=~\"$disk\", host=~\"$node\"}[5m])", + "intervalFactor": 2, + 
"legendFormat": "Write {{host}} {{name}}", + "refId": "B", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Disk rate read/s and write/s for last 5 minutes", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS-SYSTEMS}", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 20, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(diskio_read_bytes{env=\"$env\", name=~\"$disk\", host=~\"$node\"}[5m])", + "intervalFactor": 2, + "legendFormat": "Read {{host}} {{name}}", + "refId": "A", + "step": 2 + }, + { + "expr": "rate(diskio_write_bytes{env=\"$env\", name=~\"$disk\", host=~\"$node\"}[5m])", + "intervalFactor": 2, + "legendFormat": "Write {{host}} {{name}}", + "refId": "B", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Disk rate read/s and write/s bytes for last 5 minutes", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + 
"max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "showTitle": true, + "titleSize": "h6", + "height": "250px", + "repeat": null, + "repeatRowId": null, + "repeatIteration": null, + "collapse": false + }, + { + "title": "NUMA statistic", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS-SYSTEMS}", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 23, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(kernel_vmstat_numa_hit{env=\"$env\", host=~\"$node\"}[5m])", + "intervalFactor": 2, + "legendFormat": "{{host}} ", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Average 5m rate for NUMA hit", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS-SYSTEMS}", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 24, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + 
"nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(kernel_vmstat_numa_miss{env=\"$env\", host=~\"$node\"}[5m])", + "intervalFactor": 2, + "legendFormat": "{{host}} ", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Average 5m rate for NUMA miss", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS-SYSTEMS}", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 25, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(kernel_vmstat_numa_foreign{env=\"$env\", host=~\"$node\"}[5m])", + "intervalFactor": 2, + "legendFormat": "{{host}} ", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Average 5m rate for NUMA forign", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": 
"short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS-SYSTEMS}", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 26, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(kernel_vmstat_numa_local{env=\"$env\", host=~\"$node\"}[5m])", + "intervalFactor": 2, + "legendFormat": "{{host}} ", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Average 5m rate for NUMA local", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS-SYSTEMS}", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 27, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 12, + "stack": 
false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(kernel_vmstat_numa_other{env=\"$env\", host=~\"$node\"}[5m])", + "intervalFactor": 2, + "legendFormat": "{{host}} ", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Average 5m rate for NUMA other", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS-SYSTEMS}", + "editable": true, + "error": false, + "fill": 1, + "id": 34, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "system_numa_memory_per_pid_memory_stack{env=\"$env\", host=~\"$node\", name=\"$proc_name_for_numa\"}", + "intervalFactor": 2, + "legendFormat": "Numa memory stack for {{name}} pid {{pid}} on {{host}}", + "refId": "A", + "step": 2 + }, + { + "expr": "system_numa_memory_per_pid_memory_huge{env=\"$env\", host=~\"$node\", name=\"$proc_name_for_numa\"}", + "intervalFactor": 2, + "legendFormat": "Numa memory huge for {{name}} pid {{pid}} on {{host}}", + "refId": "B", + "step": 2 + }, + { + "expr": "system_numa_memory_per_pid_memory_heap{env=\"$env\", host=~\"$node\", name=\"$proc_name_for_numa\"}", + "intervalFactor": 2, + "legendFormat": "Numa memory heap for 
{{name}} pid {{pid}} on {{host}}", + "refId": "C", + "step": 2 + }, + { + "expr": "system_numa_memory_per_pid_memory_private{env=\"$env\", host=~\"$node\", name=\"$proc_name_for_numa\"}", + "intervalFactor": 2, + "legendFormat": "Numa memory private for {{name}} pid {{pid}} on {{host}}", + "refId": "D", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Numa memory per pid", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "showTitle": true, + "titleSize": "h6", + "height": "250px", + "repeat": null, + "repeatRowId": null, + "repeatIteration": null, + "collapse": false + }, + { + "title": "Network", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS-SYSTEMS}", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 22, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(net_bytes_sent{env=\"$env\", interface=~\"$interface\", host=~\"$node\"}[5m])*8", + "intervalFactor": 2, + "legendFormat": "Out {{host}} {{interface}}", + "refId": "A", + "step": 2 + }, + { + "expr": "rate(net_bytes_recv{env=\"$env\", interface=~\"$interface\", host=~\"$node\"}[5m])*8", + "intervalFactor": 2, + "legendFormat": "In {{host}} 
{{interface}}", + "refId": "B", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Network load", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS-SYSTEMS}", + "editable": true, + "error": false, + "fill": 1, + "id": 35, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "system_tcp_queue_sum_recv{env=\"$env\",host=~\"$node\"}", + "intervalFactor": 2, + "legendFormat": "Tcp queue summary recv on {{host}}", + "refId": "A", + "step": 2 + }, + { + "expr": "system_tcp_queue_sum_send{env=\"$env\",host=~\"$node\"}", + "intervalFactor": 2, + "legendFormat": "Tcp queue summary send on {{host}}", + "refId": "B", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Tcp queue summary", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + 
"min": null, + "show": true + } + ] + } + ], + "showTitle": true, + "titleSize": "h6", + "height": "250px", + "repeat": null, + "repeatRowId": null, + "repeatIteration": null, + "collapse": false + }, + { + "title": "Over metrics", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS-SYSTEMS}", + "editable": true, + "error": false, + "fill": 1, + "id": 28, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "system_entropy{env=\"$env\",host=~\"$node\"}", + "intervalFactor": 2, + "legendFormat": "{{host}}", + "metric": "system_entropy", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "System entropy", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "showTitle": true, + "titleSize": "h6", + "height": 250, + "repeat": null, + "repeatRowId": null, + "repeatIteration": null, + "collapse": false + }, + { + "title": "IOstat", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS-SYSTEMS}", + "editable": true, + "error": false, + "fill": 1, + "id": 29, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + 
"lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "system_per_device_iostat_average_queue{env=\"$env\",host=~\"$node\",device=~\"$disk\"}", + "intervalFactor": 2, + "legendFormat": "Average queue {{host}} {{device}}", + "metric": "system_per_device_iostat_average_queue", + "refId": "A", + "step": 2 + }, + { + "expr": "system_per_device_iostat_read_merge{env=\"$env\",host=~\"$node\",device=~\"$disk\"}", + "intervalFactor": 2, + "legendFormat": "Read merge {{host}} {{device}}", + "metric": "", + "refId": "B", + "step": 2 + }, + { + "expr": "system_per_device_iostat_write_merge{env=\"$env\",host=~\"$node\",device=~\"$disk\"}", + "intervalFactor": 2, + "legendFormat": "Write merge {{host}} {{device}}", + "metric": "", + "refId": "C", + "step": 2 + }, + { + "expr": "system_per_device_iostat_await{env=\"$env\",host=~\"$node\",device=~\"$disk\"}", + "intervalFactor": 2, + "legendFormat": "Await {{host}} {{device}}", + "metric": "", + "refId": "D", + "step": 2 + }, + { + "expr": "system_per_device_iostat_read_await{env=\"$env\",host=~\"$node\",device=~\"$disk\"}", + "intervalFactor": 2, + "legendFormat": "Read await {{host}} {{device}}", + "metric": "", + "refId": "E", + "step": 2 + }, + { + "expr": "system_per_device_iostat_write_await{env=\"$env\",host=~\"$node\",device=~\"$disk\"}", + "intervalFactor": 2, + "legendFormat": "Write await {{host}} {{device}}", + "metric": "", + "refId": "F", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "IOstat per device", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": 
null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "${DS_PROMETHEUS-SYSTEMS}", + "editable": true, + "error": false, + "fill": 1, + "id": 30, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "system_per_device_iostat_util{env=\"$env\",host=~\"$node\",device=~\"$disk\"}", + "intervalFactor": 2, + "legendFormat": "IO utilisation {{host}} {{device}}", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Panel Title", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "showTitle": true, + "titleSize": "h6", + "height": 250, + "repeat": null, + "repeatRowId": null, + "repeatIteration": null, + "collapse": false + } + ] +} \ No newline at end of file diff --git a/doc/source/methodologies/monitoring/configs/deploy_k8s_using_kargo.sh b/doc/source/methodologies/monitoring/configs/deploy_k8s_using_kargo.sh new file mode 100644 index 0000000..cf69456 --- /dev/null +++ b/doc/source/methodologies/monitoring/configs/deploy_k8s_using_kargo.sh @@ -0,0 +1,77 @@ +#!/usr/bin/env bash + +: 
${DB_CONNECTION_STRING:?"You need to specify DB_CONNECTION_STRING parameter"} +: ${ENV_NAME:?"You need to specify ENV_NAME parameter"} + +: ${MANAGEMENT_INTERFACE:="p1p1.602"} +: ${COBBLER_ADDRESS:="172.20.8.34"} +: ${CUSTOM_YAML} +: ${KARGO_REPO} +: ${KARGO_COMMIT} +: ${FUEL_CCP_COMMIT} +: ${ADMIN_USER} +: ${ADMIN_PASSWORD} +: ${ADMIN_NODE_CLEANUP} +DEPLOY_METHOD="kargo" +WORKSPACE="${HOME}/kargo_workspace_${ENV_NAME}" # use $HOME: a quoted ~ is never tilde-expanded +SSH_OPTIONS="-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null" + +get_env_nodes () +{ + ENV_NODES_NAMES=$(echo $(psql ${DB_CONNECTION_STRING} -c "select name from servers where environment_id in (select id from environments where name='${ENV_NAME}')" -P format=unaligned -t)) + if [ -z "${ENV_NODES_NAMES}" ] + then + echo "No nodes in environment with name ${ENV_NAME}" + exit 1 + fi +} + +get_env_nodes_ips () +{ + ENV_NODES_IPS=$(echo $(ssh ${SSH_OPTIONS} root@${COBBLER_ADDRESS} bash -ex << EOF + for COBBLER_SYSTEM_NAME in ${ENV_NODES_NAMES} + do + NODE_IP=\$(cobbler system dumpvars --name=\${COBBLER_SYSTEM_NAME} | grep ^ip_address_${MANAGEMENT_INTERFACE} | awk '{print \$3}') + NODE_IPS+=\${NODE_IP}" " + done + echo \${NODE_IPS} +EOF + )) +} + +main () +{ + get_env_nodes + get_env_nodes_ips + export ADMIN_IP=$(echo ${ENV_NODES_IPS} | awk '{print $1}') + export SLAVE_IPS=$(echo ${ENV_NODES_IPS}) + +# for SLAVE_IP in ${SLAVE_IPS} +# do +# ssh ${SSH_OPTIONS} root@${SLAVE_IP} bash -ex << EOF +#echo "deb https://apt.dockerproject.org/repo ubuntu-\$(grep DISTRIB_CODENAME /etc/lsb-release | awk -F"=" '{print \$2}') main" >> /etc/apt/sources.list +#apt-get update && apt-get install -y --allow-unauthenticated -o Dpkg::Options::="--force-confdef" docker-engine +#EOF +# done + + if [ -d "$WORKSPACE" ] ; then + rm -rf "$WORKSPACE" + fi + mkdir -p "$WORKSPACE" + cd "$WORKSPACE" + + if [ -d './fuel-ccp-installer' ] ; then + rm -rf ./fuel-ccp-installer + fi + git clone https://review.openstack.org/openstack/fuel-ccp-installer + cd ./fuel-ccp-installer + + if [
"$FUEL_CCP_COMMIT" ]; then + git fetch git://git.openstack.org/openstack/fuel-ccp-installer $FUEL_CCP_COMMIT && git checkout FETCH_HEAD + fi + + echo "Running on $NODE_NAME: $ENV_NAME" + + bash -xe "./utils/jenkins/run_k8s_deploy_test.sh" +} +main \ No newline at end of file diff --git a/doc/source/methodologies/monitoring/configs/elasticsearch-heka/deploy-heka.yaml b/doc/source/methodologies/monitoring/configs/elasticsearch-heka/deploy-heka.yaml new file mode 100644 index 0000000..43522e4 --- /dev/null +++ b/doc/source/methodologies/monitoring/configs/elasticsearch-heka/deploy-heka.yaml @@ -0,0 +1,46 @@ +--- +- hosts: main-kuber + remote_user: root + tasks: + - name: Fetch heka package + get_url: + url: "{{ heka_package_url }}" + dest: /tmp/heka_amd64.deb + mode: 0664 + force: yes + - name: Download heka package locally + fetch: + src: /tmp/heka_amd64.deb + dest: ./heka_amd64.deb + fail_on_missing: yes + flat: yes + +- hosts: cluster-nodes + remote_user: root + tasks: + - name: Propagate heka package across cluster nodes + copy: + src: ./heka_amd64.deb + dest: /tmp/heka_amd64.deb + +- hosts: all-cluster-nodes + remote_user: root + tasks: + - name: Install heka package + apt: deb=/tmp/heka_amd64.deb + - name: Adding heka user to docker group + user: name='heka' groups=docker append=yes + - name: Copy heka conf + template: src=heka/00-hekad.toml.j2 dest=/etc/heka/conf.d/00-hekad.toml + notify: restart heka + - name: Copy heka lua scripts + template: src=heka/kubeapi_to_int.lua.j2 dest=/usr/share/heka/lua_filters/kubeapi_to_int.lua + register: heka_lua + notify: restart heka + - name: ensure heka is running + systemd: state=started name=heka enabled=yes + + handlers: + - name: restart heka + systemd: state=restarted name=heka + diff --git a/doc/source/methodologies/monitoring/configs/elasticsearch-heka/deploy_elasticsearch_kibana.sh b/doc/source/methodologies/monitoring/configs/elasticsearch-heka/deploy_elasticsearch_kibana.sh new file mode 100644 index 
0000000..ecbe00b --- /dev/null +++ b/doc/source/methodologies/monitoring/configs/elasticsearch-heka/deploy_elasticsearch_kibana.sh @@ -0,0 +1,71 @@ +#!/bin/bash -xe + +HOSTNAME=`hostname` +ELASTICSEARCH_NODE=${ELASTICSEARCH_NODE:-172.20.9.3} + +# install java +sudo add-apt-repository -y ppa:webupd8team/java +sudo apt-get update +sudo apt-get -y install oracle-java8-installer + +# install elastic by adding extra repository +wget -qO - https://packages.elastic.co/GPG-KEY-elasticsearch | sudo apt-key add - +echo "deb http://packages.elastic.co/elasticsearch/2.x/debian stable main" | sudo tee -a /etc/apt/sources.list.d/elasticsearch-2.x.list +sudo apt-get update +sudo apt-get -y install elasticsearch + +# edit configuration (cluster name, node name = this host, bind address): +sed -i -E -e 's/^.*cluster.name: .*$/ cluster.name: elasticsearch_k8s/g' /etc/elasticsearch/elasticsearch.yml +sed -i -E -e "s/^.*node.name: .*$/ node.name: ${HOSTNAME}/g" /etc/elasticsearch/elasticsearch.yml +sed -i -E -e "s/^.*network.host: .*$/ network.host: ${ELASTICSEARCH_NODE}/g" /etc/elasticsearch/elasticsearch.yml + +# increase memory limits: +sed -i -E -e "s/^.*ES_HEAP_SIZE=.*$/ES_HEAP_SIZE=10g/g" /etc/default/elasticsearch + +# start service: +sudo systemctl restart elasticsearch +sudo systemctl daemon-reload +sudo systemctl enable elasticsearch + +# install kibana from extra repository ("|" is the sed delimiter below because the URL contains "/"): +echo "deb http://packages.elastic.co/kibana/4.5/debian stable main" | sudo tee -a /etc/apt/sources.list +sudo apt-get update +sudo apt-get -y install kibana +sed -i -E -e "s|^.*elasticsearch.url:.*$| elasticsearch.url: \"http://${ELASTICSEARCH_NODE}:9200\"|g" /opt/kibana/config/kibana.yml + +# enable kibana service: +sudo systemctl daemon-reload +sudo systemctl enable kibana +sudo systemctl start kibana + +# install nginx: +sudo apt-get -y install nginx + +# set kibana admin:password (admin:admin) +echo "admin:`openssl passwd admin`" | sudo tee -a /etc/nginx/htpasswd.users + +# prepare nginx config: +cat << EOF >> /etc/nginx/sites-available/default
+server { + listen 80; + + server_name ${HOSTNAME}; + + auth_basic "Restricted Access"; + auth_basic_user_file /etc/nginx/htpasswd.users; + + location / { + proxy_pass http://localhost:5601; + proxy_http_version 1.1; + proxy_set_header Upgrade \$http_upgrade; + proxy_set_header Connection 'upgrade'; + proxy_set_header Host \$host; + proxy_cache_bypass \$http_upgrade; + } +} +EOF + +# check and start nginx service: +sudo nginx -t +sudo systemctl restart nginx + diff --git a/doc/source/methodologies/monitoring/configs/elasticsearch-heka/deploy_heka.sh b/doc/source/methodologies/monitoring/configs/elasticsearch-heka/deploy_heka.sh new file mode 100755 index 0000000..57b13ae --- /dev/null +++ b/doc/source/methodologies/monitoring/configs/elasticsearch-heka/deploy_heka.sh @@ -0,0 +1,60 @@ +#!/bin/bash +set -e +export ANSIBLE_HOST_KEY_CHECKING=False +export SSH_USER="root" +export SSH_PASS="r00tme" +cd $(dirname $(realpath $0)) + +ENV=${1} +if [ -z "${ENV}" ]; then + echo "Please provide env number $(basename $0) [1|2|3|4|5|6]" + exit 1 +fi +# elastic for k8s at rackspace as default +ELASTICSEARCH_NODE=${ELASTICSEARCH_NODE:-172.20.9.3} +# heka 0.10.0 as default +HEKA_PACKAGE_URL=${HEKA_PACKAGE_URL:-https://github.com/mozilla-services/heka/releases/download/v0.10.0/heka_0.10.0_amd64.deb} +KUBE_MAIN_NODE="172.20.8.6${ENV}" +SSH_OPTS="-q -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no" + +echo "Get clusters nodes ..." 
+NODES_TMP=$(sshpass -p ${SSH_PASS} ssh ${SSH_OPTS} ${SSH_USER}@${KUBE_MAIN_NODE} 'kubectl get nodes -o jsonpath='"'"'{.items[*].status.addresses[?(@.type=="InternalIP")].address}'"'"'') +ALL_IP_ON_KUBER_NODE=$(sshpass -p ${SSH_PASS} ssh ${SSH_OPTS} ${SSH_USER}@${KUBE_MAIN_NODE} ip addr | grep 172.20 | awk '{print $2}' | awk -F'/' '{print $1}') +GREP_STRING_TMP="" +for i in $ALL_IP_ON_KUBER_NODE; do + GREP_STRING_TMP="${GREP_STRING_TMP}${i}|" +done +GREP_STRING=${GREP_STRING_TMP:0:-1} +SSH_AUTH="ansible_ssh_user=${SSH_USER} ansible_ssh_pass=${SSH_PASS}" +echo "[main-kuber]" > cluster-hosts +echo "${KUBE_MAIN_NODE} ${SSH_AUTH}" >> cluster-hosts +echo "[cluster-nodes]" >> cluster-hosts +set +e +# Remove IP of kuber node +for i in ${NODES_TMP} ; do + TMP_VAR=$(echo $i | grep -vE "(${GREP_STRING})") + NODES="${NODES} ${TMP_VAR}" +done +set -e +for i in ${NODES} ; do + if [ "$i" != "${KUBE_MAIN_NODE}" ]; then + echo "${i} ${SSH_AUTH}" >> cluster-hosts + fi +done +echo "[all-cluster-nodes:children]" >> cluster-hosts +echo "main-kuber" >> cluster-hosts +echo "cluster-nodes" >> cluster-hosts + +# Calculate parallel ansible execution +NODES_IPS=( $NODES ) +if [[ "${#NODES_IPS[@]}" -lt 50 ]] && [[ "${#NODES_IPS[@]}" -gt 5 ]]; then + ANSIBLE_FORKS="${#NODES_IPS[@]}" +elif [[ "${#NODES_IPS[@]}" -ge 50 ]]; then + ANSIBLE_FORKS=50 +else + ANSIBLE_FORKS=10 +fi + +echo "Starting ansible ..." 
+ansible-playbook -v --ssh-extra-args "-o\ StrictHostKeyChecking=no" -f ${ANSIBLE_FORKS} -i ./cluster-hosts -e env_num=${ENV} -e elasticsearch_node="${ELASTICSEARCH_NODE}" -e heka_package_url=${HEKA_PACKAGE_URL} ./deploy-heka.yaml --diff + diff --git a/doc/source/methodologies/monitoring/configs/elasticsearch-heka/heka/00-hekad.toml.j2 b/doc/source/methodologies/monitoring/configs/elasticsearch-heka/heka/00-hekad.toml.j2 new file mode 100644 index 0000000..5373341 --- /dev/null +++ b/doc/source/methodologies/monitoring/configs/elasticsearch-heka/heka/00-hekad.toml.j2 @@ -0,0 +1,69 @@ +# vim: set syntax=yaml + +[hekad] +maxprocs = 2 + +[DockerLogInput] +endpoint = "unix:///var/run/docker.sock" +#decoder = "KubeAPI_decoder" +decoder = "MultiDecoder" + +[MultiDecoder] +type = "MultiDecoder" +subs = ["KubeAPI_decoder", "EnvironmentScribbler"] +cascade_strategy = "all" +#log_sub_errors = true +{% raw %} +[KubeAPI_decoder] +type = "PayloadRegexDecoder" +match_regex = '\S+ \S+ .+ (?P\S+)\] (?P[A-Z]+) (?P\S+)\: \((?P\S+)ms\) (?P\d+) \[\[(?P.+)\] (?P\S+)\:(?P\d+)\]' +[KubeAPI_decoder.message_fields] +Type = "KubeAPIlog" +Logger = "Docker" +Code = "%Code%" +Method = "%Method%" +Url|uri = "%Url%" +ResponseTime = "%ResponseTime%" +StatusCode = "%StatusCode%" +Agent = "%Agent%" +RemoteIP|ipv4 = "%RemoteIP%" +RemotePort = "%RemotePort%" +{% endraw %} +[EnvironmentScribbler] +type = "ScribbleDecoder" +[EnvironmentScribbler.message_fields] +Environment = "env-{{ env_num }}" + + +[KubeAPI_to_int] +type = "SandboxFilter" +filename = "lua_filters/kubeapi_to_int.lua" +message_matcher = "Type == 'KubeAPIlog'" + +[ESJsonEncoder] +index = "env-{{ env_num }}-{{ '%{Type}-%{%Y.%m.%d}' }}" +#es_index_from_timestamp = true +type_name = "%{Type}" + +[ElasticSearchOutput] +message_matcher = "Type == 'heka.sandbox.KubeAPIlog' || Type == 'DockerLog'" +server = "http://{{ elasticsearch_node }}:9200" +flush_interval = 5000 +flush_count = 10 +encoder = "ESJsonEncoder" + +[PayloadEncoder] 
+append_newlines = false +# +[LogOutput] +# NOTE(review): unresolved merge conflict was committed here; HEAD side kept active, confirm which side is intended +#message_matcher = "Type == 'KubeAPIlog'" +message_matcher = "TRUE" +#encoder = "ESJsonEncoder" +encoder = "PayloadEncoder" +# incoming side (b0caa3ceb82399dd16465645eebdebf90242662c), disabled: +#message_matcher = "Type == 'heka.sandbox.KubeAPIlog' || Type == 'DockerLog'" +#message_matcher = "TRUE" +#encoder = "ESJsonEncoder" +#encoder = "PayloadEncoder" +# end of former conflict region diff --git a/doc/source/methodologies/monitoring/configs/elasticsearch-heka/heka/kubeapi_to_int.lua.j2 b/doc/source/methodologies/monitoring/configs/elasticsearch-heka/heka/kubeapi_to_int.lua.j2 new file mode 100644 index 0000000..2cabb94 --- /dev/null +++ b/doc/source/methodologies/monitoring/configs/elasticsearch-heka/heka/kubeapi_to_int.lua.j2 @@ -0,0 +1,30 @@ +{% raw %} +-- Convert ResponseTime, RemotePort and StatusCode to numbers and re-inject the message + +local fields = {["ResponseTime"] = 0, ["RemotePort"] = 0, ["StatusCode"] = 0} +local msg = { + Type = "KubeAPIlog", + Severity = 6, + Fields = fields +} + +function process_message () + fields["ResponseTime"] = tonumber(read_message("Fields[ResponseTime]")) + fields["RemotePort"] = tonumber(read_message("Fields[RemotePort]")) + fields["StatusCode"] = tonumber(read_message("Fields[StatusCode]")) + msg.Payload = read_message("Payload") + fields["Code"] = read_message("Fields[Code]") + fields["ContainerID"] = read_message("Fields[ContainerID]") + fields["ContainerName"] = read_message("Fields[ContainerName]") + fields["Environment"] = read_message("Fields[Environment]") + fields["Method"] = read_message("Fields[Method]") + fields["RemoteIP"] = read_message("Fields[RemoteIP]") + fields["Url"] = read_message("Fields[Url]") + local ok, err = pcall(inject_message, msg) + if not ok then + inject_payload("txt", "error", err) + end + return 0 +end + +{% endraw %} diff --git a/doc/source/methodologies/monitoring/configs/node1.tar.gz b/doc/source/methodologies/monitoring/configs/node1.tar.gz new file mode 100644 index 0000000..ba36a1a Binary files
/dev/null and b/doc/source/methodologies/monitoring/configs/node1.tar.gz differ diff --git a/doc/source/methodologies/monitoring/configs/prometheus-grafana-telegraf/deploy-graf-prom.yaml b/doc/source/methodologies/monitoring/configs/prometheus-grafana-telegraf/deploy-graf-prom.yaml new file mode 100644 index 0000000..f2458cf --- /dev/null +++ b/doc/source/methodologies/monitoring/configs/prometheus-grafana-telegraf/deploy-graf-prom.yaml @@ -0,0 +1,124 @@ +--- +- hosts: common + remote_user: root + tasks: + - name: Install common packages + apt: name={{ item }} state=installed + with_items: + - python-pip + tags: [ 'always' ] + - name: Install docker for Ubuntu 14.04 + apt: name=docker.io state=installed + when: ansible_distribution == 'Ubuntu' and ansible_distribution_version == '14.04' + tags: [ 'always' ] + - name: Install docker for Ubuntu 16.04 + apt: name=docker state=installed + when: ansible_distribution == 'Ubuntu' and ansible_distribution_version == '16.04' + tags: [ 'always' ] + - name: Install python deps + pip: name={{ item }} + with_items: + - docker-py + - docker-compose + tags: [ 'always' ] + +- hosts: grafana + remote_user: root + vars: + postgresql_root_user: root + postgresql_root_password: aijoom1Shiex + grafana_postgresql_user: grafana + grafana_postgresql_password: sHskdhos6se + grafana_postgresql_db: grafana + grafana_user: admin + grafana_password: admin + tasks: + - name: Install packages for grafana + apt: name={{ item }} state=installed + with_items: + - postgresql-client-9.3 + - python-psycopg2 + - name: Create postgres data dir + file: path=/var/lib/postgres/data/db state=directory + tags: [ 'grafana' ] + - name: Run postgres in docker + docker_container: + name: postgres + image: 'postgres:latest' + ports: 5432:5432 + volumes: '/var/lib/postgres/data:/var/lib/postgres/data' + env: + POSTGRES_USER: "{{ postgresql_root_user }}" + POSTGRES_PASSWORD: "{{ postgresql_root_password }}" + PGDATA: /var/lib/postgres/data/db + tags: [ 'grafana' ] 
+ - name: Create DB for grafana + postgresql_db: + name: "{{ grafana_postgresql_db }}" + login_user: "{{ postgresql_root_user }}" + login_password: "{{ postgresql_root_password }}" + login_host: localhost + encoding: 'UTF-8' + tags: [ 'grafana' ] + - name: Create user for grafana in postgres + postgresql_user: + name: "{{ grafana_postgresql_user }}" + login_user: "{{ postgresql_root_user }}" + login_password: "{{ postgresql_root_password }}" + login_host: localhost + password: "{{ grafana_postgresql_password }}" + db: "{{ grafana_postgresql_db }}" + priv: ALL + tags: [ 'grafana' ] + - name: Create data dir for Grafana + file: path=/var/lib/grafana state=directory + tags: [ 'grafana' ] + - name: Start Grafana container + docker_container: + name: grafana + image: 'grafana/grafana:4.0.1' + volumes: '/var/lib/grafana:/var/lib/grafana' + ports: 3000:3000 + env: + GF_SECURITY_ADMIN_PASSWORD: "{{ grafana_password }}" + GF_SECURITY_ADMIN_USER: "{{ grafana_user }}" + GF_DATABASE_TYPE: postgres + GF_DATABASE_HOST: "{{ ansible_default_ipv4.address }}" + GF_DATABASE_NAME: "{{ grafana_postgresql_db }}" + GF_DATABASE_USER: "{{ grafana_postgresql_user }}" + GF_DATABASE_PASSWORD: "{{ grafana_postgresql_password }}" + GF_INSTALL_PLUGINS: grafana-piechart-panel + tags: [ 'grafana' ] + +- hosts: prometheuses + remote_user: root + tasks: + - name: Data dir for prometheus + file: path=/var/lib/prometheus state=directory + tags: [ 'prometheus' ] + - include: docker_prometheus.yaml + +- hosts: prometheus-kuber + remote_user: root + tasks: + - name: Copy prometheus config + template: src=prometheus/prometheus-kuber.yml.j2 dest=/var/lib/prometheus/prometheus.yml + register: prometheus_yml + tags: [ 'prometheus', 'prometheus-conf' ] + - include: docker_prometheus.yaml + - name: Send kill -1 to prometheus if prometheus.yml changed + command: pkill -1 prometheus + when: prometheus_yml.changed + tags: [ 'prometheus', 'prometheus-conf'] +- hosts: prometheus-system + remote_user: root + tasks: + - name: Copy 
prometheus config + template: src=prometheus/prometheus-system.yml.j2 dest=/var/lib/prometheus/prometheus.yml + register: prometheus_yml + tags: [ 'prometheus', 'prometheus-conf' ] + - include: docker_prometheus.yaml + - name: Send kill -1 to prometheus if prometheus.yml changed + command: pkill -1 prometheus + when: prometheus_yml.changed + tags: [ 'prometheus', 'prometheus-conf'] diff --git a/doc/source/methodologies/monitoring/configs/prometheus-grafana-telegraf/deploy-telegraf.yaml b/doc/source/methodologies/monitoring/configs/prometheus-grafana-telegraf/deploy-telegraf.yaml new file mode 100644 index 0000000..32a9969 --- /dev/null +++ b/doc/source/methodologies/monitoring/configs/prometheus-grafana-telegraf/deploy-telegraf.yaml @@ -0,0 +1,118 @@ +--- +- hosts: all-cluster-nodes + remote_user: root + tasks: + - name: Create user telegraf + user: name=telegraf home=/opt/telegraf + - name: Create /opt/telegraf + file: path=/opt/telegraf state=directory owner=telegraf + - name: Create bin dir for telegraf + file: path=/opt/telegraf/bin state=directory owner=telegraf + - name: Create etc dir for telegraf + file: path=/opt/telegraf/etc state=directory owner=telegraf + - name: Copy telegraf to server + copy: src=../../telegraf/opt/bin/telegraf dest=/opt/telegraf/bin/telegraf mode=0755 + register: telegraf_bin + - name: Copy telegraf.service + copy: src=telegraf/telegraf.service dest=/etc/systemd/system/telegraf.service + register: telegraf_service + - name: Start and enable telegraf + systemd: state=started enabled=yes daemon_reload=yes name=telegraf + - name: Delete allmetrics.tmp.lock + file: path=/opt/telegraf/bin/data/allmetrics.tmp.lock state=absent + when: telegraf_service.changed or telegraf_bin.changed + - name: Restart telegraf if telegraf binary has been changed + systemd: state=restarted name=telegraf + when: telegraf_bin.changed + - name: Install software + apt: name={{ item }} state=installed + with_items: + - sysstat + - numactl + - name: Copy system 
metric scripts + copy: src=../../telegraf/opt/system_stats/{{ item }} dest=/opt/telegraf/bin/{{ item }} mode=0755 + with_items: + - entropy.sh + - iostat_per_device.sh + - memory_bandwidth.sh + - numa_stat_per_pid.sh + - per_process_cpu_usage.sh + - list_openstack_processes.sh + - network_tcp_queue.sh + - name: Copy pcm-memory-one-line.x + copy: src=../../telegraf/opt/system_stats/intel_pcm_mem/pcm-memory-one-line.x dest=/opt/telegraf/bin/pcm-memory-one-line.x mode=0755 + - name: Add sysctl for pcm + sysctl: name=kernel.nmi_watchdog value=0 state=present reload=yes + - name: Load kernel module msr + modprobe: name=msr state=present + - name: Add module autoload + lineinfile: dest=/etc/modules line='msr' + - name: Add user telegraf to sudoers + lineinfile: + dest: /etc/sudoers + state: present + line: "telegraf ALL=(ALL) NOPASSWD: ALL" + + +- hosts: cluster-nodes + remote_user: root + tasks: + - name: Copy telegraf config + copy: src=./telegraf/telegraf-sys.conf dest=/opt/telegraf/etc/telegraf.conf + register: telegraf_conf + - name: Restart telegraf if config has been changed + systemd: state=restarted name=telegraf + when: telegraf_conf.changed + +- hosts: main-kuber + remote_user: root + tasks: + - name: Copy openstack scripts + copy: src=../../telegraf/opt/osapi/{{ item }} dest=/opt/telegraf/bin/{{ item }} mode=0755 + with_items: + - glog.sh + - osapitime.sh + - vmtime.sh + tags: [ 'openstack' ] + - name: Copy etcd scripts + copy: src=../../telegraf/opt/k8s_etcd/{{ item }} dest=/opt/telegraf/bin/{{ item }} mode=0755 + with_items: + - etcd_get_metrics.sh + - k8s_get_metrics.sh + - name: Install software for scripts + apt: name={{ item }} state=installed + with_items: + - mysql-client + - bc + - jq + tags: [ 'openstack' ] + - name: Create dirs for scripts + file: path=/opt/telegraf/bin/{{ item }} state=directory owner=telegraf + with_items: + - log + - data + - name: Copy telegraf config + template: src=telegraf/telegraf-openstack.conf.j2 
dest=/opt/telegraf/etc/telegraf.conf + register: telegraf_conf + tags: [ 'openstack' ] + - name: Delete allmetrics.tmp.lock + file: path=/opt/telegraf/bin/data/allmetrics.tmp.lock state=absent + when: telegraf_conf.changed + - name: Restart telegraf if config has been changed + systemd: state=restarted name=telegraf + when: telegraf_conf.changed + tags: [ 'openstack' ] + +- hosts: all-cluster-nodes + remote_user: root + tasks: + - name: Reload telegraf is service file has been changed + systemd: daemon_reload=yes state=reloaded name=telegraf + when: telegraf_service.changed + +- hosts: main + remote_user: root + tasks: + - name: update prometheus config + template: src=./prometheus/targets.yml.j2 dest=/var/lib/prometheus/targets-{{ cluster_tag }}.yml + tags: [ 'prometheus' ] diff --git a/doc/source/methodologies/monitoring/configs/prometheus-grafana-telegraf/deploy_etcd_collect.sh b/doc/source/methodologies/monitoring/configs/prometheus-grafana-telegraf/deploy_etcd_collect.sh new file mode 100755 index 0000000..a3f0fb6 --- /dev/null +++ b/doc/source/methodologies/monitoring/configs/prometheus-grafana-telegraf/deploy_etcd_collect.sh @@ -0,0 +1,46 @@ +#!/bin/bash +CLUSTER=${1} +TMP_YAML=$(mktemp -u) + +export ANSIBLE_HOST_KEY_CHECKING=False +export SSH_USER="root" +export SSH_PASS="r00tme" +cd $(dirname $(realpath $0)) + +ENV=${1} +if [ -z "${ENV}" ]; then + echo "Please provide env number $(basename $0) [1|2|3|4|5|6]" + exit 1 +fi +PROMETHEUS_HOST="172.20.9.115" +KUBE_MAIN_NODE="172.20.8.6${ENV}" +CLUSTER_TAG="env-${ENV}" + +ETCD="" + +SSH_OPTS="-o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no" + + +TARGETS=$(sshpass -p ${SSH_PASS} ssh ${SSH_OPTS} ${SSH_USER}@${KUBE_MAIN_NODE} curl -ks https://127.0.0.1:2379/v2/members | python -m json.tool | grep 2379) + +if [ -z "$TARGETS" ]; then + echo "No etcd found" + exit 1 +fi + +for i in ${TARGETS}; do + TEMP_TARGET=${i#\"https://} + ETCD="$ETCD ${TEMP_TARGET%\"}" +done + +echo "- targets:" > ${TMP_YAML} +for i 
in ${ETCD}; do + echo " - $i" >> ${TMP_YAML} +done +echo " labels:" >> ${TMP_YAML} +echo " env: ${CLUSTER_TAG}" >> ${TMP_YAML} + +echo "Targets file is ready" +cat ${TMP_YAML} +sshpass -p ${SSH_PASS} scp ${SSH_OPTS} ${TMP_YAML} root@${PROMETHEUS_HOST}:/var/lib/prometheus/etcd-env-${1}.yml +rm ${TMP_YAML} diff --git a/doc/source/methodologies/monitoring/configs/prometheus-grafana-telegraf/deploy_grafana.sh b/doc/source/methodologies/monitoring/configs/prometheus-grafana-telegraf/deploy_grafana.sh new file mode 100755 index 0000000..475197f --- /dev/null +++ b/doc/source/methodologies/monitoring/configs/prometheus-grafana-telegraf/deploy_grafana.sh @@ -0,0 +1,2 @@ +#!/bin/bash +ansible-playbook -i ./hosts ./deploy-graf-prom.yaml --tags "grafana" diff --git a/doc/source/methodologies/monitoring/configs/prometheus-grafana-telegraf/deploy_prometheus.sh b/doc/source/methodologies/monitoring/configs/prometheus-grafana-telegraf/deploy_prometheus.sh new file mode 100755 index 0000000..2317d7b --- /dev/null +++ b/doc/source/methodologies/monitoring/configs/prometheus-grafana-telegraf/deploy_prometheus.sh @@ -0,0 +1,2 @@ +#!/bin/bash +ansible-playbook -i ./hosts ./deploy-graf-prom.yaml --tags "prometheus" diff --git a/doc/source/methodologies/monitoring/configs/prometheus-grafana-telegraf/deploy_telegraf.sh b/doc/source/methodologies/monitoring/configs/prometheus-grafana-telegraf/deploy_telegraf.sh new file mode 100755 index 0000000..995a123 --- /dev/null +++ b/doc/source/methodologies/monitoring/configs/prometheus-grafana-telegraf/deploy_telegraf.sh @@ -0,0 +1,65 @@ +#!/bin/bash +set -e +export ANSIBLE_HOST_KEY_CHECKING=False +export SSH_USER="root" +export SSH_PASS="r00tme" +cd $(dirname $(realpath $0)) + +ENV=${1} +if [ -z "${ENV}" ]; then + echo "Please provide env number $(basename $0) [1|2|3|4|5|6]" + exit 1 +fi +PROMETHEUS_NODE="172.20.124.25" +KUBE_MAIN_NODE="172.20.8.6${ENV}" +CLUSTER_TAG="env-${ENV}" + +# Secret option +ANSIBLE_TAG=$2 + +SSH_OPTS="-q -o 
UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no" + +echo "Get clusters nodes" + +NODES_TMP=$(sshpass -p ${SSH_PASS} ssh ${SSH_OPTS} ${SSH_USER}@${KUBE_MAIN_NODE} 'kubectl get nodes -o jsonpath='"'"'{.items[*].status.addresses[?(@.type=="InternalIP")].address}'"'"'') +ALL_IP_ON_KUBER_NODE=$(sshpass -p ${SSH_PASS} ssh ${SSH_OPTS} ${SSH_USER}@${KUBE_MAIN_NODE} ip addr | grep 172.20 | awk '{print $2}' | awk -F'/' '{print $1}') +GREP_STRING_TMP="" +for i in $ALL_IP_ON_KUBER_NODE; do + GREP_STRING_TMP="${GREP_STRING_TMP}${i}|" +done +GREP_STRING=${GREP_STRING_TMP:0:-1} +SSH_AUTH="ansible_ssh_user=${SSH_USER} ansible_ssh_pass=${SSH_PASS}" +echo "[main]" > cluster-hosts +echo "${PROMETHEUS_NODE} ${SSH_AUTH}" >> cluster-hosts +echo "[main-kuber]" >> cluster-hosts +echo "${KUBE_MAIN_NODE} ${SSH_AUTH}" >> cluster-hosts +echo "[cluster-nodes]" >> cluster-hosts +set +e +# Remove IP of kuber node +for i in ${NODES_TMP} ; do + TMP_VAR=$(echo $i | grep -vE "(${GREP_STRING})") + NODES="${NODES} ${TMP_VAR}" +done +set -e +for i in ${NODES} ; do + if [ "$i" != "${KUBE_MAIN_NODE}" ]; then + echo "${i} ${SSH_AUTH}" >> cluster-hosts + fi +done +echo "[all-cluster-nodes:children]" >> cluster-hosts +echo "main-kuber" >> cluster-hosts +echo "cluster-nodes" >> cluster-hosts +LINES=$(wc -l cluster-hosts | awk '{print $1}') +NUM_NODES=$(($LINES - 7)) +if [ ${NUM_NODES} -le 0 ]; then + echo "Something wrong, $NUM_NODES nodes found" + exit 1 +else + echo "${NUM_NODES} nodes found" +fi + +if [ -z "${ANSIBLE_TAG}" ]; then + ansible-playbook -f 40 -i ./cluster-hosts -e cluster_tag=${CLUSTER_TAG} ./deploy-telegraf.yaml +else + ansible-playbook -f 40 -i ./cluster-hosts -e cluster_tag=${CLUSTER_TAG} -t ${ANSIBLE_TAG} ./deploy-telegraf.yaml +fi diff --git a/doc/source/methodologies/monitoring/configs/prometheus-grafana-telegraf/docker_prometheus.yaml b/doc/source/methodologies/monitoring/configs/prometheus-grafana-telegraf/docker_prometheus.yaml new file mode 100644 index 0000000..0d47b75 --- 
/dev/null +++ b/doc/source/methodologies/monitoring/configs/prometheus-grafana-telegraf/docker_prometheus.yaml @@ -0,0 +1,10 @@ +--- +- name: Deploy prometheus in docker + docker_container: + name: prometheus + image: 'prom/prometheus:v1.4.0' + ports: 9090:9090 + state: started + volumes: ['/var/lib/prometheus:/prometheus'] + command: '-config.file=/prometheus/prometheus.yml -storage.local.retention 168h0m0s -storage.local.max-chunks-to-persist 3024288 -storage.local.memory-chunks=50502740 -storage.local.num-fingerprint-mutexes=300960' + tags: [ 'prometheus' ] \ No newline at end of file diff --git a/doc/source/methodologies/monitoring/configs/prometheus-grafana-telegraf/prometheus/prometheus-kuber.yml.j2 b/doc/source/methodologies/monitoring/configs/prometheus-grafana-telegraf/prometheus/prometheus-kuber.yml.j2 new file mode 100644 index 0000000..51cc486 --- /dev/null +++ b/doc/source/methodologies/monitoring/configs/prometheus-grafana-telegraf/prometheus/prometheus-kuber.yml.j2 @@ -0,0 +1,58 @@ +global: + scrape_interval: 15s # By default, scrape targets every 15 seconds. + evaluation_interval: 15s # By default, scrape targets every 15 seconds. + # Attach these labels to any time series or alerts when communicating with + # external systems (federation, remote storage, Alertmanager). + external_labels: + monitor: 'codelab-monitor' + +rule_files: + # - "first.rules" + # - "second.rules" + +scrape_configs: + + - job_name: 'prometheus' + scrape_interval: 5s + scrape_timeout: 5s + # metrics_path defaults to '/metrics' + # scheme defaults to 'http'. 
+ + static_configs: + - targets: ['172.20.9.115:9090'] + +{% for env_num in range(1,7) %} + - job_name: 'k8-env-{{env_num}}' + scrape_interval: 30s + scrape_timeout: 30s + scheme: https + tls_config: + insecure_skip_verify: true + kubernetes_sd_configs: + - api_server: 'https://172.20.8.6{{env_num}}:443' + role: node + tls_config: + insecure_skip_verify: true + basic_auth: + username: kube + password: changeme + relabel_configs: + - action: labelmap + regex: __meta_kubernetes_node_label_(.+) + - source_labels: [__address__] + target_label: env + regex: .* + replacement: env-{{env_num}} + + - job_name: 'etcd-env-{{env_num}}' + scrape_interval: 5s + scrape_timeout: 5s + scheme: https + tls_config: + insecure_skip_verify: true + file_sd_configs: + - files: + - etcd-env-{{env_num}}.yml +{% endfor %} + + diff --git a/doc/source/methodologies/monitoring/configs/prometheus-grafana-telegraf/prometheus/prometheus-system.yml.j2 b/doc/source/methodologies/monitoring/configs/prometheus-grafana-telegraf/prometheus/prometheus-system.yml.j2 new file mode 100644 index 0000000..c9abe28 --- /dev/null +++ b/doc/source/methodologies/monitoring/configs/prometheus-grafana-telegraf/prometheus/prometheus-system.yml.j2 @@ -0,0 +1,33 @@ +global: + scrape_interval: 15s # By default, scrape targets every 15 seconds. + evaluation_interval: 15s # By default, scrape targets every 15 seconds. + # Attach these labels to any time series or alerts when communicating with + # external systems (federation, remote storage, Alertmanager). + external_labels: + monitor: 'codelab-monitor' + +rule_files: + # - "first.rules" + # - "second.rules" + +scrape_configs: + + - job_name: 'prometheus' + scrape_interval: 5s + scrape_timeout: 5s + # metrics_path defaults to '/metrics' + # scheme defaults to 'http'. 
+ + static_configs: + - targets: ['172.20.124.25:9090'] + +{% for env_num in range(1,7) %} + - job_name: 'telegraf-systems-env-{{env_num}}' + scrape_interval: 30s + scrape_timeout: 30s + file_sd_configs: + - files: + - targets-env-{{env_num}}.yml +{% endfor %} + + diff --git a/doc/source/methodologies/monitoring/configs/prometheus-grafana-telegraf/prometheus/targets.yml.j2 b/doc/source/methodologies/monitoring/configs/prometheus-grafana-telegraf/prometheus/targets.yml.j2 new file mode 100644 index 0000000..71b2560 --- /dev/null +++ b/doc/source/methodologies/monitoring/configs/prometheus-grafana-telegraf/prometheus/targets.yml.j2 @@ -0,0 +1,6 @@ +- targets: +{% for host in groups['all-cluster-nodes']%} + - {{hostvars[host]['inventory_hostname']}}:9126 +{% endfor %} + labels: + env: {{ cluster_tag }} diff --git a/doc/source/methodologies/monitoring/configs/prometheus-grafana-telegraf/telegraf/scripts/entropy.sh b/doc/source/methodologies/monitoring/configs/prometheus-grafana-telegraf/telegraf/scripts/entropy.sh new file mode 100755 index 0000000..c1a5d3d --- /dev/null +++ b/doc/source/methodologies/monitoring/configs/prometheus-grafana-telegraf/telegraf/scripts/entropy.sh @@ -0,0 +1,5 @@ +#!/bin/bash +export LANG=C +set -o nounset # Treat unset variables as an error +echo "system entropy=$(cat /proc/sys/kernel/random/entropy_avail)" + diff --git a/doc/source/methodologies/monitoring/configs/prometheus-grafana-telegraf/telegraf/scripts/etcd_get_metrics.sh b/doc/source/methodologies/monitoring/configs/prometheus-grafana-telegraf/telegraf/scripts/etcd_get_metrics.sh new file mode 100755 index 0000000..628be29 --- /dev/null +++ b/doc/source/methodologies/monitoring/configs/prometheus-grafana-telegraf/telegraf/scripts/etcd_get_metrics.sh @@ -0,0 +1,33 @@ +#!/bin/bash -e + +ETCD=/usr/local/bin/etcdctl + +type jq >/dev/null 2>&1 || ( echo "Jq is not installed" ; exit 1 ) +type curl >/dev/null 2>&1 || ( echo "Curl is not installed" ; exit 1 ) + +# get etcd members 
credentials +MEMBERS="${ETCD} --endpoints https://127.0.0.1:2379 member list" +LEADER_ID=$(eval "$MEMBERS" | awk -F ':' '/isLeader=true/ {print $1}') +LEADER_ENDPOINT=$(eval "$MEMBERS" | awk '/isLeader=true/ {print $4}' | cut -d"=" -f2) +SLAVE_ID=$(eval "$MEMBERS" | grep 'isLeader=false' | head -n 1 | awk -F ":" '{print $1}') +SLAVE_ENDPOINT=$(eval "$MEMBERS" | grep 'isLeader=false' | head -n 1 | awk '{print $4}' | cut -d"=" -f2) + +# member count: +metric_members_count=`curl -s -k https://127.0.0.1:2379/v2/members | jq -c '.members | length'` +metric_total_keys_count=`${ETCD} --endpoints https://127.0.0.1:2379 ls -r --sort | wc -l` +metric_total_size_dataset=`pidof etcd | xargs ps -o rss | awk '{rss+=$1} END {print rss}'` +metric_store_stats=`curl -s -k ${LEADER_ENDPOINT}/v2/stats/store| tr -d \"\{\} | sed -e 's/:/=/g'` +metric_latency_from_leader_avg=`curl -s -k ${LEADER_ENDPOINT}/v2/stats/leader | \ + jq -c ".followers.\"${SLAVE_ID}\".latency.average"` +metric_leader_stats=`curl -s -k ${LEADER_ENDPOINT}/v2/stats/self | \ + jq -c "{ sendBandwidthRate: .sendBandwidthRate, sendAppendRequestCnt: \ + .sendAppendRequestCnt, sendPkgRate: .sendPkgRate }"| tr -d \"\{\} | sed -e 's/:/=/g'` +metric_slave_stats=`curl -s -k ${SLAVE_ENDPOINT}/v2/stats/self | \ + jq -c "{ recvBandwidthRate: .recvBandwidthRate, recvAppendRequestCnt: \ + .recvAppendRequestCnt, recvPkgRate: .recvPkgRate }"| tr -d \"\{\} | sed -e 's/:/=/g'` +cat << EOF +etcd_general_stats,group=etcd_cluster_metrics members_count=${metric_members_count},dataset_size=${metric_total_size_dataset},total_keys_count=${metric_total_keys_count} +etcd_leader_stats,group=etcd_cluster_metrics $metric_leader_stats +etcd_follower_stats,group=etcd_cluster_metrics ${metric_slave_stats},latency_from_leader_avg=${metric_latency_from_leader_avg} +etcd_store_stats,group=etcd_cluster_metrics $metric_store_stats +EOF diff --git a/doc/source/methodologies/monitoring/configs/prometheus-grafana-telegraf/telegraf/scripts/glog.sh 
b/doc/source/methodologies/monitoring/configs/prometheus-grafana-telegraf/telegraf/scripts/glog.sh new file mode 100755 index 0000000..e668b20 --- /dev/null +++ b/doc/source/methodologies/monitoring/configs/prometheus-grafana-telegraf/telegraf/scripts/glog.sh @@ -0,0 +1,105 @@ +#!/bin/bash +# Logs extractor / parser +# checking that we are good +if [[ -z "${TMP_DIR}" || -z "${POD}" || -z "${CONTAINER}" || -z "${K8S_NS}" || -z "${OS_LOG_FIELDS}" || -z ${CONTID} ]]; then + echo "Required variables are not set, exiting!" + exit 1 +fi +# Variables declaration +SSH_USER="${SSH_USER:-root}" +SSH_PASS="${SSH_PASS:-r00tme}" +LOG_ENTRIES_NUMBER=${LOG_ENTRIES_NUMBER:-1000} +LAST_TIME_STAMP_FILE="${TMP_DIR}/timestamp.tmp" +# get | set last timestamp for log entries +function last_ts_data() +{ + local action + action=${1} + shift + if [ "${action}" == "get" ]; then + if [ -e ${LAST_TIME_STAMP_FILE} ]; then + cat ${LAST_TIME_STAMP_FILE} + fi + else + echo "$*" > ${LAST_TIME_STAMP_FILE} + fi +} +function print_out() +{ + if [ -z "${TMP_METRICS}" ];then + echo "$@" + else + echo "$@" >> ${TMP_METRICS} + fi +} +function micro_to_seconds() +{ + local input + local output + input="${1}" + output=$(echo "scale=4;${input}/1000000" | bc) + if echo ${output} | grep -q '^\..'; then + output="0${output}" + fi + echo "${output}" +} +# extract container logs from k8s +function get_logs() +{ + local sdate + local stime + local scalltime + local lasttimestamp + local is_foundlast + local tmpdata + tmpdata="${TMP_DIR}/tmpdata.log" + if [ -e "${tmpdata}" ]; then rm -f ${tmpdata}; fi + if [ "${CONTAINER}" == "keystone" ];then + sshpass -p ${SSH_PASS} ssh -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no ${SSH_USER}@${HOST} "tail -n${LOG_ENTRIES_NUMBER} /var/log/ccp/keystone/keystone-access.log | cut -d' ' -f${OS_LOG_FIELDS} | sed -e 's#\[##g' -e 's#\]##g'" 2>/dev/null > ${tmpdata} + else + sshpass -p ${SSH_PASS} ssh -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no 
${SSH_USER}@${HOST} "docker logs --tail ${LOG_ENTRIES_NUMBER} ${CONTID} 2>&1 | grep 'INFO' | grep 'GET /' | cut -d' ' -f${OS_LOG_FIELDS}" 2>/dev/null > ${tmpdata} + fi + is_foundlast=false + lasttimestamp=$(last_ts_data "get") + if [ -z "${lasttimestamp}" ]; then + while read log + do + sdate=$(echo ${log} | cut -d' ' -f1) + stime=$(echo ${log} | cut -d' ' -f2) + scalltime=$(echo ${log} | cut -d' ' -f3) + if [ "${CONTAINER}" == "keystone" ];then scalltime=$(micro_to_seconds ${scalltime});fi + if [ ! -z "${scalltime}" ]; then + print_out "os_api_response_time,container=${CONTAINER},pod=${POD},instance=${HOST},requestdate=${sdate},requesttime=${stime} processingtime=${scalltime}" + fi + done < <(cat ${tmpdata}) + sdate=$(tail -n 1 ${tmpdata} | cut -d' ' -f1) + stime=$(tail -n 1 ${tmpdata} | cut -d' ' -f2) + last_ts_data "set" "${sdate}${stime}" + else + while read log + do + sdate=$(echo ${log} | cut -d' ' -f1) + stime=$(echo ${log} | cut -d' ' -f2) + scalltime=$(echo ${log} | cut -d' ' -f3) + if [ "${CONTAINER}" == "keystone" ];then scalltime=$(micro_to_seconds ${scalltime});fi + if [[ "${is_foundlast}" = "false" && "${lasttimestamp}" = "${sdate}${stime}" ]]; then + #echo "FOUND: ${sdate}${stime} ${scalltime}" + is_foundlast=true + continue + fi + if [ "${is_foundlast}" == "true" ]; then + if [ ! 
-z "${scalltime}" ]; then + print_out "os_api_response_time,container=${CONTAINER},pod=${POD},instance=${HOST},requestdate=${sdate},requesttime=${stime} processingtime=${scalltime}" + fi + fi + done < <(cat ${tmpdata}) + if [ "${is_foundlast}" == "true" ]; then + sdate=$(tail -n 1 ${tmpdata} | cut -d' ' -f1) + stime=$(tail -n 1 ${tmpdata} | cut -d' ' -f2) + last_ts_data "set" "${sdate}${stime}" + fi + fi + rm -f ${tmpdata} +} +# Main logic +get_logs diff --git a/doc/source/methodologies/monitoring/configs/prometheus-grafana-telegraf/telegraf/scripts/iostat_per_device.sh b/doc/source/methodologies/monitoring/configs/prometheus-grafana-telegraf/telegraf/scripts/iostat_per_device.sh new file mode 100755 index 0000000..a6ed297 --- /dev/null +++ b/doc/source/methodologies/monitoring/configs/prometheus-grafana-telegraf/telegraf/scripts/iostat_per_device.sh @@ -0,0 +1,6 @@ +#!/bin/bash +# output from iostat -Ndx is +# Device: rrqm/s wrqm/s r/s w/s rkB/s wkB/s avgrq-sz avgqu-sz await r_await w_await svctm %util +export LANG=C +iostat -Ndx | tail -n +4 | head -n -1 | awk '{print "system_per_device_iostat,device="$1" read_merge="$2",write_merge="$3",await="$10",read_await="$11",write_await="$12",util="$14",average_queue="$9}' + diff --git a/doc/source/methodologies/monitoring/configs/prometheus-grafana-telegraf/telegraf/scripts/k8s_get_metrics.sh b/doc/source/methodologies/monitoring/configs/prometheus-grafana-telegraf/telegraf/scripts/k8s_get_metrics.sh new file mode 100755 index 0000000..b6b3293 --- /dev/null +++ b/doc/source/methodologies/monitoring/configs/prometheus-grafana-telegraf/telegraf/scripts/k8s_get_metrics.sh @@ -0,0 +1,75 @@ +#!/bin/bash -e + + +K8S_MASTER=127.0.0.1 + +if [[ $1 ]] ; then + K8S_MASTER=$1 +fi + +type jq >/dev/null 2>&1 || ( echo "Jq is not installed" ; exit 1 ) +type curl >/dev/null 2>&1 || ( echo "Curl is not installed" ; exit 1 ) + +curl_get() { + url="https://${K8S_MASTER}$@" + curl -k -s -u kube:changeme $url || ( echo "Curl failed at: $url" 
1>&2; exit 1 ) +} +# gathering frequent API calls output to separate file(in order to avoid long timeouts): +node_file=`mktemp /tmp/XXXXX` +pods_file=`mktemp /tmp/XXXXX` +endpoints_file=`mktemp /tmp/XXXXX` +curl_get "/api/v1/nodes" > $node_file +curl_get "/api/v1/pods" > $pods_file +curl_get "/api/v1/endpoints" > $endpoints_file +# metrics withdrawal: +number_of_namespaces_total=`curl_get "/api/v1/namespaces" | jq '[ .items[] .metadata.name ] | length'` +number_of_services_total=`curl_get "/api/v1/services" | jq -c '[ .items[] .metadata.name ] | length'` +number_of_nodes_total=`jq -c '[ .items[] .metadata.name ] | length' $node_file` +number_of_unsched=`jq -c '[ .items[] | select(.spec.unschedulable != null) .metadata.name ] | length' $node_file` +number_in_each_status=`jq -c '[ .items[] | .status.conditions[] | select(.type == "Ready") .status \ + | gsub("(?<a>.+)"; "number_of_status_\(.a)" ) ] | group_by(.) | map({(.[0]): length}) | add ' $node_file \ + | tr -d \"\{\} | sed -e 's/:/=/g'` +number_of_pods_total=`jq -c '[ .items[] .metadata.name ] | length' $pods_file` +number_of_pods_state_Pending=`jq -c '[ .items[] .status.phase | select(. == "Pending")] | length' $pods_file` +number_of_pods_state_Running=`jq -c '[ .items[] .status.phase | select(. == "Running")] | length' $pods_file` +number_of_pods_state_Succeeded=`jq -c '[ .items[] .status.phase | select(. == "Succeeded")] | length' $pods_file` +number_of_pods_state_Failed=`jq -c '[ .items[] .status.phase | select(. == "Failed")] | length' $pods_file` +number_of_pods_state_Unknown=`jq -c '[ .items[] .status.phase | select(. == "Unknown")] | length' $pods_file` +number_of_pods_per_node=`jq -c '[ .items[] | .spec.nodeName ] | group_by(.) | \ + map("k8s_pods_per_node,group=k8s_cluster_metrics,pod_node=\(.[0]) value=\(length)")' $pods_file \ + | sed -e 's/\["//g' -e 's/"\]//g' -e 's/","/\n/g'` +number_of_pods_per_ns=`jq -c '[ .items[] | .metadata.namespace ] | group_by(.) 
| \ + map("k8s_pods_per_namespace,group=k8s_cluster_metrics,ns=\(.[0]) value=\(length)")' $pods_file \ + | sed -e 's/\["//g' -e 's/"\]//g' -e 's/","/\n/g'` +number_of_endpoints_each_service=`jq -c '[ .items[] | { service: .metadata.name, endpoints: .subsets[] } | \ + . as { service: $svc, endpoints: $endp } | $endp.addresses | length | . as $addr | $endp.ports | length | \ + . as $prts | "k8s_services,group=k8s_cluster_metrics,service=\($svc) endpoints_number=\($addr * $prts)" ] ' $endpoints_file \ + | sed -e 's/\["//g' -e 's/"\]//g' -e 's/","/\n/g'` +number_of_endpoints_total=`jq -c '[ .items[] | .subsets[] | { addrs: .addresses, ports: .ports } \ + | map (length ) | .[0] * .[1] ] | add' $endpoints_file` +number_of_API_instances=`curl_get "/api/" | jq -c '.serverAddressByClientCIDRs | length'` +number_of_controllers=`curl_get "/api/v1/replicationcontrollers" | jq '.items | length'` +number_of_scheduler_instances=`curl_get /api/v1/namespaces/kube-system/pods?labelSelector='k8s-app=kube-scheduler' \ + | jq -c '.items | length' ` +cluster_resources_CPU=`jq -c '[ .items[] .status.capacity.cpu | tonumber ] | add' $node_file` +cluster_resources_RAM=`jq -c '[ .items[] .status.capacity.memory| gsub("[a-z]+$"; "" ; "i") | tonumber] | add' $node_file` + +# output: +cat << EOF +k8s_nodes,group=k8s_cluster_metrics number_of_nodes_total=${number_of_nodes_total},number_of_unsched=${number_of_unsched} +k8s_nodes_states,group=k8s_cluster_metrics ${number_in_each_status} +k8s_namespaces,group=k8s_cluster_metrics number_of_namespaces_total=${number_of_namespaces_total} +k8s_pods,group=k8s_cluster_metrics number_of_pods_total=${number_of_pods_total} +k8s_pods_states,group=k8s_cluster_metrics 
number_of_pods_state_Pending=${number_of_pods_state_Pending},number_of_pods_state_Running=${number_of_pods_state_Running},number_of_pods_state_Succeeded=${number_of_pods_state_Succeeded},number_of_pods_state_Failed=${number_of_pods_state_Failed},number_of_pods_state_Unknown=${number_of_pods_state_Unknown} +${number_of_pods_per_node} +${number_of_pods_per_ns} +${number_of_endpoints_each_service} +k8s_services,group=k8s_cluster_metrics number_of_services_total=${number_of_services_total},number_of_endpoints_total=${number_of_endpoints_total} +k8s_number_of_API_instances,group=k8s_cluster_metrics value=${number_of_API_instances} +k8s_number_of_controllers,group=k8s_cluster_metrics value=${number_of_controllers} +k8s_number_of_scheduler_instances,group=k8s_cluster_metrics value=${number_of_scheduler_instances} +k8s_cluster_resources,group=k8s_cluster_metrics cpu_total=${cluster_resources_CPU},ram_total=${cluster_resources_RAM} +EOF + +# cleanup +rm -f $node_file $pods_file $endpoints_file diff --git a/doc/source/methodologies/monitoring/configs/prometheus-grafana-telegraf/telegraf/scripts/list_openstack_processes.sh b/doc/source/methodologies/monitoring/configs/prometheus-grafana-telegraf/telegraf/scripts/list_openstack_processes.sh new file mode 100755 index 0000000..be51ec1 --- /dev/null +++ b/doc/source/methodologies/monitoring/configs/prometheus-grafana-telegraf/telegraf/scripts/list_openstack_processes.sh @@ -0,0 +1,15 @@ +#!/bin/bash +export LANG=C +PS_ALL=$(ps --no-headers -A -o command | grep -vE '(sh|bash)') +M_NAME=system_openstack_list + +MARIADB=$(echo "${PS_ALL}" | grep 'mariadb' | wc -l) +RABBITMQ=$(echo "${PS_ALL}" | grep 'rabbitmq' | wc -l) +KEYSTONE=$(echo "${PS_ALL}" | grep 'keystone' | wc -l) +GLANCE=$(echo "${PS_ALL}" | grep -E '(glance-api|glance-registry)' | wc -l) +CINDER=$(echo "${PS_ALL}" | grep 'cinder' | wc -l) +NOVA=$(echo "${PS_ALL}" | grep -E '(nova-api|nova-conductor|nova-consoleauth|nova-scheduler)' | wc -l) +NEUTRON=$(echo "${PS_ALL}" | 
grep -E '(neutron-server|neutron-metadata-agent|neutron-dhcp-agent|neutron-l3-agent|neutron-openvswitch-agent)' | wc -l) +OPENVSWITCH=$(echo "${PS_ALL}" | grep -E '(ovsdb-server|ovs-vswitchd|ovsdb-client)' | wc -l) + +echo "${M_NAME} mariadb=${MARIADB},rabbitmq=${RABBITMQ},keystone=${KEYSTONE},glance=${GLANCE},cinder=${CINDER},nova=${NOVA},neutron=${NEUTRON},openvswitch=${OPENVSWITCH}" \ No newline at end of file diff --git a/doc/source/methodologies/monitoring/configs/prometheus-grafana-telegraf/telegraf/scripts/memory_bandwidth.sh b/doc/source/methodologies/monitoring/configs/prometheus-grafana-telegraf/telegraf/scripts/memory_bandwidth.sh new file mode 100755 index 0000000..40f7383 --- /dev/null +++ b/doc/source/methodologies/monitoring/configs/prometheus-grafana-telegraf/telegraf/scripts/memory_bandwidth.sh @@ -0,0 +1,8 @@ +#!/bin/bash +# Output in MB/s +# echo 0 > /proc/sys/kernel/nmi_watchdog +# modprobe msr +export LANG=C +MEM_BW=$(sudo /opt/telegraf/bin/pcm-memory-one-line.x /csv 1 2>/dev/null | tail -n 1 | awk '{print $28}') +# Print nothing when no value was read: "bandwidth=" with an empty value is not a valid influx line. +if [ -n "${MEM_BW}" ]; then echo "system_memory bandwidth=${MEM_BW}"; fi \ No newline at end of file diff --git a/doc/source/methodologies/monitoring/configs/prometheus-grafana-telegraf/telegraf/scripts/network_tcp_queue.sh b/doc/source/methodologies/monitoring/configs/prometheus-grafana-telegraf/telegraf/scripts/network_tcp_queue.sh new file mode 100755 index 0000000..14196b9 --- /dev/null +++ b/doc/source/methodologies/monitoring/configs/prometheus-grafana-telegraf/telegraf/scripts/network_tcp_queue.sh @@ -0,0 +1,6 @@ +#!/bin/bash +# Sum the Recv-Q and Send-Q columns of every IPv4 socket and print them as one influx line. +export LANG=C +# A single awk pass replaces two awk forks per socket; only rows whose queue columns are +# numeric are summed, so the netstat header lines are skipped explicitly. +netstat -4 -n | awk '$2 ~ /^[0-9]+$/ && $3 ~ /^[0-9]+$/ { recv += $2; send += $3 } END { printf "system_tcp_queue sum_recv=%.0f,sum_send=%.0f\n", recv, send }' diff --git
a/doc/source/methodologies/monitoring/configs/prometheus-grafana-telegraf/telegraf/scripts/numa_stat_per_pid.sh b/doc/source/methodologies/monitoring/configs/prometheus-grafana-telegraf/telegraf/scripts/numa_stat_per_pid.sh new file mode 100755 index 0000000..989495d --- /dev/null +++ b/doc/source/methodologies/monitoring/configs/prometheus-grafana-telegraf/telegraf/scripts/numa_stat_per_pid.sh @@ -0,0 +1,22 @@ +#!/bin/bash +set -o nounset # Treat unset variables as an error +#set -x +export LANG=C +if [ ! -d '/sys/devices/system/node' ]; then + # This host does not have NUMA + exit 44 +fi +ALL_PROCESS="$(ps --no-headers -A -o pid,ucomm)" # one snapshot of "pid name" rows, reused for every lookup below +for i in $(echo "${ALL_PROCESS}" | awk '{print $1}'); do # walk the PID column of the snapshot + if [ -f "/proc/$i/numa_maps" ]; then # skip PIDs that have no numa_maps file + NUM_STAT=$(numastat -p $i) + PROC_NAME=$(echo "${ALL_PROCESS}" | grep -E "( $i |^$i )" | awk '{print $2}') # second column of the snapshot row whose PID field is $i + echo "${NUM_STAT}" | grep Huge | awk -v p=$i -v n=$PROC_NAME \ + '{printf "system_numa_memory_per_pid,pid="p",name="n" memory_huge="$NF","}' # opens the record (measurement, pid/name tags, first field); printf leaves the line open + echo "${NUM_STAT}" | grep Heap | awk '{printf "memory_heap="$NF","}' + echo "${NUM_STAT}" | grep Stack | awk '{printf "memory_stack="$NF","}' + echo "${NUM_STAT}" | grep Private | awk '{print "memory_private="$NF}' # print adds the newline that ends the record + fi +done + + diff --git a/doc/source/methodologies/monitoring/configs/prometheus-grafana-telegraf/telegraf/scripts/osapitime.sh b/doc/source/methodologies/monitoring/configs/prometheus-grafana-telegraf/telegraf/scripts/osapitime.sh new file mode 100755 index 0000000..26347fa --- /dev/null +++ b/doc/source/methodologies/monitoring/configs/prometheus-grafana-telegraf/telegraf/scripts/osapitime.sh @@ -0,0 +1,215 @@ +#!/bin/bash +# Variables declaration +WORKDIR="$(cd "$(dirname ${0})" && pwd)" +OS_LOG_PARSER="${WORKDIR}/glog.sh" +TMPDATADIR="${WORKDIR}/data" +TMP_METRICS="${TMPDATADIR}/allmetrics.tmp" +MODE="${MODE:-bg}" # "bg" unless MODE is already set in the environment +SCRIPT_LOG_DIR="${WORKDIR}/logs" +SCRIPT_LOG_FILE="${SCRIPT_LOG_DIR}/run_results_$(date +%Y-%m-%d).log" +SCRIPT_LOG_LVL=2 # selects the branch taken in log(): 1 stdout, 2 log file, 3 both +K8S_NS="${K8S_NS:-ccp}" # "ccp" unless K8S_NS is already set in the environment +declare
-a OSCONTROLLER=( +'cinder-api:1,2,21' +'glance-api:1,2,22' +'heat-api:1,2,22' +'neutron-metadata-agent:1,2,17' +'neutron-server:1,2,22' +'nova-api:1,2,21' +'keystone:4,5,11' +) # each entry is "container-name:fields"; get_role() echoes the part after the colon +declare -a OSCOMPUTE=( +'nova-compute:' +) +# create a subfolder under the temporary data directory (TMPDATADIR); with no argument TMPDATADIR itself is created +function mk_dir() +{ + local newdir="${TMPDATADIR}/${1}" + if [ ! -d "${newdir}" ]; then + mkdir -p ${newdir} + fi +} +# log function: emits "timestamp: message" to the destination selected by SCRIPT_LOG_LVL; empty messages are dropped +function log() +{ + local input + local dtstamp + input="$*" + dtstamp="$(date +%Y-%m-%d_%H%M%S)" + if [ ! -d "${SCRIPT_LOG_DIR}" ]; then + mkdir -p "${SCRIPT_LOG_DIR}" + fi + case "${SCRIPT_LOG_LVL}" in + 3) # stdout and appended to the log file + if [ ! -z "${input}" ]; then + echo "${dtstamp}: ${input}" | tee -a "${SCRIPT_LOG_FILE}" + fi + ;; + 2) # log file only + if [ ! -z "${input}" ]; then + echo "${dtstamp}: ${input}" >> "${SCRIPT_LOG_FILE}" + fi + ;; + 1) # stdout only + if [ ! -z "${input}" ]; then + echo "${dtstamp}: ${input}" + fi + ;; + *) # any other level: logging disabled + ;; + esac +} +# look up a name in OSCONTROLLER / OSCOMPUTE: $1 is the role ("controller" or "compute"), the remaining arguments are the name; echoes the entry's field list and returns 0 on a match, returns 1 otherwise +function get_role() +{ + local role + local input + local arr_name + local arr_name_fields + role=${1} + shift + input=$* + case ${role} in + "controller") + for i in $(seq 0 $(( ${#OSCONTROLLER[@]} - 1))) + do + arr_name=$(echo ${OSCONTROLLER[${i}]} | cut -d":" -f1) + arr_name_fields=$(echo ${OSCONTROLLER[${i}]} | cut -d":" -f2) + if [[ "${arr_name}" == "${input}" ]]; then + echo "${arr_name_fields}" + return 0 + fi + done + ;; + "compute") + for i in $(seq 0 $(( ${#OSCOMPUTE[@]} - 1))) + do + arr_name=$(echo ${OSCOMPUTE[${i}]} | cut -d":" -f1) + arr_name_fields=$(echo ${OSCOMPUTE[${i}]} | cut -d":" -f2) + if [ "${arr_name}" == "${input}" ]; then + echo "${arr_name_fields}" + return 0 + fi + done + ;; + esac + return 1 +} +# seconds elapsed between the date string given in $1 and now +function tdiff() +{ + local now + local datetime + local result + datetime="$(date -d "${1}" +%s)" + now="$(date +%s)" + result=$(( ${now} - ${datetime} )) + echo ${result} +} +# lock file function: "lock" creates the lock file, replacing an existing one only when its Modify time is at least old_in_sec seconds old (otherwise returns 1); any other action removes the lock file +function glock() +{ + local action + local lockfile + local accessdate +
local old_in_sec=120 # age in seconds from which an existing lock is replaced + action="${1}" + # lockfile="${TMP_METRICS}.lock" + lockfile="${TMPDATADIR}/allmetrics.tmp.lock" + if [[ "${action}" == "lock" && ! -e "${lockfile}" ]]; then + touch "${lockfile}" + elif [[ "${action}" == "lock" && -e "${lockfile}" ]]; then + accessdate="$(stat ${lockfile} | grep Modify | cut -d' ' -f2,3)" # date and time fields of stat's "Modify:" line + if [ "$(tdiff "${accessdate}")" -ge "${old_in_sec}" ]; then + rm "${lockfile}" + touch "${lockfile}" + else + log "Lock file ${lockfile} exists!" + return 1 + fi + else + rm "${lockfile}" + fi + return 0 +} +# wait for parsers launched in background mode: poll once per second until ps lists no process matching OS_LOG_PARSER +function gatherchildren() +{ + local childrencount + while true + do + childrencount=$(ps axf| grep ${OS_LOG_PARSER} | grep -v grep | wc -l) + if [ "${childrencount}" -eq 0 ]; then + return + fi + log "Children running ${childrencount}." + sleep 1 + done +} +# list containers of pods in K8S_NS, run OS_LOG_PARSER for each container name known to get_role "controller", then print and clear the collected results +function get_k8s_containers() +{ + local cont_host + local cont_pod + local cont_name + local cont_id + local os_log_fields + local cont_tmp_dir + local _raw_data + glock "lock" + if [ "$?" -ne 0 ]; then exit 1;fi # glock returned 1: a lock younger than its threshold exists + #echo '[' > ${TMP_METRICS} + _raw_data="${TMPDATADIR}/._raw_data" + rm -rf ${_raw_data} + kubectl get pods -n "${K8S_NS}" -o 'go-template={{range .items}}{{if or (ne .status.phase "Succeeded") (eq .status.phase "Running")}}{{.spec.nodeName}},{{.metadata.name}},{{range .status.containerStatuses}}{{.name}},{{.containerID}}{{end}}{{"\n"}}{{end}}{{end}}' > ${_raw_data} + for data in $(cat ${_raw_data}) + do + cont_host=$(echo ${data} | cut -d',' -f1) + cont_pod=$(echo ${data} | cut -d',' -f2) + cont_name=$(echo ${data} | cut -d',' -f3) + cont_id=$(echo ${data} | cut -d',' -f4 | sed 's|^docker://||') # strip a leading "docker://" from the container ID + cont_tmp_dir="${cont_host}_${cont_pod}_${cont_name}" + os_log_fields=$(get_role "controller" "${cont_name}") + if [ "$?"
-eq 0 ]; then # container name is listed in OSCONTROLLER + mk_dir "${cont_tmp_dir}" + export K8S_NS=${K8S_NS} + export TMP_DIR=${TMPDATADIR}/${cont_tmp_dir} + # export TMP_METRICS=${TMP_METRICS} + export TMP_METRICS="${TMPDATADIR}/results/${cont_pod}.tmp" # per-pod file under results/, which is printed after the loop + export CONTID=${cont_id} + export CONTAINER=${cont_name} + export HOST=${cont_host} + export POD=${cont_pod} + export OS_LOG_FIELDS=${os_log_fields} + log "MODE=${MODE} CONTID=${cont_id} TMP_METRICS=${TMP_METRICS} ROLE=controller HOST=${cont_host} POD=${cont_pod} CONTAINER=${cont_name} OS_LOG_FIELDS=${os_log_fields} TMP_DIR=${TMPDATADIR}/${cont_tmp_dir} K8S_NS=${K8S_NS} ${OS_LOG_PARSER}" + if [[ "${MODE}" == "bg" ]]; then + log "${cont_pod} ${cont_name} ${cont_id}" + ${OS_LOG_PARSER} & # NOTE(review): the parser presumably reads the variables exported above; confirm in glog.sh + else + ${OS_LOG_PARSER} + fi + unset TMP_METRICS # also drops the script-level TMP_METRICS; presumably why glock() spells out the lock path + unset CONTID + unset CONTAINER + unset POD + unset OS_LOG_FIELDS + unset HOST + fi + # os_log_fields=$(get_role "compute" "${cont_name}") + # if [ "$?" -eq 0 ]; then + # mk_dir "${cont_tmp_dir}" + # log "ROLE=compute HOST=${cont_host} POD=${cont_pod} CONTAINER=${cont_name} OS_LOG_FIELDS=${os_log_fields} TMP_DIR=${TMPDATADIR}/${cont_tmp_dir} K8S_NS=${K8S_NS} ${OS_LOG_PARSER}" + # fi + done + gatherchildren + if [ "$(ls ${TMPDATADIR}/results/ | wc -l)" -gt 0 ]; then + cat ${TMPDATADIR}/results/*.tmp + log "Resulting lines $(cat ${TMPDATADIR}/results/*.tmp | wc -l)" + rm -rf ${TMPDATADIR}/results/* # result files are removed once they have been printed + fi + glock "unlock" +} +# Main logic +mk_dir +mk_dir "results" +get_k8s_containers diff --git a/doc/source/methodologies/monitoring/configs/prometheus-grafana-telegraf/telegraf/scripts/per_process_cpu_usage.sh b/doc/source/methodologies/monitoring/configs/prometheus-grafana-telegraf/telegraf/scripts/per_process_cpu_usage.sh new file mode 100755 index 0000000..07f5940 --- /dev/null +++ b/doc/source/methodologies/monitoring/configs/prometheus-grafana-telegraf/telegraf/scripts/per_process_cpu_usage.sh @@ -0,0 +1,6 @@ +#!/bin/bash +export LANG=C +for i in $(ps --no-headers -A -o pid); do + pidstat -p $i | tail -n 1 | grep -v PID |
awk '{print "system_per_process_cpu_usage,process="$9" user="$4",system="$5}' # last pidstat row: column 9 becomes the process tag, columns 4 and 5 the user/system fields +done + diff --git a/doc/source/methodologies/monitoring/configs/prometheus-grafana-telegraf/telegraf/scripts/vmtime.sh b/doc/source/methodologies/monitoring/configs/prometheus-grafana-telegraf/telegraf/scripts/vmtime.sh new file mode 100755 index 0000000..3193166 --- /dev/null +++ b/doc/source/methodologies/monitoring/configs/prometheus-grafana-telegraf/telegraf/scripts/vmtime.sh @@ -0,0 +1,12 @@ +#!/bin/bash +# +WORKDIR="$(cd "$(dirname ${0})" && pwd)" +SCRIPT="${WORKDIR}/$(basename ${0})" +MYSQLUSER="nova" +MYSQPASSWD="password" +MYSQLHOST="mariadb.ccp" +avgdata=$(mysql -u${MYSQLUSER} -p${MYSQPASSWD} -h ${MYSQLHOST} -D nova --skip-column-names --batch -e "select diff from (select avg(unix_timestamp(launched_at) - unix_timestamp(created_at)) as diff from instances where vm_state != 'error' and launched_at >= subtime(now(),'30')) t1 where diff IS NOT NULL;" 2>/dev/null | sed 's/\t/,/g';) +if [ ! -z "${avgdata}" ]; then # print nothing when the query produced no value + echo "vm_spawn_avg_time timediffinsec=${avgdata}" +fi + diff --git a/doc/source/methodologies/monitoring/configs/prometheus-grafana-telegraf/telegraf/telegraf-openstack.conf.j2 b/doc/source/methodologies/monitoring/configs/prometheus-grafana-telegraf/telegraf/telegraf-openstack.conf.j2 new file mode 100644 index 0000000..cb3a270 --- /dev/null +++ b/doc/source/methodologies/monitoring/configs/prometheus-grafana-telegraf/telegraf/telegraf-openstack.conf.j2 @@ -0,0 +1,116 @@ +[global_tags] +metrics_source="system_openstack" +[agent] + interval = "10s" + round_interval = true + metric_batch_size = 1000 + metric_buffer_limit = 10000 + collection_jitter = "0s" + flush_interval = "15s" + flush_jitter = "5s" + precision = "" + debug = false + quiet = false + hostname = "" + omit_hostname = false +[[outputs.prometheus_client]] + listen = ":9126" +[[inputs.cpu]] + percpu = true + totalcpu = true + fielddrop = ["time_*"] +[[inputs.disk]] + ignore_fs = ["tmpfs", "devtmpfs"]
+[[inputs.diskio]] +[[inputs.kernel]] +[[inputs.mem]] +[[inputs.processes]] +[[inputs.swap]] +[[inputs.system]] +[[inputs.kernel_vmstat]] +[[inputs.net]] +[[inputs.netstat]] +[[inputs.exec]] + interval = "15s" + commands = [ + "/opt/telegraf/bin/vmtime.sh", + ] + timeout = "30s" + data_format = "influx" +[[inputs.exec]] + interval = "30s" + commands = [ + "/opt/telegraf/bin/osapitime.sh", + ] + timeout = "60s" + data_format = "influx" +[[inputs.exec]] + interval = "15s" + commands = [ + "/opt/telegraf/bin/etcd_get_metrics.sh" + ] + timeout = "30s" + data_format = "influx" +[[inputs.exec]] + interval = "15s" + commands = [ + "/opt/telegraf/bin/k8s_get_metrics.sh" + ] + timeout = "30s" + data_format = "influx" +[[inputs.openstack]] + interval = '40s' + identity_endpoint = "http://keystone.ccp.svc.cluster.local:5000/v3" + domain = "default" + project = "admin" + username = "admin" + password = "password" +[[inputs.exec]] + interval = "15s" + commands = [ + "/opt/telegraf/bin/iostat_per_device.sh" + ] + timeout = "30s" + data_format = "influx" +[[inputs.exec]] + interval = "15s" + commands = [ + "/opt/telegraf/bin/per_process_cpu_usage.sh" + ] + timeout = "30s" + data_format = "influx" +[[inputs.exec]] + interval = "15s" + commands = [ + "/opt/telegraf/bin/entropy.sh" + ] + timeout = "30s" + data_format = "influx" + [[inputs.exec]] + interval = "60s" + commands = [ + "/opt/telegraf/bin/numa_stat_per_pid.sh" + ] + timeout = "60s" + data_format = "influx" +[[inputs.exec]] + interval = "15s" + commands = [ + "/opt/telegraf/bin/memory_bandwidth.sh" + ] + timeout = "30s" + data_format = "influx" +[[inputs.exec]] + interval = "15s" + commands = [ + "/opt/telegraf/bin/list_openstack_processes.sh" + ] + timeout = "30s" + data_format = "influx" +[[inputs.exec]] + interval = "15s" + commands = [ + "/opt/telegraf/bin/network_tcp_queue.sh" + ] + timeout = "30s" + data_format = "influx" diff --git 
a/doc/source/methodologies/monitoring/configs/prometheus-grafana-telegraf/telegraf/telegraf-sys.conf b/doc/source/methodologies/monitoring/configs/prometheus-grafana-telegraf/telegraf/telegraf-sys.conf new file mode 100644 index 0000000..4e7f610 --- /dev/null +++ b/doc/source/methodologies/monitoring/configs/prometheus-grafana-telegraf/telegraf/telegraf-sys.conf @@ -0,0 +1,81 @@ +[global_tags] +metrics_source="system" +[agent] + interval = "10s" + round_interval = true + metric_batch_size = 1000 + metric_buffer_limit = 10000 + collection_jitter = "0s" + flush_interval = "15s" + flush_jitter = "5s" + precision = "" + debug = false + quiet = false + hostname = "" + omit_hostname = false +[[outputs.prometheus_client]] + listen = ":9126" +[[inputs.cpu]] + percpu = true + totalcpu = true + fielddrop = ["time_*"] +[[inputs.disk]] + ignore_fs = ["tmpfs", "devtmpfs"] +[[inputs.diskio]] +[[inputs.kernel]] +[[inputs.mem]] +[[inputs.processes]] +[[inputs.swap]] +[[inputs.system]] +[[inputs.kernel_vmstat]] +[[inputs.net]] +[[inputs.netstat]] +[[inputs.exec]] + interval = "15s" + commands = [ + "/opt/telegraf/bin/iostat_per_device.sh" + ] + timeout = "30s" + data_format = "influx" +[[inputs.exec]] + interval = "15s" + commands = [ + "/opt/telegraf/bin/per_process_cpu_usage.sh" + ] + timeout = "30s" + data_format = "influx" +[[inputs.exec]] + interval = "15s" + commands = [ + "/opt/telegraf/bin/entropy.sh" + ] + timeout = "30s" + data_format = "influx" + [[inputs.exec]] + interval = "60s" + commands = [ + "/opt/telegraf/bin/numa_stat_per_pid.sh" + ] + timeout = "60s" + data_format = "influx" +[[inputs.exec]] + interval = "15s" + commands = [ + "/opt/telegraf/bin/memory_bandwidth.sh" + ] + timeout = "30s" + data_format = "influx" +[[inputs.exec]] + interval = "15s" + commands = [ + "/opt/telegraf/bin/list_openstack_processes.sh" + ] + timeout = "30s" + data_format = "influx" +[[inputs.exec]] + interval = "15s" + commands = [ + "/opt/telegraf/bin/network_tcp_queue.sh" + ] + 
timeout = "30s" + data_format = "influx" diff --git a/doc/source/methodologies/monitoring/index.rst b/doc/source/methodologies/monitoring/index.rst new file mode 100644 index 0000000..f5e2b09 --- /dev/null +++ b/doc/source/methodologies/monitoring/index.rst @@ -0,0 +1,948 @@ + +.. _Methodology_for_Containerized_Openstack_Monitoring: + +************************************************** +Methodology for Containerized Openstack Monitoring +************************************************** + +:Abstract: + + This document describes one Containerized Openstack monitoring solution + that provides a scalable and comprehensive architecture and obtains all crucial performance + metrics on each structural layer. + + +Containerized Openstack Monitoring Architecture +=============================================== + + This part of the documentation describes the required performance metrics in each + distinct Containerized Openstack layer. + +Containerized Openstack comprises three layers where the Monitoring System should +be able to query all necessary counters: + - OS layer + - Kubernetes layer + - Openstack layer + +Monitoring instruments must be logically divided into two groups: + - Monitoring Server Side + - Node Client Side + +Operating System Layer +---------------------- + +We were using Ubuntu Xenial on top of bare-metal servers for both server and node side. + +Baremetal hardware description +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +We deployed everything on a 200-server environment with the following hardware characteristics: + +..
table:: + + +-------+----------------+------------------------+ + |server |vendor,model |HP,DL380 Gen9 | + +-------+----------------+------------------------+ + |CPU |vendor,model |Intel,E5-2680 v3 | + | +----------------+------------------------+ + | |processor_count |2 | + | +----------------+------------------------+ + | |core_count |12 | + | +----------------+------------------------+ + | |frequency_MHz |2500 | + +-------+----------------+------------------------+ + |RAM |vendor,model |HP,752369-081 | + | +----------------+------------------------+ + | |amount_MB |262144 | + +-------+----------------+------------------------+ + |NETWORK|interface_name |p1p1 | + | +----------------+------------------------+ + | |vendor,model |Intel,X710 Dual Port | + | +----------------+------------------------+ + | |bandwidth |10G | + +-------+----------------+------------------------+ + |STORAGE|dev_name |/dev/sda | + | +----------------+------------------------+ + | |vendor,model | | raid10 - HP P840 | + | | | | 12 disks EH0600JEDHE | + | +----------------+------------------------+ + | |SSD/HDD |HDD | + | +----------------+------------------------+ + | |size | 3,6TB | + +-------+----------------+------------------------+ + +Operating system configuration +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Baremetal nodes were provisioned with Cobbler using our in-house preseed scripts. +OS versions we used: + +..
table:: Versions of Operating Systems + + +--------------------+-----------------------------------------+ + |Software |Version | + +--------------------+-----------------------------------------+ + |Ubuntu |Ubuntu 16.04.1 LTS | + +--------------------+-----------------------------------------+ + |Kernel |4.4.0-47-generic | + +--------------------+-----------------------------------------+ + +You can find the contents of the /etc folder from one of the typical systems we were using: + +:download:`etc_tarball ` + +Required system metrics +^^^^^^^^^^^^^^^^^^^^^^^ + +At this layer we must get the following list of processes: + +.. table:: + + +------------------------+-----------------------------------------+ + |List of processes |Mariadb | + | +-----------------------------------------+ + | |Rabbitmq | + | +-----------------------------------------+ + | |Keystone | + | +-----------------------------------------+ + | |Glance | + | +-----------------------------------------+ + | |Cinder | + | +-----------------------------------------+ + | |Nova | + | +-----------------------------------------+ + | |Neutron | + | +-----------------------------------------+ + | |Openvswitch | + | +-----------------------------------------+ + | |Kubernetes | + +------------------------+-----------------------------------------+ + +And the following list of metrics: + +..
table:: + + +------------------------+-----------------------------------------+ + |Node load average |1min | + | +-----------------------------------------+ + | |5min | + | +-----------------------------------------+ + | |15min | + +------------------------+-----------------------------------------+ + |Global process stats |Running | + | +-----------------------------------------+ + | |Stopped | + | +-----------------------------------------+ + | |Waiting | + +------------------------+-----------------------------------------+ + |Global CPU Usage | Steal | + | +-----------------------------------------+ + | | Wait | + | +-----------------------------------------+ + | | User | + | +-----------------------------------------+ + | | System | + | +-----------------------------------------+ + | | Interrupt | + | +-----------------------------------------+ + | | Nice | + | +-----------------------------------------+ + | | Idle | + +------------------------+-----------------------------------------+ + |Per CPU Usage | User | + | +-----------------------------------------+ + | | System | + +------------------------+-----------------------------------------+ + |Global memory usage |bandwidth | + | +-----------------------------------------+ + | |Cached | + | +-----------------------------------------+ + | |Buffered | + | +-----------------------------------------+ + | |Free | + | +-----------------------------------------+ + | |Used | + | +-----------------------------------------+ + | |Total | + +------------------------+-----------------------------------------+ + |Numa monitoring |Numa_hit | + |For each node +-----------------------------------------+ + | |Numa_miss | + | +-----------------------------------------+ + | |Numa_foreign | + | +-----------------------------------------+ + | |Local_node | + | +-----------------------------------------+ + | |Other_node | + +------------------------+-----------------------------------------+ + |Numa monitoring |Huge | + |For
each pid +-----------------------------------------+ + | |Heap | + | +-----------------------------------------+ + | |Stack | + | +-----------------------------------------+ + | |Private | + +------------------------+-----------------------------------------+ + |Global IOSTAT \+ |Merge reads /s | + |Per device IOSTAT +-----------------------------------------+ + | |Merge write /s | + | +-----------------------------------------+ + | |read/s | + | +-----------------------------------------+ + | |write/s | + | +-----------------------------------------+ + | |Read transfer | + | +-----------------------------------------+ + | |Write transfer | + | +-----------------------------------------+ + | |Read latency | + | +-----------------------------------------+ + | |Write latency | + | +-----------------------------------------+ + | |Write transfer | + | +-----------------------------------------+ + | |Queue size | + | +-----------------------------------------+ + | |Await | + +------------------------+-----------------------------------------+ + |Network per interface |Octets /s (in, out) | + | +-----------------------------------------+ + | |Packet /s (in, out) | + | +-----------------------------------------+ + | |Dropped /s | + +------------------------+-----------------------------------------+ + |Other system metrics |Entropy | + | +-----------------------------------------+ + | |DF per device | + +------------------------+-----------------------------------------+ + +Kubernetes Layer +---------------- + +`Kargo`_ from `Fuel-CCP-installer`_ was our main tool to deploy K8S + on top of provisioned systems (monitored nodes). + + Kargo sets up Kubernetes in the following way: + + - masters: Calico, Kubernetes API services + - nodes: Calico, Kubernetes minion services + - etcd: etcd service + +Kargo deployment parameters +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +You can find the Kargo deployment script in the `Kargo deployment script`_ section. + +..
code:: bash + + docker_options: "--insecure-registry 172.20.8.35:5000 -D" + upstream_dns_servers: [172.20.8.34, 8.8.4.4] + nameservers: [172.20.8.34, 8.8.4.4] + kube_service_addresses: 10.224.0.0/12 + kube_pods_subnet: 10.240.0.0/12 + kube_network_node_prefix: 22 + kube_apiserver_insecure_bind_address: "0.0.0.0" + dns_replicas: 3 + dns_cpu_limit: "100m" + dns_memory_limit: "512Mi" + dns_cpu_requests: "70m" + dns_memory_requests: "70Mi" + deploy_netchecker: false + +.. table:: + + +----------------------+-----------------------------------------+ + |Software |Version | + +----------------------+-----------------------------------------+ + |`Fuel-CCP-Installer`_ |6fd81252cb2d2c804f388337aa67d4403700f094 | + | | | + +----------------------+-----------------------------------------+ + |`Kargo`_ |2c23027794d7851ee31363c5b6594180741ee923 | + +----------------------+-----------------------------------------+ + +Required K8S metrics +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Here we should get K8S health +metrics and ETCD performance metrics: + +.. table:: + + +------------------------+-----------------------------------------+ + |ETCD performance metrics|members count / states | + | +-----------------------------------------+ + | |number of keys in a cluster | + | +-----------------------------------------+ + | |Size of data set | + | +-----------------------------------------+ + | |Avg.
latency from leader to followers | + | +-----------------------------------------+ + | |Bandwidth rate, send/receive | + | +-----------------------------------------+ + | |Create store success/fail | + | +-----------------------------------------+ + | |Get success/fail | + | +-----------------------------------------+ + | |Set success/fail | + | +-----------------------------------------+ + | |Package rate, send/receive | + | +-----------------------------------------+ + | |Expire count | + | +-----------------------------------------+ + | |Update success/fail | + | +-----------------------------------------+ + | |Compare-and-swap success/fail | + | +-----------------------------------------+ + | |Watchers | + | +-----------------------------------------+ + | |Delete success/fail | + | +-----------------------------------------+ + | |Compare-and-delete success/fail | + | +-----------------------------------------+ + | |Append req, send/ receive | + +------------------------+-----------------------------------------+ + |K8S health metrics |Number of nodes in each state | + | +-----------------------------------------+ + | |Total number of namespaces | + | +-----------------------------------------+ + | |Total number of PODs per cluster,node,ns | + | +-----------------------------------------+ + | |Total number of services | + | +-----------------------------------------+ + | |Endpoints in each service | + | +-----------------------------------------+ + | |Number of API service instances | + | +-----------------------------------------+ + | |Number of controller instances | + | +-----------------------------------------+ + | |Number of scheduler instances | + | +-----------------------------------------+ + | |Cluster resources, scheduler view | + +------------------------+-----------------------------------------+ + |K8S API log analysis |Number of responses (per each HTTP code) | + | +-----------------------------------------+ + | |Response Time | +
+------------------------+-----------------------------------------+ + +For the last two metrics we should utilize a log collector to store and parse all +log records within K8S environments. + +Openstack Layer +----------------- + +CCP stands for "Containerized Control Plane". CCP aims to build, run and manage +production-ready OpenStack containers on top of a Kubernetes cluster. + +.. table:: + + +--------------------+-----------------------------------------+ + |Software |Version | + +--------------------+-----------------------------------------+ + |`Fuel-CCP`_ |8570d0e0e512bd16f8449f0a10b1e3900fd09b2d | + +--------------------+-----------------------------------------+ + + +CCP configuration +^^^^^^^^^^^^^^^^^ + +CCP was deployed on top of a 200-node K8S cluster in the following configuration: + +.. code-block:: yaml + + node[1-3]: Kubernetes + node([4-6])$: # 4-6 + roles: + - controller + - openvswitch + node[7-9]$: # 7-9 + roles: + - rabbitmq + node10$: # 10 + roles: + - galera + node11$: # 11 + roles: + - heat + node(1[2-9])$: # 12-19 + roles: + - compute + - openvswitch + node[2-9][0-9]$: # 20-99 + roles: + - compute + - openvswitch + node(1[0-9][0-9])$: # 100-199 + roles: + - compute + - openvswitch + node200$: + roles: + - backup + + +CCP Openstack services list ( `versions.yaml`_ ): + + +..
code-block:: yaml + + openstack/cinder: + git_ref: stable/newton + git_url: https://github.com/openstack/cinder.git + openstack/glance: + git_ref: stable/newton + git_url: https://github.com/openstack/glance.git + openstack/heat: + git_ref: stable/newton + git_url: https://github.com/openstack/heat.git + openstack/horizon: + git_ref: stable/newton + git_url: https://github.com/openstack/horizon.git + openstack/keystone: + git_ref: stable/newton + git_url: https://github.com/openstack/keystone.git + openstack/neutron: + git_ref: stable/newton + git_url: https://github.com/openstack/neutron.git + openstack/nova: + git_ref: stable/newton + git_url: https://github.com/openstack/nova.git + openstack/requirements: + git_ref: stable/newton + git_url: https://git.openstack.org/openstack/requirements.git + openstack/sahara-dashboard: + git_ref: stable/newton + git_url: https://git.openstack.org/openstack/sahara-dashboard.git + + +`K8S Ingress Resources`_ rules were enabled during CCP deployment to expose Openstack services +endpoints to external routable network. + + +See CCP deployment script and configuration files in the +`CCP deployment and configuration files`_ section. + +Required Openstack-related metrics +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +At this layer we should get openstack environment metrics, +API and resources utilization metrics. + +.. 
table:: Required Openstack metrics + + +------------------------+-----------------------------------------+ + |Openstack metrics |Total number of controller nodes | + | +-----------------------------------------+ + | |Total number of services | + | +-----------------------------------------+ + | |Total number of compute nodes | + | +-----------------------------------------+ + | |Total number of nodes | + | +-----------------------------------------+ + | |Total number of VMs | + | +-----------------------------------------+ + | |Number of VMs per tenant, per node | + | +-----------------------------------------+ + | |Resource utilization per project,service | + | +-----------------------------------------+ + | |Total number of tenants | + | +-----------------------------------------+ + | |API request time | + | +-----------------------------------------+ + | |Mean time to spawn VM | + +------------------------+-----------------------------------------+ + +Implementation +============== + +This part of the documentation describes the Monitoring System implementation. +Here is the list of software that we chose to accomplish all required tasks: + +.. table:: + + +-----------------------------------------+-----------------------------------------+ + |Monitoring Node Server Side |Monitored Node Client Side | + +--------------------+--------------------+--------------------+--------------------+ + |Metrics server |Log storage |Metrics agent |Log collector | + | | | | | + +--------------------+--------------------+--------------------+--------------------+ + | `Prometheus`_ \+ | `ElasticSearch`_ |`Telegraf`_ | `Heka`_ | + | `Grafana`_ | \+ `Kibana`_ | | | + +--------------------+--------------------+--------------------+--------------------+ + +Server Side Software +--------------------- + +Prometheus +^^^^^^^^^^ + +..
table:: + + +--------------------+-----------------------------------------+ + |Software |Version | + +--------------------+-----------------------------------------+ + |`Prometheus GitHub`_|7e369b9318a4d5d97a004586a99f10fa51a46b26 | + +--------------------+-----------------------------------------+ + +Due to the high load rate we faced a Prometheus performance issue at metric counts of up to 15 million. +We split the Prometheus setup into 2 standalone nodes. The first node polls API metrics from K8S-related services +that are natively available at the `/metrics` URI and exposed by the K8S API and ETCD API by default. +The second node stores all other metrics that should be collected and calculated locally on environment +servers via Telegraf. + +Prometheus node deployment scripts and configuration files can be found in the `Prometheus deployment and configuration files`_ section. + +Grafana +^^^^^^^ + +.. table:: + + +--------------------+-----------------------------------------+ + |Software |Version | + +--------------------+-----------------------------------------+ + |`Grafana`_ |v4.0.1 | + +--------------------+-----------------------------------------+ + +Grafana was used as a metrics visualizer with several dashboards for each metrics group. +Separate individual dashboards were built for each group of metrics: + +- System nodes metrics +- Kubernetes metrics +- ETCD metrics +- Openstack metrics + +You can find their settings in `Grafana dashboards configuration`_ + +Grafana server deployment script: + +.. code-block:: bash + + #!/bin/bash + ansible-playbook -i ./hosts ./deploy-graf-prom.yaml --tags "grafana" + +It uses the same yaml configuration file `deploy-graf-prom.yaml`_ from the `Prometheus deployment and configuration files`_ section. + +ElasticSearch +^^^^^^^^^^^^^ + +..
table:: + + +--------------------+-----------------------------------------+ + |Software |Version | + +--------------------+-----------------------------------------+ + |`ElasticSearch`_ |2.4.2 | + +--------------------+-----------------------------------------+ + +ElasticSearch is a well-known, proven log storage, and we used it as a standalone +node for collecting Kubernetes API logs and all other logs from containers across the environment. +For appropriate performance in the 200-node lab we increased `ES_HEAP_SIZE` from the default 1G to 10G +in the /etc/default/elasticsearch configuration file. + +ElasticSearch and the Kibana dashboard were installed with the +`deploy_elasticsearch_kibana.sh`_ deployment script. + +Kibana +^^^^^^ + +.. table:: + + +--------------------+-----------------------------------------+ + |Software |Version | + +--------------------+-----------------------------------------+ + |`Kibana`_ |4.5.4 | + +--------------------+-----------------------------------------+ + +We used Kibana as the main visualization tool for ElasticSearch. We were able to create +charts based on K8S API log analysis. Kibana was installed on a single separate node +with a single dashboard representing the K8S API response time graph. + +Dashboard settings: + +:download:`Kibana_dashboard.json ` + +Client Side Software +-------------------- + +Telegraf +^^^^^^^^ + +.. table:: + + +--------------------+-----------------------------------------+ + |Software |Version | + +--------------------+-----------------------------------------+ + |`Telegraf`_ |v1.0.0-beta2-235-gbc14ac5 | + | |git: openstack_stats | + | |bc14ac5b9475a59504b463ad8f82ed810feed3ec | + +--------------------+-----------------------------------------+ + +Telegraf was chosen as the client-side metrics agent. It provides multiple ways to poll and calculate metrics from a variety of +different sources. Owing to its plugin-driven nature, it takes data from different inputs and +exposes calculated metrics in Prometheus format. 
We used a forked version of Telegraf with custom patches to +be able to utilize a custom Openstack input plugin: + +- `GitHub Telegraf Fork`_ +- `Go SDK for OpenStack`_ + +The following automation scripts and configuration files were used to start the Telegraf agent +across environment nodes. + +`Telegraf deployment and configuration files`_ + +Below you can see which plugins were used to obtain metrics. + +Standard Plugins +"""""""""""""""" + +.. code:: bash + + inputs.cpu CPU + inputs.disk + inputs.diskio + inputs.kernel + inputs.mem + inputs.processes + inputs.swap + inputs.system + inputs.kernel_vmstat + inputs.net + inputs.netstat + inputs.exec + +Openstack input plugin +"""""""""""""""""""""" +The custom `inputs.openstack` plugin was used to gather most of the required Openstack-related metrics. + +settings: + +.. code:: bash + + interval = '40s' + identity_endpoint = "http://keystone.ccp.svc.cluster.local:5000/v3" + domain = "default" + project = "admin" + username = "admin" + password = "password" + + +`System.exec` plugin +"""""""""""""""""""" +The `system.exec` plugin was used to trigger scripts to poll +and calculate all non-standard metrics. + +common settings: + +.. code:: bash + + interval = "15s" + timeout = "30s" + data_format = "influx" + +commands: + +.. code:: bash + + "/opt/telegraf/bin/list_openstack_processes.sh" + "/opt/telegraf/bin/per_process_cpu_usage.sh" + "/opt/telegraf/bin/numa_stat_per_pid.sh" + "/opt/telegraf/bin/iostat_per_device.sh" + "/opt/telegraf/bin/memory_bandwidth.sh" + "/opt/telegraf/bin/network_tcp_queue.sh" + "/opt/telegraf/bin/etcd_get_metrics.sh" + "/opt/telegraf/bin/k8s_get_metrics.sh" + "/opt/telegraf/bin/vmtime.sh" + "/opt/telegraf/bin/osapitime.sh" + +You can see the full Telegraf configuration file and its custom input scripts in the +section `Telegraf deployment and configuration files`_. + +Heka +^^^^ + +.. 
table:: + + +--------------------+-----------------------------------------+ + |Software |Version | + +--------------------+-----------------------------------------+ + |`Heka`_ |0.10.0 | + +--------------------+-----------------------------------------+ + +We chose Heka as the log collecting agent for its wide variety of inputs +(the possibility to feed data from the Docker socket), filters (custom shorthand SandBox filters written in Lua) +and the possibility to encode data for ElasticSearch. + +With the Heka agent started across environment servers we were able to send containers' logs to the ElasticSearch +server. With a custom Lua filter we extracted K8S API data and converted it to an appropriate format to +visualize API timing counters (Average Response Time). + +Heka deployment scripts and the configuration file with the custom Lua filter are in the +`Heka deployment and configuration`_ section. + +Applications +============ + +Kargo deployment script +----------------------- + +deploy_k8s_using_kargo.sh +^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. literalinclude:: configs/deploy_k8s_using_kargo.sh + :language: bash + +CCP deployment and configuration files +--------------------------------------- + +deploy-ccp.sh +^^^^^^^^^^^^^ + +.. literalinclude:: configs/ccp/deploy-ccp.sh + :language: bash + +ccp.yaml +^^^^^^^^ + +.. literalinclude:: configs/ccp/ccp.yaml + :language: yaml + +configs.yaml +^^^^^^^^^^^^ + +.. literalinclude:: configs/ccp/configs.yaml + :language: yaml + +topology.yaml +^^^^^^^^^^^^^ + +.. literalinclude:: configs/ccp/topology.yaml + :language: yaml + +repos.yaml +^^^^^^^^^^ + +.. literalinclude:: configs/ccp/repos.yaml + :language: yaml + +versions.yaml +^^^^^^^^^^^^^ + +.. literalinclude:: configs/ccp/versions.yaml + :language: yaml + +Prometheus deployment and configuration files +--------------------------------------------- + +Deployment scripts +^^^^^^^^^^^^^^^^^^ + +deploy_prometheus.sh +"""""""""""""""""""" + +.. 
literalinclude:: configs/prometheus-grafana-telegraf/deploy_prometheus.sh + :language: bash + +deploy-graf-prom.yaml +""""""""""""""""""""" + +.. literalinclude:: configs/prometheus-grafana-telegraf/deploy-graf-prom.yaml + :language: yaml + +docker_prometheus.yaml +"""""""""""""""""""""" + +.. literalinclude:: configs/prometheus-grafana-telegraf/docker_prometheus.yaml + :language: yaml + +deploy_etcd_collect.sh +"""""""""""""""""""""" + +.. literalinclude:: configs/prometheus-grafana-telegraf/deploy_etcd_collect.sh + :language: bash + +Configuration files +^^^^^^^^^^^^^^^^^^^ + +prometheus-kuber.yml.j2 +""""""""""""""""""""""" + +.. literalinclude:: configs/prometheus-grafana-telegraf/prometheus/prometheus-kuber.yml.j2 + :language: bash + +prometheus-system.yml.j2 +"""""""""""""""""""""""" + +.. literalinclude:: configs/prometheus-grafana-telegraf/prometheus/prometheus-system.yml.j2 + :language: bash + +targets.yml.j2 +"""""""""""""" + +.. literalinclude:: configs/prometheus-grafana-telegraf/prometheus/targets.yml.j2 + :language: bash + +Grafana dashboards configuration +-------------------------------- + +:download:`Systems_nodes_statistics.json ` + +:download:`Kubernetes_statistics.json ` + +:download:`ETCD.json ` + +:download:`OpenStack.json ` + +ElasticSearch deployment script +------------------------------- + +deploy_elasticsearch_kibana.sh +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. literalinclude:: configs/elasticsearch-heka/deploy_elasticsearch_kibana.sh + :language: bash + +Telegraf deployment and configuration files +------------------------------------------- + +deploy_telegraf.sh +^^^^^^^^^^^^^^^^^^ + +.. literalinclude:: configs/prometheus-grafana-telegraf/deploy_telegraf.sh + :language: bash + +deploy-telegraf.yaml +^^^^^^^^^^^^^^^^^^^^ + +.. literalinclude:: configs/prometheus-grafana-telegraf/deploy-telegraf.yaml + :language: yaml + +Telegraf system +^^^^^^^^^^^^^^^ + +telegraf-sys.conf +""""""""""""""""" + +.. 
literalinclude:: configs/prometheus-grafana-telegraf/telegraf/telegraf-sys.conf + :language: bash + +Telegraf openstack +^^^^^^^^^^^^^^^^^^^ + +telegraf-openstack.conf.j2 +"""""""""""""""""""""""""" + +.. literalinclude:: configs/prometheus-grafana-telegraf/telegraf/telegraf-openstack.conf.j2 + :language: bash + +Telegraf inputs scripts +^^^^^^^^^^^^^^^^^^^^^^^ + +list_openstack_processes.sh +""""""""""""""""""""""""""" + +.. literalinclude:: configs/prometheus-grafana-telegraf/telegraf/scripts/list_openstack_processes.sh + :language: bash + +per_process_cpu_usage.sh +"""""""""""""""""""""""" + +.. literalinclude:: configs/prometheus-grafana-telegraf/telegraf/scripts/per_process_cpu_usage.sh + :language: bash + +numa_stat_per_pid.sh +"""""""""""""""""""" + +.. literalinclude:: configs/prometheus-grafana-telegraf/telegraf/scripts/numa_stat_per_pid.sh + :language: bash + +iostat_per_device.sh +"""""""""""""""""""" + +.. literalinclude:: configs/prometheus-grafana-telegraf/telegraf/scripts/iostat_per_device.sh + :language: bash + +memory_bandwidth.sh +""""""""""""""""""" + +.. literalinclude:: configs/prometheus-grafana-telegraf/telegraf/scripts/memory_bandwidth.sh + :language: bash + +network_tcp_queue.sh +"""""""""""""""""""" + +.. literalinclude:: configs/prometheus-grafana-telegraf/telegraf/scripts/network_tcp_queue.sh + :language: bash + +etcd_get_metrics.sh +""""""""""""""""""" + +.. literalinclude:: configs/prometheus-grafana-telegraf/telegraf/scripts/etcd_get_metrics.sh + :language: bash + +k8s_get_metrics.sh +"""""""""""""""""" + +.. literalinclude:: configs/prometheus-grafana-telegraf/telegraf/scripts/k8s_get_metrics.sh + :language: bash + +vmtime.sh +""""""""" + +.. literalinclude:: configs/prometheus-grafana-telegraf/telegraf/scripts/vmtime.sh + :language: bash + +osapitime.sh +"""""""""""" + +.. 
literalinclude:: configs/prometheus-grafana-telegraf/telegraf/scripts/osapitime.sh + :language: bash + +Heka deployment and configuration +--------------------------------- + +Deployment +^^^^^^^^^^ + +deploy_heka.sh +"""""""""""""" + +.. literalinclude:: configs/elasticsearch-heka/deploy_heka.sh + :language: bash + +deploy-heka.yaml +"""""""""""""""" + +.. literalinclude:: configs/elasticsearch-heka/deploy-heka.yaml + :language: yaml + +Configuration +^^^^^^^^^^^^^ + +00-hekad.toml.j2 +"""""""""""""""" + +.. literalinclude:: configs/elasticsearch-heka/heka/00-hekad.toml.j2 + :language: bash + +kubeapi_to_int.lua.j2 +""""""""""""""""""""" + +.. literalinclude:: configs/elasticsearch-heka/heka/kubeapi_to_int.lua.j2 + :language: bash + + +.. references: + +.. _Fuel-CCP-Installer: https://github.com/openstack/fuel-ccp-installer +.. _Kargo: https://github.com/kubernetes-incubator/kargo.git +.. _Fuel-CCP: https://github.com/openstack/fuel-ccp +.. _Prometheus: https://prometheus.io/ +.. _Prometheus GitHub: https://github.com/prometheus/prometheus +.. _Grafana: http://grafana.org/ +.. _ElasticSearch: https://www.elastic.co/products/elasticsearch +.. _Kibana: https://www.elastic.co/products/kibana +.. _Telegraf: https://www.influxdata.com/time-series-platform/telegraf/ +.. _GitHub Telegraf Fork: https://github.com/spjmurray/telegraf/tree/openstack_stats/plugins/inputs/openstack +.. _Go SDK for OpenStack: https://github.com/rackspace/gophercloud/ +.. _Heka: https://hekad.readthedocs.io/en/v0.10.0/ +.. _K8S Ingress Resources: http://kubernetes.io/docs/user-guide/ingress/ + +