From 20f9ad47562181363ff57a35ee8a3f79bcd21508 Mon Sep 17 00:00:00 2001 From: Steven Fitzpatrick Date: Wed, 1 Apr 2020 02:54:58 -0500 Subject: [PATCH] [fix] Openstack Exporter - Handle Duplicate Values The CollectorRegistry object does not allow an identical metric to be added, which occasionally happens when the nova, neutron and hypervisor collectors update their caches. Also, - The cinder endpoint has been updated to v3 to resolve a 404 which was occurring during metric collection. - Extra logging settings were removed from files. Log level and format are now set in main.py - misc pep8 fixes to imports and newlines Change-Id: Ia0bebdc1a39b25bdeae47d01625cfb7b89d132eb --- .../exporter/base.py | 1 - .../exporter/check_os_api.py | 12 ++--- .../exporter/cinder_services.py | 15 +++--- .../exporter/hypervisor_stats.py | 46 +++++++++++------- .../exporter/main.py | 23 +++++---- .../exporter/neutron_agents.py | 48 +++++++++++-------- .../exporter/nova_services.py | 43 +++++++++-------- .../exporter/oscache.py | 9 ++-- .../exporter/osclient.py | 6 +-- 9 files changed, 110 insertions(+), 93 deletions(-) diff --git a/prometheus-openstack-exporter/exporter/base.py b/prometheus-openstack-exporter/exporter/base.py index 950677c8..c96ab5e8 100644 --- a/prometheus-openstack-exporter/exporter/base.py +++ b/prometheus-openstack-exporter/exporter/base.py @@ -14,7 +14,6 @@ import re - class OSBase(object): FAIL = 0 OK = 1 diff --git a/prometheus-openstack-exporter/exporter/check_os_api.py b/prometheus-openstack-exporter/exporter/check_os_api.py index 6a92b330..012f80ca 100644 --- a/prometheus-openstack-exporter/exporter/check_os_api.py +++ b/prometheus-openstack-exporter/exporter/check_os_api.py @@ -12,17 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import logging +from urllib.parse import urlparse + +from prometheus_client import CollectorRegistry, generate_latest, Gauge + from base import OSBase -from urllib.parse import urlparse -from prometheus_client import CollectorRegistry, generate_latest, Gauge -import logging -logging.basicConfig( - level=logging.DEBUG, - format="%(asctime)s:%(levelname)s:%(message)s") logger = logging.getLogger(__name__) - class CheckOSApi(OSBase): """Class to check the status of OpenStack API services.""" diff --git a/prometheus-openstack-exporter/exporter/cinder_services.py b/prometheus-openstack-exporter/exporter/cinder_services.py index d6ce7a80..0160e9d0 100644 --- a/prometheus-openstack-exporter/exporter/cinder_services.py +++ b/prometheus-openstack-exporter/exporter/cinder_services.py @@ -12,14 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. -from base import OSBase -from collections import Counter -from collections import defaultdict -from prometheus_client import CollectorRegistry, generate_latest, Gauge import logging -logging.basicConfig( - level=logging.DEBUG, - format="%(asctime)s:%(levelname)s:%(message)s") +from collections import Counter, defaultdict + +from prometheus_client import CollectorRegistry, generate_latest, Gauge + +from base import OSBase + logger = logging.getLogger(__name__) @@ -33,7 +32,7 @@ class CinderServiceStats(OSBase): aggregated_workers = defaultdict(Counter) - stats = self.osclient.get_workers('cinder') + stats = self.osclient.get_workers('cinderv3') for worker in stats: service = worker['service'] state = worker['state'] diff --git a/prometheus-openstack-exporter/exporter/hypervisor_stats.py b/prometheus-openstack-exporter/exporter/hypervisor_stats.py index fa407c36..0ea79a18 100644 --- a/prometheus-openstack-exporter/exporter/hypervisor_stats.py +++ b/prometheus-openstack-exporter/exporter/hypervisor_stats.py @@ -12,13 +12,12 @@ # See the License for the specific language 
governing permissions and # limitations under the License. -from base import OSBase +import logging from prometheus_client import CollectorRegistry, generate_latest, Gauge -import logging -logging.basicConfig( - level=logging.DEBUG, - format="%(asctime)s:%(levelname)s:%(message)s") + +from base import OSBase + logger = logging.getLogger(__name__) @@ -59,7 +58,7 @@ class HypervisorStats(OSBase): 'metrics': {'free_vcpus': 0}, } nova_aggregates[agg['name']]['metrics'].update( - {v: 0 for v in list(self.VALUE_MAP.values())} + {v: 0 for v in list(self.VALUE_MAP.values())} ) r = self.osclient.get('nova', 'os-hypervisors/detail') @@ -137,15 +136,28 @@ class HypervisorStats(OSBase): labels = ['region', 'host', 'aggregate', 'aggregate_id'] hypervisor_stats_cache = self.get_cache_data() for hypervisor_stat in hypervisor_stats_cache: - stat_gauge = Gauge( - self.gauge_name_sanitize( - hypervisor_stat['stat_name']), - 'Openstack Hypervisor statistic', - labels, - registry=registry) - label_values = [self.osclient.region, - hypervisor_stat.get('host', ''), - hypervisor_stat.get('aggregate', ''), - hypervisor_stat.get('aggregate_id', '')] - stat_gauge.labels(*label_values).set(hypervisor_stat['stat_value']) + try: + stat_gauge = Gauge( + self.gauge_name_sanitize( + hypervisor_stat['stat_name']), + 'Openstack Hypervisor statistic', + labels, + registry=registry) + label_values = [self.osclient.region, + hypervisor_stat.get('host', ''), + hypervisor_stat.get('aggregate', ''), + hypervisor_stat.get('aggregate_id', '')] + stat_gauge.labels(*label_values).set(hypervisor_stat['stat_value']) + except ValueError: + if 'host' in hypervisor_stat: + location = hypervisor_stat['host'] + elif 'aggregate' in hypervisor_stat: + location = hypervisor_stat['aggregate'] + else: + location = 'N/A' + + logger.debug('Unchanged value for stat {} already present in ' + 'hypervisor registry for host {}; ignoring.' 
+ .format(hypervisor_stat['stat_name'], location)) + return generate_latest(registry) diff --git a/prometheus-openstack-exporter/exporter/main.py b/prometheus-openstack-exporter/exporter/main.py index bdbefa9b..2d2646ab 100644 --- a/prometheus-openstack-exporter/exporter/main.py +++ b/prometheus-openstack-exporter/exporter/main.py @@ -17,10 +17,10 @@ import argparse import yaml import os import urllib.parse - -from http.server import BaseHTTPRequestHandler -from http.server import HTTPServer +import logging +from http.server import BaseHTTPRequestHandler, HTTPServer from socketserver import ForkingMixIn + from prometheus_client import CONTENT_TYPE_LATEST from osclient import OSClient @@ -31,10 +31,10 @@ from nova_services import NovaServiceStats from cinder_services import CinderServiceStats from hypervisor_stats import HypervisorStats -import logging logging.basicConfig( - level=logging.DEBUG, - format="%(asctime)s:%(levelname)s:%(message)s") + level=logging.INFO, + format="%(asctime)s:%(levelname)s: %(message)s") + logger = logging.getLogger(__name__) collectors = [] @@ -57,10 +57,15 @@ class OpenstackExporterHandler(BaseHTTPRequestHandler): stats = collector.get_stats() if stats is not None: output = output + stats - except BaseException: + except BaseException as inst: logger.warning( - "Could not get stats for collector {}".format( - collector.get_cache_key())) + 'Could not get stats for collector {}.' + '"{}" Exception "{}" occured.' 
+ .format( + collector.get_cache_key(), + type(inst), + inst + )) self.send_response(200) self.send_header('Content-Type', CONTENT_TYPE_LATEST) self.end_headers() diff --git a/prometheus-openstack-exporter/exporter/neutron_agents.py b/prometheus-openstack-exporter/exporter/neutron_agents.py index 926183b7..b74e0ce2 100644 --- a/prometheus-openstack-exporter/exporter/neutron_agents.py +++ b/prometheus-openstack-exporter/exporter/neutron_agents.py @@ -12,14 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. -from base import OSBase -from collections import Counter -from collections import defaultdict -from prometheus_client import CollectorRegistry, generate_latest, Gauge import logging -logging.basicConfig( - level=logging.DEBUG, - format="%(asctime)s:%(levelname)s:%(message)s") +from collections import Counter, defaultdict + +from prometheus_client import CollectorRegistry, generate_latest, Gauge + +from base import OSBase + logger = logging.getLogger(__name__) @@ -67,18 +66,25 @@ class NeutronAgentStats(OSBase): labels = ['region', 'host', 'service', 'state'] neutron_agent_stats_cache = self.get_cache_data() for neutron_agent_stat in neutron_agent_stats_cache: - stat_gauge = Gauge( - self.gauge_name_sanitize( - neutron_agent_stat['stat_name']), - 'Openstack Neutron agent statistic', - labels, - registry=registry) - label_values = [self.osclient.region, - neutron_agent_stat.get('host', ''), - neutron_agent_stat.get('service', ''), - neutron_agent_stat.get('state', '')] - stat_gauge.labels( - * - label_values).set( - neutron_agent_stat['stat_value']) + try: + stat_gauge = Gauge( + self.gauge_name_sanitize( + neutron_agent_stat['stat_name']), + 'Openstack Neutron agent statistic', + labels, + registry=registry) + label_values = [self.osclient.region, + neutron_agent_stat.get('host', ''), + neutron_agent_stat.get('service', ''), + neutron_agent_stat.get('state', '')] + stat_gauge.labels( + * + 
label_values).set( + neutron_agent_stat['stat_value']) + except ValueError: + logger.debug('Unchanged value for stat {} already present in ' + 'neutron agent registry for host {}; ignoring.' + .format(neutron_agent_stat['stat_name'], + neutron_agent_stat['host'])) + return generate_latest(registry) diff --git a/prometheus-openstack-exporter/exporter/nova_services.py b/prometheus-openstack-exporter/exporter/nova_services.py index 5daf8be7..0c1ef598 100644 --- a/prometheus-openstack-exporter/exporter/nova_services.py +++ b/prometheus-openstack-exporter/exporter/nova_services.py @@ -12,16 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. -from base import OSBase -from collections import Counter -from collections import defaultdict -from prometheus_client import CollectorRegistry, generate_latest, Gauge import logging -logging.basicConfig( - level=logging.DEBUG, - format="%(asctime)s:%(levelname)s:%(message)s") -logger = logging.getLogger(__name__) +from collections import Counter, defaultdict +from prometheus_client import CollectorRegistry, generate_latest, Gauge + +from base import OSBase + +logger = logging.getLogger(__name__) class NovaServiceStats(OSBase): """ Class to report the statistics on Nova services. 
@@ -68,15 +66,22 @@ class NovaServiceStats(OSBase): labels = ['region', 'host', 'service', 'state'] services_stats_cache = self.get_cache_data() for services_stat in services_stats_cache: - stat_gauge = Gauge( - self.gauge_name_sanitize( - services_stat['stat_name']), - 'Openstack Nova Service statistic', - labels, - registry=registry) - label_values = [self.osclient.region, - services_stat.get('host', ''), - services_stat.get('service', ''), - services_stat.get('state', '')] - stat_gauge.labels(*label_values).set(services_stat['stat_value']) + try: + stat_gauge = Gauge( + self.gauge_name_sanitize( + services_stat['stat_name']), + 'Openstack Nova Service statistic', + labels, + registry=registry) + label_values = [self.osclient.region, + services_stat.get('host', ''), + services_stat.get('service', ''), + services_stat.get('state', '')] + stat_gauge.labels(*label_values).set(services_stat['stat_value']) + except ValueError: + logger.debug('Unchanged value for stat {} already present in ' + 'nova services registry for host {}; ignoring.' 
+ .format(services_stat['stat_name'], + services_stat['host'])) + return generate_latest(registry) diff --git a/prometheus-openstack-exporter/exporter/oscache.py b/prometheus-openstack-exporter/exporter/oscache.py index c66ac8a0..f7cd9887 100644 --- a/prometheus-openstack-exporter/exporter/oscache.py +++ b/prometheus-openstack-exporter/exporter/oscache.py @@ -14,14 +14,12 @@ from threading import Thread from threading import Lock -from prometheus_client import CollectorRegistry, generate_latest, Gauge from time import sleep, time import logging -logging.basicConfig( - level=logging.DEBUG, - format="%(asctime)s:%(levelname)s:%(message)s") -logger = logging.getLogger(__name__) +from prometheus_client import CollectorRegistry, generate_latest, Gauge + +logger = logging.getLogger(__name__) class ThreadSafeDict(dict): def __init__(self, * p_arg, ** n_arg): @@ -35,7 +33,6 @@ class ThreadSafeDict(dict): def __exit__(self, type, value, traceback): self._lock.release() - class OSCache(Thread): def __init__(self, refresh_interval, region): diff --git a/prometheus-openstack-exporter/exporter/osclient.py b/prometheus-openstack-exporter/exporter/osclient.py index 6c1cbdec..471059a8 100644 --- a/prometheus-openstack-exporter/exporter/osclient.py +++ b/prometheus-openstack-exporter/exporter/osclient.py @@ -18,16 +18,12 @@ import dateutil.tz import requests import simplejson as json import logging -logging.basicConfig( - level=logging.DEBUG, - format="%(asctime)s:%(levelname)s:%(message)s") -logger = logging.getLogger(__name__) +logger = logging.getLogger(__name__) class KeystoneException(Exception): pass - class OSClient(object): """ Base class for querying the OpenStack API endpoints.