# (C) Copyright 2017 Hewlett Packard Enterprise Development LP
|
|
import math
|
|
import requests
|
|
import six
|
|
|
|
from prometheus_client.parser import text_string_to_metric_families
|
|
|
|
import monasca_agent.collector.checks as checks
|
|
import monasca_agent.collector.checks.utils as utils
|
|
|
|
# Default pod/service label keys that are copied into metric dimensions
# (overridable per instance via the 'kubernetes_labels' setting).
KUBERNETES_LABELS = ['app']
|
|
|
|
|
|
class Prometheus(checks.AgentCheck):
    """Scrapes metrics from Prometheus endpoints.

    Can be configured three ways:
        1. Autodetect endpoints by pod annotations
        2. Autodetect endpoints by services
        3. Manually configure each prometheus endpoints to scrape

    We autodetect based on the annotations assigned to pods/services.

    We look for the following entries:
        'prometheus.io/scrape': Only scrape pods that have a value of 'true'
        'prometheus.io/path': If the metrics path is not '/metrics' override this.
        'prometheus.io/port': Scrape the pod on the indicated port instead of the default of '9102'.
    """

    def __init__(self, name, init_config, agent_config, instances=None):
        super(Prometheus, self).__init__(name, init_config, agent_config, instances)
        # Timeout (seconds) for kubelet, Kubernetes API and metric endpoint requests.
        self.connection_timeout = init_config.get("timeout", 3)
        self.auto_detect_endpoints = init_config.get("auto_detect_endpoints", False)
        if self.auto_detect_endpoints:
            # Both are lazily initialized on the first check run.
            self.kubernetes_connector = None
            self.detect_method = init_config.get("detect_method", "pod").lower()
            self.kubelet_url = None
            if instances is not None and len(instances) > 1:
                raise Exception('Prometheus Client only supports one configured instance if auto detection is set')
            if self.detect_method not in ['pod', 'service']:
                # Bug fix: the original never interpolated the offending value
                # into the '{}' placeholder of this message.
                raise Exception('Invalid detect method {}. Must be either pod or service'.format(
                    self.detect_method))

    def check(self, instance):
        """Run one collection cycle for the given configured instance."""
        dimensions = self._set_dimensions(None, instance)
        # Metrics are attributed to the scraped endpoint, not to the agent host.
        del dimensions['hostname']
        if not self.auto_detect_endpoints:
            metric_endpoint = instance.get("metric_endpoint", None)
            if not metric_endpoint:
                self.log.error("metric_endpoint must be defined for each instance")
                return
            # Bug fix: copy the configured dict so repeated check runs do not
            # mutate the instance configuration with agent dimensions.
            endpoint_dimensions = dict(instance.get("default_dimensions", {}))
            endpoint_dimensions.update(dimensions)
            self.report_endpoint_metrics(metric_endpoint, endpoint_dimensions)
        else:
            self.kubernetes_labels = instance.get('kubernetes_labels', KUBERNETES_LABELS)
            if not self.kubernetes_connector:
                self.kubernetes_connector = utils.KubernetesConnector(self.connection_timeout)
            if self.detect_method == "pod":
                if not self.kubelet_url:
                    try:
                        host = self.kubernetes_connector.get_agent_pod_host()
                        # 10255 is the kubelet read-only port.
                        self.kubelet_url = "http://{}:10255/pods".format(host)
                    except Exception as e:
                        self.log.error("Could not obtain current host from Kubernetes API {}. "
                                       "Skipping check".format(e))
                        return
                metric_endpoints = self._get_metric_endpoints_by_pod(dimensions)
            # Detect by service
            else:
                metric_endpoints = self._get_metric_endpoints_by_service(dimensions)
            for metric_endpoint, endpoint_dimensions in six.iteritems(metric_endpoints):
                endpoint_dimensions.update(dimensions)
                self.report_endpoint_metrics(metric_endpoint, endpoint_dimensions)

    def _get_metric_endpoints_by_pod(self, dimensions):
        """Build {scrape_url: dimensions} from pods reported by the local kubelet.

        Raises Exception when the kubelet cannot be reached; individual
        malformed pods are logged and skipped.
        """
        scrape_endpoints = {}
        # Grab running pods from local Kubelet
        try:
            pods = requests.get(self.kubelet_url, timeout=self.connection_timeout).json()
        except Exception as e:
            exception_message = "Could not get pods from local kubelet with error - {}".format(e)
            self.log.exception(exception_message)
            raise Exception(exception_message)

        # Iterate through each pod and check if it contains a scrape endpoint
        for pod in pods['items']:
            try:
                pod_metadata = pod['metadata']
                pod_spec = pod['spec']
                pod_status = pod['status']
                if "annotations" not in pod_metadata or not ('containers' in pod_spec and 'podIP' in pod_status):
                    # No annotations, containers, or endpoints skipping pod
                    continue

                # Check pod annotations if we should scrape pod
                pod_annotations = pod_metadata['annotations']
                prometheus_scrape = pod_annotations.get("prometheus.io/scrape", "false").lower()
                if prometheus_scrape != "true":
                    continue
                # Collect every declared container port; the annotation-selected
                # port must match one of them.
                pod_ports = []
                pod_containers = pod_spec['containers']
                for container in pod_containers:
                    if "ports" in container:
                        pod_ports += container['ports']
                pod_name = pod_metadata['name']
                endpoints = self._get_prometheus_endpoint(pod_annotations, pod_ports, pod_name)
                if not endpoints:
                    continue

                # Add pod endpoint to scrape endpoints
                pod_ip = pod_status['podIP']
                pod_dimensions = dimensions.copy()
                pod_dimensions.update(utils.get_pod_dimensions(
                    self.kubernetes_connector, pod['metadata'],
                    self.kubernetes_labels))
                # Loop through list of ports and build list of endpoints
                for endpoint in endpoints:
                    scrape_endpoint = "http://{}:{}".format(pod_ip, endpoint)
                    scrape_endpoints[scrape_endpoint] = pod_dimensions
                    self.log.info("Detected pod endpoint - {} with metadata "
                                  "of {}".format(scrape_endpoint,
                                                 pod_dimensions))
            except Exception as e:
                # A single bad pod must not abort detection for the rest.
                self.log.warning("Error parsing {} to detect for scraping - {}".format(pod, e))
                continue

        return scrape_endpoints

    def _get_metric_endpoints_by_service(self, dimensions):
        """Build {scrape_url: dimensions} from services in the Kubernetes API.

        Raises Exception when the API cannot be reached.
        """
        scrape_endpoints = {}
        # Grab services from Kubernetes API
        try:
            services = self.kubernetes_connector.get_request("/api/v1/services")
        except Exception as e:
            exception_message = "Could not get services from Kubernetes API with error - {}".format(e)
            self.log.exception(exception_message)
            raise Exception(exception_message)

        # Iterate through each service and check if it is a scape endpoint
        for service in services['items']:
            service_metadata = service['metadata']
            service_spec = service['spec']
            if "annotations" not in service_metadata or "ports" not in service_spec:
                # No annotations or pods skipping service
                continue

            # Check service annotations if we should scrape service
            service_annotations = service_metadata['annotations']
            prometheus_scrape = service_annotations.get("prometheus.io/scrape", "false").lower()
            if prometheus_scrape != "true":
                continue
            service_name = service_metadata['name']
            service_ports = service_spec['ports']
            endpoints = self._get_prometheus_endpoint(service_annotations,
                                                      service_ports,
                                                      service_name)
            if not endpoints:
                continue

            # Add service endpoint to scrape endpoints
            cluster_ip = service_spec['clusterIP']
            service_dimensions = dimensions.copy()
            service_dimensions.update(
                self._get_service_dimensions(service_metadata))
            for endpoint in endpoints:
                scrape_endpoint = "http://{}:{}".format(cluster_ip, endpoint)
                scrape_endpoints[scrape_endpoint] = service_dimensions
                self.log.info("Detected service endpoint - {} with metadata "
                              "of {}".format(scrape_endpoint,
                                             service_dimensions))
        return scrape_endpoints

    def _get_service_dimensions(self, service_metadata):
        """Return dimensions (name, namespace, configured labels) for a service."""
        service_dimensions = {'service_name': service_metadata['name'],
                              'namespace': service_metadata['namespace']}
        if "labels" in service_metadata:
            service_labels = service_metadata['labels']
            for label in self.kubernetes_labels:
                if label in service_labels:
                    service_dimensions[label] = service_labels[label]
        return service_dimensions

    def _get_prometheus_endpoint(self, annotations, ports, name):
        """Analyzes annotations and ports to generate a scrape target.

        Returns a list of "port/path" strings, possibly empty when no
        port could be selected.
        """
        # Pod ports live under 'containerPort'; service ports under 'port'.
        pod_index = "containerPort" if self.detect_method == "pod" else "port"
        configured_ports = []
        if "prometheus.io/port" in annotations:
            configured_ports = annotations.get("prometheus.io/port").split(',')
            configured_ports = [int(i) for i in configured_ports]

        if self.detect_method == "pod" and not configured_ports:
            # 9102 is the conventional default prometheus exporter port.
            configured_ports = [9102]
        prometheus_endpoint = annotations.get("prometheus.io/path", "/metrics")
        prometheus_endpoint = prometheus_endpoint.lstrip('/')

        endpoints = []
        for port in ports:
            for configured_port in configured_ports:
                if port[pod_index] == configured_port:
                    # Build up list of ports and prometheus endpoints to return
                    endpoints.append("{}/{}".format(configured_port,
                                                    prometheus_endpoint))

        if len(ports) == 1 and not endpoints:
            self.log.info("Could not find matching port using only port "
                          "configured")
            # Bug fix: 'ports' is a list of port dicts - the original indexed
            # it with the key name ('ports[pod_index]'), raising TypeError.
            endpoints.append("{}/{}".format(ports[0][pod_index], prometheus_endpoint))

        if not endpoints:
            self.log.error("Can not derive which port to use. Due to more "
                           "then one port configured and none of them "
                           "selected via configurations. {} {} skipped for "
                           "scraping".format(self.detect_method, name))
        return endpoints

    def _send_metrics(self, metric_families, dimensions):
        """Submit parsed prometheus samples as gauges, skipping NaN values."""
        for metric_family in metric_families:
            for metric in metric_family.samples:
                metric_dimensions = dimensions.copy()
                metric_name = metric[0]
                metric_labels = metric[1]
                metric_value = float(metric[2])
                if math.isnan(metric_value):
                    self.log.debug('filtering out NaN value provided for metric %s{%s}', metric_name, metric_labels)
                    continue
                # remove empty string dimensions from prometheus labels
                for dim_key, dim_value in metric_labels.items():
                    if len(dim_value) > 0:
                        metric_dimensions[dim_key] = dim_value
                # SUPPRESS keeps the agent host name off endpoint-scoped metrics.
                self.gauge(metric_name, metric_value, dimensions=metric_dimensions, hostname="SUPPRESS")

    def report_endpoint_metrics(self, metric_endpoint, endpoint_dimensions):
        """Scrape one endpoint and report its metrics; errors are logged, not raised."""
        # Hit metric endpoint
        try:
            result = requests.get(metric_endpoint, timeout=self.connection_timeout)
        except Exception as e:
            self.log.error("Could not get metrics from {} with error {}".format(metric_endpoint, e))
        else:
            result_content_type = result.headers['Content-Type']
            # Only the prometheus text exposition format is supported.
            if "text/plain" in result_content_type:
                try:
                    metric_families = text_string_to_metric_families(result.text)
                    self._send_metrics(metric_families, endpoint_dimensions)
                except Exception as e:
                    self.log.error("Error parsing data from {} with error {}".format(metric_endpoint, e))
            else:
                self.log.error("Unsupported content type - {}".format(result_content_type))