338 lines
13 KiB
Python
338 lines
13 KiB
Python
# (C) Copyright 2016 Hewlett Packard Enterprise Development LP
|
|
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
|
# not use this file except in compliance with the License. You may obtain
|
|
# a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
|
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
|
# License for the specific language governing permissions and limitations
|
|
# under the License.
|
|
|
|
from copy import deepcopy
|
|
import errno
|
|
import fcntl
|
|
import json
|
|
import os
|
|
import time
|
|
|
|
from monasca_agent.collector import checks
|
|
|
|
|
|
# Values carried by the plugin's own status metric
OK = 0
FAIL = 1

# Name of the metric this module reports about itself
PLUGIN_METRIC_NAME = 'monasca.json_plugin.status'

# The metrics file is assumed to be rewritten every 60 seconds, so
# measurements older than four of those periods are too old to report.
DEFAULT_STALE_AGE = 4 * 60

# Attributes a measurement is allowed to carry
METRIC_KEYS = [
    'name',
    'metric',
    'timestamp',
    'value',
    'dimensions',
    'value_meta',
]
|
|
|
|
|
|
def _now():
|
|
"""Makes unit testing easier"""
|
|
return time.time()
|
|
|
|
|
|
class JsonPlugin(checks.AgentCheck):
    """Read measurements from JSON-formatted files

    This plugin reads measurements from JSON-formatted files.

    The format of the file is shown in the following example:

        {
            "stale_age": 300,
            "replace_timestamps": false,
            "measurements": [
                {
                    "metric": "a_metric",
                    "dimensions": {"dim1": "val1"},
                    "value": 30.0,
                    "timestamp": 1474644040
                },
                {
                    "metric": "second_metric",
                    "dimensions": {"dim2": "val2"},
                    "value": 22.4,
                    "timestamp": 1474644040
                }
            ]
        }

    In effect, the file contains a header and a list of measurements. The
    header has the following fields:

    stale_age:

        A time in seconds. If the timestamp of a measurement is
        older than this, the measurement is not reported and this plugin
        reports a monasca.json_plugin.status metric with a value of 1.
        The value_meta of that metric contains the name of the JSON file
        that is aged.

        This header is optional. It defaults to 4 minutes.

    replace_timestamps:

        A boolean. If set, the next time the plugin is called, it will
        send all the measurements with a timestamp equal to the current
        time (ignoring the timestamp in the measurements list). The
        stale_age value is ignored with this setting.

        This header is optional. It defaults to false.

    measurements:

        This is a list of the measurements, formatted in the same way
        that measurements are presented to the Monasca API. However,
        if replace_timestamps is set, the timestamp key can be omitted
        (since it is set to current time).

    An alternate format for the file is that the header is omitted, i.e.,
    if the first item in the file is a list, it is assumed this is the
    measurement list and the header values are defaulted.
    """

    def __init__(self, name, init_config, agent_config, instances=None,
                 logger=None):
        """Set up the per-plugin bookkeeping.

        :param logger: optional logger that replaces the one provided by
                       the base class (used by unit tests)
        """
        super(JsonPlugin, self).__init__(name, init_config, agent_config,
                                         instances)
        self.log = logger or self.log
        # path -> latest error message for that path ('' means no error)
        self.plugin_failures = {}
        self.now = time.time()
        # path -> copies of the measurements already handed to the agent
        self.posted_metrics = {}
        # Populated by _load_instance_config() on every check() call
        self.metrics_dir = ''
        self.metrics_files = []

    def _plugin_failed(self, file_name, msg):
        """Record (and log) a failure against a file or directory path.

        Only the most recent message per path is kept.
        """
        self.plugin_failures[file_name] = msg
        self.log.warning('%s: %s' % (file_name, msg))

    def _plugin_check_metric(self):
        """Generate metric to report status of the plugin

        :returns: a metric dict whose value is OK, or FAIL with a
                  value_meta 'msg' listing every path that currently has
                  an error recorded against it
        """
        plugin_metric = dict(metric=PLUGIN_METRIC_NAME,
                             dimensions={},
                             value=OK,
                             timestamp=self.now)
        # If there were any failures, put the path
        # and error message into value_meta.
        errors = ['%s: %s' % (path, err)
                  for path, err in self.plugin_failures.items() if err]
        if errors:
            msg = ', '.join(errors)
            if len(msg) > 1024:  # keep well below length limit
                # Keep the START of the message and mark the truncation
                msg = msg[:1021] + '...'
            plugin_metric.update(dict(value_meta=dict(msg=msg),
                                      value=FAIL))
        return plugin_metric

    @staticmethod
    def _take_shared_lock(fd):
        """Take shared lock on a file descriptor

        Assuming the writer of the JSON file also takes a lock, this
        function locks a file descriptor so that we can read the file
        without worrying that the content is changing as we read.

        Raises IOError if lock cannot be taken after a number of attempts.

        :param fd: the file descriptor of the file being read
        """
        max_retries = 5
        delay = 0.02
        attempts = 0
        while True:
            attempts += 1
            try:
                fcntl.flock(fd, fcntl.LOCK_SH | fcntl.LOCK_NB)
                break
            except IOError as err:
                # Only "lock is held by someone else" is worth retrying
                if (err.errno not in [errno.EWOULDBLOCK, errno.EACCES] or
                        attempts > max_retries):
                    raise
                # Back off a little longer after every failed attempt
                time.sleep(delay * attempts)

    def _load_measurements_from_file(self, file_name):
        """Read one JSON file and return the measurements to post.

        Any problem with the file is recorded with _plugin_failed() rather
        than raised, so one bad file cannot stop the other files from
        being processed.

        :param file_name: the path of the JSON file
        :returns: list of valid, non-stale, not-yet-posted metrics
        """
        # Start from the defaults so later steps always find both keys,
        # whatever the file turns out to contain.
        handling = {'stale_age': DEFAULT_STALE_AGE,
                    'replace_timestamps': False}
        file_data = {'measurements': []}
        try:
            with open(file_name, 'r') as f:
                self._take_shared_lock(f)
                f.seek(0)
                file_data = json.load(f)
        except (ValueError, TypeError) as e:
            self._plugin_failed(file_name,
                                'failed parsing json: %s' % e)
        except Exception as e:  # noqa
            self._plugin_failed(file_name,
                                'loading error: %s' % e)

        if isinstance(file_data, list):
            # Header omitted: the file is just the measurement list
            metrics = file_data
        elif isinstance(file_data, dict):
            metrics = file_data.get('measurements', [])
            handling['stale_age'] = file_data.get('stale_age',
                                                  DEFAULT_STALE_AGE)
            handling['replace_timestamps'] = file_data.get(
                'replace_timestamps', False)
        else:
            self._plugin_failed(file_name,
                                'unable to process file contents: expected'
                                ' a JSON object or list, got %s' %
                                type(file_data).__name__)
            metrics = []

        if not isinstance(metrics, list):
            self._plugin_failed(file_name,
                                'unable to process file contents:'
                                ' measurements is not a list')
            metrics = []

        # stale_age is only consulted when timestamps are not replaced
        stale_age = handling['stale_age']
        if (not handling['replace_timestamps'] and
                (isinstance(stale_age, bool) or
                 not isinstance(stale_age, (int, float)))):
            self._plugin_failed(file_name,
                                'invalid stale_age %r; using default of'
                                ' %s seconds' % (stale_age,
                                                 DEFAULT_STALE_AGE))
            handling['stale_age'] = DEFAULT_STALE_AGE

        metrics = self._filter_metrics(metrics, file_name)
        return self._remove_duplicate_metrics(handling, metrics, file_name)

    @staticmethod
    def _is_valid_metric(metric):
        """Tell whether an item from the measurement list is usable.

        A usable metric is a dict that has only keys listed in
        METRIC_KEYS, has a 'name' or a 'metric' key, and has a 'value' key.
        """
        if not isinstance(metric, dict):
            return False  # not a dict
        if any(key not in METRIC_KEYS for key in metric):
            return False  # spurious attribute
        if 'name' not in metric and 'metric' not in metric:
            return False  # missing name
        return 'value' in metric

    def _filter_metrics(self, metrics, file_name):
        """Remove invalid metrics from the metric list

        Validate and clean up so the metric is suitable for passing to
        AgentCheck.gauge(). The metric might be invalid (e.g., value_meta too
        long), but that's not our concern here.

        :param metrics: the raw items found in the measurement list
        :param file_name: the path of the JSON file
        :returns: list of the metrics that passed validation
        """
        # A separate flag is needed because the invalid item itself may be
        # falsy (e.g. null, 0 or an empty list).
        found_invalid = False
        invalid_metric = None
        valid_metrics = []
        for metric in metrics:
            if not self._is_valid_metric(metric):
                found_invalid = True
                invalid_metric = metric
                continue

            if 'name' in metric:
                # API use 'name'; AgentCheck uses 'metric'
                metric['metric'] = metric.pop('name')
            if not metric.get('dimensions', None):
                metric['dimensions'] = {}
            valid_metrics.append(metric)

        if found_invalid:
            # Only report one invalid metric per file
            self._plugin_failed(file_name,
                                'invalid metric found: %s' %
                                (invalid_metric,))
        return valid_metrics

    def _age_of(self, metric):
        """Return how many seconds old a metric is relative to self.now.

        A missing timestamp counts as 0, and a timestamp that cannot be
        subtracted from a number is treated the same way, so such metrics
        look maximally old instead of raising.
        """
        try:
            return self.now - metric.get('timestamp', 0)
        except TypeError:
            return self.now

    def _remove_duplicate_metrics(self, handling, metrics, file_name):
        """Remove metrics if we've already reported them

        We track the metrics we've posted to the Monasca Agent. This allows
        us to discard duplicate metrics. The most common cause of duplicates
        is that the agent runs more often than the update period of the JSON
        file.

        We also discard metrics that seem stale. This can occur when the
        program creating the metrics file has died, so the JSON file
        does not update with new metrics.

        :param handling: options for how measurements are handled
        :param metrics: The metrics we found in the JSON file
        :param file_name: the path of the JSON file
        :returns: A list of metrics that should be posted
        """

        # Set timestamp if asked
        if handling.get('replace_timestamps', False):
            for metric in metrics:
                metric['timestamp'] = self.now
            # Since we've set the timestamp, these are unique (not duplicate)
            # so no further processing is required
            return metrics

        stale_age = handling.get('stale_age', DEFAULT_STALE_AGE)

        # Remove metrics we've already posted. Also remove stale metrics.
        posted = self.posted_metrics.setdefault(file_name, [])
        stale_metrics = False
        fresh_metrics = []
        for metric in metrics:
            if self._age_of(metric) > stale_age:
                stale_metrics = True  # too old
            elif metric not in posted:
                # New metric; not stale. Remember a private copy, because
                # the caller goes on to modify the dict it is given back.
                posted.append(deepcopy(metric))
                fresh_metrics.append(metric)
            # else: already sent to Monasca
        # Update the caller's list in place, as well as returning it
        metrics[:] = fresh_metrics

        # Purge really old metrics from posted
        self.posted_metrics[file_name] = [
            metric for metric in posted
            if self._age_of(metric) < stale_age * 2]

        if stale_metrics:
            self._plugin_failed(file_name, 'Metrics are older than %s seconds;'
                                           ' file not updating?' %
                                stale_age)
        return metrics

    def _get_metrics(self):
        """Collect the metrics to post from every configured file."""
        reported = []
        for file_name in self.metrics_files:
            reported.extend(self._load_measurements_from_file(file_name))
        return reported

    def _load_instance_config(self, instance):
        """Work out which files to read for this instance.

        'metrics_dir' takes precedence: every regular file in it whose name
        ends in .json is used. Otherwise the single 'metrics_file' is used,
        if configured. The failure record of every path selected is reset.

        :param instance: the instance configuration mapping
        """
        self.metrics_files = []
        self.metrics_dir = instance.get('metrics_dir', '')
        if self.metrics_dir:
            self.plugin_failures[self.metrics_dir] = ''
            try:
                for entry in os.listdir(self.metrics_dir):
                    path = os.path.join(self.metrics_dir, entry)
                    # .json extension protects from reading .swp and similar
                    if os.path.isfile(path) and path.lower().endswith('.json'):
                        self.metrics_files.append(path)
            except OSError as err:
                self._plugin_failed(self.metrics_dir,
                                    'Error processing: %s' % err)
        else:
            metric_file = instance.get('metrics_file', '')
            if metric_file:
                self.metrics_files = [metric_file]
        self.log.debug('Using metrics files %s' % ','.join(self.metrics_files))
        for file_name in self.metrics_files:
            self.plugin_failures[file_name] = ''

    def check(self, instance):
        """Read the configured files and post their metrics.

        The plugin status metric is always appended after the metrics
        loaded from the files.

        :param instance: the instance configuration mapping
        """
        self._load_instance_config(instance)
        self.now = _now()

        # Load measurements from files
        all_metrics = self._get_metrics()

        # Add this plugin status
        all_metrics.append(self._plugin_check_metric())

        for metric in all_metrics:
            # Everything for one metric happens inside the try so a single
            # bad metric cannot prevent the remaining ones (including the
            # status metric, which is last) from being posted.
            try:
                # apply any instance dimensions that may be configured,
                # overriding any dimension with same key that check has set.
                metric['dimensions'] = self._set_dimensions(
                    metric['dimensions'], instance)
                self.log.debug('Posting metric: %s' % metric)
                self.gauge(**metric)
            except Exception as e:  # noqa
                self.log.exception('Exception while reporting metric: %s' % e)
|