From 46e76a488a579c506aa158005bee9339f4535351 Mon Sep 17 00:00:00 2001 From: Joseph Davis Date: Fri, 31 Mar 2017 12:19:52 -0700 Subject: [PATCH] Support jsonpath definitions in monasca_field_definitions.yaml To enhance flexibility when mapping a ceilometer sample to the monasca fields, add support for jsonpath parsing of metadata. For example, the cinder samples have metadata stored in lists, which the current parsing (including the dotted '.' notation) cannot handle. Similar requests have been made in the past, and this should give enough power to meet most requests. For a jsonpath that does not resolve to a simple leaf (string or integer value), a CeiloscaMappingDefinitionException is raised. See https://storyboard.openstack.org/#!/story/2000954 Change-Id: I8d7f5e1b87a1de6078b4d397e96dab624fde42bb --- .../publisher/monasca_data_filter.py | 121 +++++-- .../publisher/test_monasca_data_filter.py | 312 +++++++++++++++++- 2 files changed, 387 insertions(+), 46 deletions(-) diff --git a/ceilosca/ceilometer/publisher/monasca_data_filter.py b/ceilosca/ceilometer/publisher/monasca_data_filter.py index 104144d..d68635f 100644 --- a/ceilosca/ceilometer/publisher/monasca_data_filter.py +++ b/ceilosca/ceilometer/publisher/monasca_data_filter.py @@ -14,12 +14,15 @@ # under the License. 
import datetime + +from jsonpath_rw_ext import parser from oslo_config import cfg from oslo_log import log from oslo_utils import timeutils import yaml - +from ceilometer.ceilosca_mapping.ceilosca_mapping import ( + CeiloscaMappingDefinitionException) from ceilometer import sample as sample_util OPTS = [ @@ -27,6 +30,7 @@ OPTS = [ default='/etc/ceilometer/monasca_field_definitions.yaml', help='Monasca static and dynamic field mappings'), ] + cfg.CONF.register_opts(OPTS, group='monasca') MULTI_REGION_OPTS = [ @@ -54,6 +58,8 @@ class NoMappingsFound(Exception): class MonascaDataFilter(object): + JSONPATH_RW_PARSER = parser.ExtentedJsonPathParser() + def __init__(self): self._mapping = {} self._mapping = self._get_mapping() @@ -62,8 +68,21 @@ class MonascaDataFilter(object): with open(cfg.CONF.monasca.monasca_mappings, 'r') as f: try: return yaml.safe_load(f) - except yaml.YAMLError as exc: - raise UnableToLoadMappings(exc.message) + except yaml.YAMLError as err: + if hasattr(err, 'problem_mark'): + mark = err.problem_mark + errmsg = ("Invalid YAML syntax in Monasca Data " + "Filter file %(file)s at line: " + "%(line)s, column: %(column)s." 
+ % dict(file=cfg.CONF.monasca.monasca_mappings, + line=mark.line + 1, + column=mark.column + 1)) + else: + errmsg = ("YAML error reading Monasca Data Filter " + "file %(file)s" % + dict(file=cfg.CONF.monasca.monasca_mappings)) + LOG.error(errmsg) + raise UnableToLoadMappings(err.message) def _convert_timestamp(self, timestamp): if isinstance(timestamp, datetime.datetime): @@ -96,6 +115,64 @@ class MonascaDataFilter(object): return return val + def parse_jsonpath(self, field): + try: + parts = self.JSONPATH_RW_PARSER.parse(field) + except Exception as e: + raise CeiloscaMappingDefinitionException( + "Parse error in JSONPath specification " + "'%(jsonpath)s': %(err)s" + % dict(jsonpath=field, err=e)) + return parts + + def _get_value_metadata_for_key(self, sample_meta, meta_key): + """Get the data for the given key, supporting JSONPath""" + if isinstance(meta_key, dict): + # extract key and jsonpath + # If following convention, dict will have one and only one + # element of the form : + if len(meta_key.keys()) == 1: + mon_key = meta_key.keys()[0] + else: + # If no keys or more keys than one + raise CeiloscaMappingDefinitionException( + "Field definition format mismatch, should " + "have only one key:value pair. %(meta_key)s" % + {'meta_key': meta_key}, meta_key) + json_path = meta_key[mon_key] + parts = self.parse_jsonpath(json_path) + val_matches = parts.find(sample_meta) + if len(val_matches) > 0: + # resolve the find to the first match and get value + val = val_matches[0].value + if not isinstance(val, str) and not isinstance(val, int): + # Don't support lists or dicts or ... + raise CeiloscaMappingDefinitionException( + "Metadata format mismatch, value " + "should be a simple string. 
%(valuev)s" % + {'valuev': val}, meta_key) + else: + val = 'None' + return mon_key, val + else: + # simple string + val = sample_meta.get(meta_key, None) + if val is not None: + return meta_key, val + else: + # one more attempt using a dotted notation + # TODO(joadavis) Deprecate this . notation code + # in favor of jsonpath + if len(meta_key.split('.')) > 1: + val = self.get_value_for_nested_dictionary( + meta_key.split('.'), sample_meta) + if val is not None: + return meta_key, val + else: + return meta_key, 'None' + else: + return meta_key, 'None' + def process_sample_for_monasca(self, sample_obj): if not self._mapping: raise NoMappingsFound("Unable to process the sample") @@ -115,48 +192,28 @@ class MonascaDataFilter(object): else: sample = sample_obj - sample_meta = sample.get('resource_metadata', None) - for dim in self._mapping['dimensions']: val = sample.get(dim, None) - if val is not None: + if val: dimensions[dim] = val else: dimensions[dim] = 'None' + sample_meta = sample.get('resource_metadata', None) value_meta = {} + meter_name = sample.get('name') or sample.get('counter_name') if sample_meta: for meta_key in self._mapping['metadata']['common']: - val = sample_meta.get(meta_key, None) - if val is not None: - value_meta[meta_key] = val - else: - if len(meta_key.split('.')) > 1: - val = self.get_value_for_nested_dictionary( - meta_key.split('.'), sample_meta) - if val is not None: - value_meta[meta_key] = val - else: - value_meta[meta_key] = 'None' - else: - value_meta[meta_key] = 'None' + monasca_key, val = self._get_value_metadata_for_key( + sample_meta, meta_key) + value_meta[monasca_key] = val if meter_name in self._mapping['metadata'].keys(): for meta_key in self._mapping['metadata'][meter_name]: - val = sample_meta.get(meta_key, None) - if val is not None: - value_meta[meta_key] = val - else: - if len(meta_key.split('.')) > 1: - val = self.get_value_for_nested_dictionary( - meta_key.split('.'), sample_meta) - if val is not None: - 
value_meta[meta_key] = val - else: - value_meta[meta_key] = 'None' - else: - value_meta[meta_key] = 'None' + monasca_key, val = self._get_value_metadata_for_key( + sample_meta, meta_key) + value_meta[monasca_key] = val meter_value = sample.get('volume') or sample.get('counter_volume') if meter_value is None: diff --git a/ceilosca/ceilometer/tests/unit/publisher/test_monasca_data_filter.py b/ceilosca/ceilometer/tests/unit/publisher/test_monasca_data_filter.py index d21e980..af320d8 100644 --- a/ceilosca/ceilometer/tests/unit/publisher/test_monasca_data_filter.py +++ b/ceilosca/ceilometer/tests/unit/publisher/test_monasca_data_filter.py @@ -18,6 +18,8 @@ import mock from oslo_utils import timeutils from oslotest import base +from ceilometer.ceilosca_mapping.ceilosca_mapping import ( + CeiloscaMappingDefinitionException) from ceilometer.publisher import monasca_data_filter as mdf from ceilometer import sample @@ -52,6 +54,59 @@ class TestMonUtils(base.BaseTestCase): 'volume.size': ['status'], } } + self._field_mappings_cinder = { + 'dimensions': ['resource_id', + 'project_id', + 'user_id', + 'geolocation', + 'region', + 'source', + 'availability_zone'], + + 'metadata': { + 'common': ['event_type', + 'audit_period_beginning', + 'audit_period_ending', + 'arbitrary_new_field'], + 'volume.create.end': + ['size', 'status', + {'metering.prn_name': + "$.metadata[?(@.key = 'metering.prn_name')].value"}, + {'metering.prn_type': + "$.metadata[?(@.key = 'metering.prn_type')].value"}, + 'volume_type', 'created_at', + 'host'], + 'volume': ['status'], + 'volume.size': ['status'], + } + } + + self._field_mappings_bad_format = { + 'dimensions': ['resource_id', + 'project_id', + 'user_id', + 'geolocation', + 'region', + 'source', + 'availability_zone'], + + 'metadata': { + 'common': ['event_type', + 'audit_period_beginning', + 'audit_period_ending', + 'arbitrary_new_field'], + 'volume.create.end': + ['size', 'status', + {'metering.prn_name': + "$.metadata[?(@.key = 
'metering.prn_name')].value", + 'metering.prn_type': + "$.metadata[?(@.key = 'metering.prn_type')].value"}, + 'volume_type', 'created_at', + 'host'], + 'volume': ['status'], + 'volume.size': ['status'], + } + } def test_process_sample(self): s = sample.Sample( @@ -112,15 +167,15 @@ class TestMonUtils(base.BaseTestCase): self.assertIsNone(r['dimensions'].get('user_id')) def convert_dict_to_list(self, dct, prefix=None, outlst={}): - prefix = prefix+'.' if prefix else "" + prefix = prefix + '.' if prefix else "" for k, v in dct.items(): if type(v) is dict: - self.convert_dict_to_list(v, prefix+k, outlst) + self.convert_dict_to_list(v, prefix + k, outlst) else: if v is not None: - outlst[prefix+k] = v + outlst[prefix + k] = v else: - outlst[prefix+k] = 'None' + outlst[prefix + k] = 'None' return outlst def test_process_sample_metadata(self): @@ -139,7 +194,7 @@ class TestMonUtils(base.BaseTestCase): 'base_url2': '', 'base_url3': None}, 'size': 1500}, - ) + ) to_patch = ("ceilometer.publisher.monasca_data_filter." "MonascaDataFilter._get_mapping") @@ -148,8 +203,9 @@ class TestMonUtils(base.BaseTestCase): r = data_filter.process_sample_for_monasca(s) self.assertEqual(s.name, r['name']) self.assertIsNotNone(r.get('value_meta')) - self.assertTrue(set(self.convert_dict_to_list(s.resource_metadata). - items()).issubset(set(r['value_meta'].items()))) + self.assertTrue(set(self.convert_dict_to_list( + s.resource_metadata + ).items()).issubset(set(r['value_meta'].items()))) def test_process_sample_metadata_with_empty_data(self): s = sample.Sample( @@ -168,7 +224,7 @@ class TestMonUtils(base.BaseTestCase): 'base_url2': '', 'base_url3': None}, 'size': 0}, - ) + ) to_patch = ("ceilometer.publisher.monasca_data_filter." 
"MonascaDataFilter._get_mapping") @@ -179,8 +235,9 @@ class TestMonUtils(base.BaseTestCase): self.assertEqual(s.name, r['name']) self.assertIsNotNone(r.get('value_meta')) self.assertEqual(s.source, r['dimensions']['source']) - self.assertTrue(set(self.convert_dict_to_list(s.resource_metadata). - items()).issubset(set(r['value_meta'].items()))) + self.assertTrue(set(self.convert_dict_to_list( + s.resource_metadata + ).items()).issubset(set(r['value_meta'].items()))) def test_process_sample_metadata_with_extendedKey(self): s = sample.Sample( @@ -199,7 +256,7 @@ class TestMonUtils(base.BaseTestCase): 'base_url2': '', 'base_url3': None}, 'size': 0}, - ) + ) to_patch = ("ceilometer.publisher.monasca_data_filter." "MonascaDataFilter._get_mapping") @@ -209,8 +266,9 @@ class TestMonUtils(base.BaseTestCase): self.assertEqual(s.name, r['name']) self.assertIsNotNone(r.get('value_meta')) - self.assertTrue(set(self.convert_dict_to_list(s.resource_metadata). - items()).issubset(set(r['value_meta'].items()))) + self.assertTrue(set(self.convert_dict_to_list( + s.resource_metadata + ).items()).issubset(set(r['value_meta'].items()))) self.assertEqual(r.get('value_meta')['image_meta.base_url'], s.resource_metadata.get('image_meta') ['base_url']) @@ -219,6 +277,232 @@ class TestMonUtils(base.BaseTestCase): ['base_url2']) self.assertEqual(r.get('value_meta')['image_meta.base_url3'], str(s.resource_metadata.get('image_meta') - ['base_url3'])) + ['base_url3'])) self.assertEqual(r.get('value_meta')['image_meta.base_url4'], 'None') + + def test_process_sample_metadata_with_jsonpath(self): + """Test meter sample in a format produced by cinder.""" + s = sample.Sample( + name='volume.create.end', + type=sample.TYPE_CUMULATIVE, + unit='', + volume=1, + user_id='test', + project_id='test', + resource_id='test_run_tasks', + source='', + timestamp=datetime.datetime.utcnow().isoformat(), + resource_metadata={'event_type': 'volume.create.end', + 'status': 'available', + 'volume_type': None, + # 
'created_at': '2017-03-21T21:05:44+00:00', + 'host': 'testhost', + # this "value: , key: " format is + # how cinder reports metadata + 'metadata': + [{'value': 'aaa0001', + 'key': 'metering.prn_name'}, + {'value': 'Cust001', + 'key': 'metering.prn_type'}], + 'size': 0}, + ) + + to_patch = ("ceilometer.publisher.monasca_data_filter." + "MonascaDataFilter._get_mapping") + # use the cinder specific mapping + with mock.patch(to_patch, side_effect=[self._field_mappings_cinder]): + data_filter = mdf.MonascaDataFilter() + r = data_filter.process_sample_for_monasca(s) + + self.assertEqual(s.name, r['name']) + self.assertIsNotNone(r.get('value_meta')) + # Using convert_dict_to_list is too simplistic for this + self.assertEqual(r.get('value_meta')['event_type'], + s.resource_metadata.get('event_type'), + "Failed to match common element.") + self.assertEqual(r.get('value_meta')['host'], + s.resource_metadata.get('host'), + "Failed to match meter specific element.") + self.assertEqual(r.get('value_meta')['size'], + s.resource_metadata.get('size'), + "Unable to handle an int.") + self.assertEqual(r.get('value_meta')['metering.prn_name'], + 'aaa0001', + "Failed to extract a value " + "using specified jsonpath.") + + def test_process_sample_metadata_with_jsonpath_nomatch(self): + """Test meter sample in a format produced by cinder. + + Behavior when no matching element is found for the specified jsonpath + """ + + s = sample.Sample( + name='volume.create.end', + type=sample.TYPE_CUMULATIVE, + unit='', + volume=1, + user_id='test', + project_id='test', + resource_id='test_run_tasks', + source='', + timestamp=datetime.datetime.utcnow().isoformat(), + resource_metadata={'event_type': 'volume.create.end', + 'status': 'available', + 'volume_type': None, + # 'created_at': '2017-03-21T21:05:44+00:00', + 'host': 'testhost', + 'metadata': [{'value': 'aaa0001', + 'key': 'metering.THISWONTMATCH'}], + 'size': 0}, + ) + + to_patch = ("ceilometer.publisher.monasca_data_filter." 
+ "MonascaDataFilter._get_mapping") + # use the cinder specific mapping + with mock.patch(to_patch, side_effect=[self._field_mappings_cinder]): + data_filter = mdf.MonascaDataFilter() + r = data_filter.process_sample_for_monasca(s) + + self.assertEqual(s.name, r['name']) + self.assertIsNotNone(r.get('value_meta')) + # Using convert_dict_to_list is too simplistic for this + self.assertEqual(r.get('value_meta')['event_type'], + s.resource_metadata.get('event_type'), + "Failed to match common element.") + self.assertEqual(r.get('value_meta')['host'], + s.resource_metadata.get('host'), + "Failed to match meter specific element.") + self.assertEqual(r.get('value_meta')['size'], + s.resource_metadata.get('size'), + "Unable to handle an int.") + self.assertEqual(r.get('value_meta')['metering.prn_name'], + 'None', "This metadata should fail to match " + "and then return 'None'.") + + def test_process_sample_metadata_with_jsonpath_value_not_str(self): + """Test where jsonpath is used but result is not a simple string""" + + s = sample.Sample( + name='volume.create.end', + type=sample.TYPE_CUMULATIVE, + unit='', + volume=1, + user_id='test', + project_id='test', + resource_id='test_run_tasks', + source='', + timestamp=datetime.datetime.utcnow().isoformat(), + resource_metadata={'event_type': 'volume.create.end', + 'status': 'available', + 'volume_type': None, + # 'created_at': '2017-03-21T21:05:44+00:00', + 'host': 'testhost', + 'metadata': [{'value': ['aaa0001', 'bbbb002'], + 'key': 'metering.prn_name'}], + 'size': 0}, + ) + + to_patch = ("ceilometer.publisher.monasca_data_filter." 
+ "MonascaDataFilter._get_mapping") + # use the cinder specific mapping + with mock.patch(to_patch, side_effect=[self._field_mappings_cinder]): + data_filter = mdf.MonascaDataFilter() + try: + # Don't assign to a variable, this should raise + data_filter.process_sample_for_monasca(s) + except CeiloscaMappingDefinitionException as e: + self.assertEqual( + 'Metadata format mismatch, value should be ' + 'a simple string. [\'aaa0001\', \'bbbb002\']', + e.message) + + def test_process_sample_metadata_with_jsonpath_value_is_int(self): + """Test meter sample where jsonpath result is an int.""" + + s = sample.Sample( + name='volume.create.end', + type=sample.TYPE_CUMULATIVE, + unit='', + volume=1, + user_id='test', + project_id='test', + resource_id='test_run_tasks', + source='', + timestamp=datetime.datetime.utcnow().isoformat(), + resource_metadata={'event_type': 'volume.create.end', + 'status': 'available', + 'volume_type': None, + # 'created_at': '2017-03-21T21:05:44+00:00', + 'host': 'testhost', + 'metadata': [{'value': 13, + 'key': 'metering.prn_name'}], + 'size': 0}, + ) + + to_patch = ("ceilometer.publisher.monasca_data_filter." 
+ "MonascaDataFilter._get_mapping") + # use the cinder specific mapping + with mock.patch(to_patch, side_effect=[self._field_mappings_cinder]): + data_filter = mdf.MonascaDataFilter() + r = data_filter.process_sample_for_monasca(s) + + self.assertEqual(s.name, r['name']) + self.assertIsNotNone(r.get('value_meta')) + # Using convert_dict_to_list is too simplistic for this + self.assertEqual(r.get('value_meta')['event_type'], + s.resource_metadata.get('event_type'), + "Failed to match common element.") + self.assertEqual(r.get('value_meta')['host'], + s.resource_metadata.get('host'), + "Failed to match meter specific element.") + self.assertEqual(r.get('value_meta')['size'], + s.resource_metadata.get('size'), + "Unable to handle an int.") + self.assertEqual(r.get('value_meta')['metering.prn_name'], + 13, + "Unable to handle an int " + "through the jsonpath processing") + + def test_process_sample_metadata_with_jsonpath_bad_format(self): + """Test handling of definition that is not written correctly""" + + s = sample.Sample( + name='volume.create.end', + type=sample.TYPE_CUMULATIVE, + unit='', + volume=1, + user_id='test', + project_id='test', + resource_id='test_run_tasks', + source='', + timestamp=datetime.datetime.utcnow().isoformat(), + resource_metadata={'event_type': 'volume.create.end', + 'status': 'available', + 'volume_type': None, + # 'created_at': '2017-03-21T21:05:44+00:00', + 'host': 'testhost', + 'metadata': [{'value': 13, + 'key': 'metering.prn_name'}], + 'size': 0}, + ) + + to_patch = ("ceilometer.publisher.monasca_data_filter." 
+ "MonascaDataFilter._get_mapping") + # use the bad mapping + with mock.patch(to_patch, + side_effect=[self._field_mappings_bad_format]): + data_filter = mdf.MonascaDataFilter() + try: + # Don't assign to a variable as this should raise + data_filter.process_sample_for_monasca(s) + except CeiloscaMappingDefinitionException as e: + # Make sure we got the right kind of error + # Cannot check the whole message text, as python + # may reorder a dict when producing a string version + self.assertIn( + 'Field definition format mismatch, should ' + 'have only one key:value pair.', + e.message, + "Did raise exception but wrong message - %s" % e.message)