From b796efd45f2a5640ab4eb915d6f4e31291c62b9f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rafael=20Weing=C3=A4rtner?=
Date: Mon, 14 Aug 2023 08:23:25 -0300
Subject: [PATCH] Patch for `use_all_resource_revisions` option

The option 'use_all_resource_revisions' is useful when using Gnocchi
with the patch introduced in [1]. That patch can cause queries to
return more than one entry per granularity (timespan), depending on
the number of revisions a resource has. This can be problematic when
using the 'mutate' option of CloudKitty. Therefore, this option
('use_all_resource_revisions') allows operators to discard all
datapoints returned from Gnocchi except the last one in the
granularity that is queried by CloudKitty. The default behavior is
maintained, which means that CloudKitty always uses all of the data
points returned.

However, when the 'mutate' option is not used, we need to sum all the
quantities and use this value with the latest version of the
attributes received. Otherwise, we will miss the complete accounting
for the time frame where the revision happened.

[1] https://github.com/gnocchixyz/gnocchi/pull/1059

Change-Id: I45bdaa3783ff483d49ecca70571caf529f3ccbc3
---
 cloudkitty/collector/gnocchi.py               | 138 ++++++++++++++++--
 ...tch-use-all-revision-0325eeb0f7871c35.yaml |   5 +
 2 files changed, 130 insertions(+), 13 deletions(-)
 create mode 100644 releasenotes/notes/patch-use-all-revision-0325eeb0f7871c35.yaml

diff --git a/cloudkitty/collector/gnocchi.py b/cloudkitty/collector/gnocchi.py
index e02067f3..bd71e986 100644
--- a/cloudkitty/collector/gnocchi.py
+++ b/cloudkitty/collector/gnocchi.py
@@ -540,25 +540,137 @@ class GnocchiCollector(collector.BaseCollector):
         Therefore, this option ('use_all_resource_revisions') allows
         operators to discard all datapoints returned from Gnocchi, but
         the last one in the granularity queried by CloudKitty. The default
-        behavior is maintained, which means, CloudKitty always use all of
-        the data points returned.
+        behavior is maintained, which means that CloudKitty always uses
+        all the data points returned.
+
+        When the 'mutate' option is not used, we need to sum all the
+        quantities and use this value with the latest version of the
+        attributes received. Otherwise, we will miss the complete
+        accounting for the time frame where the revision happened.
         """
-        use_all_resource_revisions = \
-            met['extra_args']['use_all_resource_revisions']
+        use_all_resource_revisions = met[
+            'extra_args']['use_all_resource_revisions']
+
         LOG.debug("Configuration use_all_resource_revisions set to [%s] for "
-                  "%s", use_all_resource_revisions, metric_name)
+                  "metric [%s]", use_all_resource_revisions, metric_name)

         if data and not use_all_resource_revisions:
-            data.sort(
-                key=lambda x: (x["group"]["id"], x["group"]["revision_start"]),
-                reverse=False)
+            if "id" not in data[0].get('group', {}):
+                LOG.debug("There is no 'id' attribute in the groupby section "
+                          "and we are trying to use "
+                          "'use_all_resource_revisions'. However, without an "
+                          "ID there is not much we can do to identify the "
+                          "revisions for a resource.")
+                return data
+
+            original_data = copy.deepcopy(data)
+            # Here we order the data so that the latest revision is kept as
+            # the principal element to be used. We are assuming that there
+            # is a revision_start attribute, which denotes when the revision
+            # was created. If there is no revision start, we cannot do much.
+            data.sort(key=lambda x: (x["group"]["id"],
+                                     x["group"]["revision_start"]),
+                      reverse=False)
+
+            # We just care about the latest entry per resource in the
+            # given time slice (configured granularity in CloudKitty)
+            # regarding the attributes. For the quantity, we still want to
+            # use all the quantity elements, summing up the value of all
+            # the revisions.
+            map_id_entry = {d["group"]["id"]: d for d in data}
+            single_entries_per_id = list(map_id_entry.values())
+
+            GnocchiCollector.zero_quantity_values(single_entries_per_id)
+
+            for element in original_data:
+                LOG.debug("Processing entry [%s] of the original data from "
+                          "Gnocchi to sum all of the revisions if needed for "
+                          "metric [%s].", element, metric_name)
+                group_entry = element.get('group')
+                if not group_entry:
+                    LOG.warning("No groupby section found for element [%s].",
+                                element)
+                    continue
+
+                entry_id = group_entry.get('id')
+                if not entry_id:
+                    LOG.warning("No ID attribute found for element [%s].",
+                                element)
+                    continue
+
+                first_measure = element.get('measures')
+                if first_measure:
+                    second_measure = first_measure.get('measures')
+                    if second_measure:
+                        aggregated_value = second_measure.get('aggregated', [])
+                        if len(aggregated_value) == 1:
+                            actual_aggregated_value = aggregated_value[0]
+
+                            if len(actual_aggregated_value) == 3:
+                                value_to_add = actual_aggregated_value[2]
+                                entry = map_id_entry[entry_id]
+                                old_value = list(
+                                    entry['measures']['measures'][
+                                        'aggregated'][0])
+
+                                new_value = copy.deepcopy(old_value)
+                                new_value[2] += value_to_add
+                                entry['measures']['measures'][
+                                    'aggregated'][0] = tuple(new_value)
+
+                                LOG.debug("Adding value [%s] to value [%s] "
+                                          "in entry [%s] for metric [%s].",
+                                          value_to_add, old_value, entry,
+                                          metric_name)

-        # We just care about the oldest entry per resource ID in the
-        # given time slice (configured granularity in Cloudkitty).
-        single_entries_per_id = {d["group"]["id"]: d for d in
-                                 data}.values()
         LOG.debug("Replaced list of data points [%s] with [%s] for "
-                  "metric [%s]", data, single_entries_per_id, metric_name)
+                  "metric [%s]", original_data, single_entries_per_id,
+                  metric_name)
+        data = single_entries_per_id

         return data
+
+    @staticmethod
+    def zero_quantity_values(single_entries_per_id):
+        """Zeroes the quantity value of each entry for further processing."""
+        for single_entry in single_entries_per_id:
+            first_measure = single_entry.get('measures')
+            if first_measure:
+                second_measure = first_measure.get('measures')
+                if second_measure:
+                    aggregated_value = second_measure.get('aggregated', [])
+
+                    if len(aggregated_value) == 1:
+                        actual_aggregated_value = aggregated_value[0]
+
+                        # We need to convert the tuple to a list.
+                        actual_aggregated_value = list(actual_aggregated_value)
+                        if len(actual_aggregated_value) == 3:
+                            LOG.debug("Zeroing aggregated value for single "
+                                      "entry [%s].", single_entry)
+                            # We are going to zero these elements, as we
+                            # will be summing all of them later.
+                            actual_aggregated_value[2] = 0
+
+                            # Convert back to tuple.
+                            aggregated_value[0] = tuple(
+                                actual_aggregated_value)
+                        else:
+                            LOG.warning("We expect the actual aggregated "
+                                        "value to be a list of 3 elements: "
+                                        "the first one is a timestamp, "
+                                        "the second the granularity, and "
+                                        "the last one the quantity "
+                                        "measured. But we got a different "
+                                        "structure: [%s] for entry [%s].",
+                                        actual_aggregated_value,
+                                        single_entry)
+                    else:
+                        LOG.warning("Aggregated value returned does not "
+                                    "have the expected size. Expected 1, "
+                                    "but got [%s].", len(aggregated_value))
+                else:
+                    LOG.debug('Second measure of the aggregates API for '
+                              'entry [%s] is empty.', single_entry)
+            else:
+                LOG.debug('First measure of the aggregates API for entry '
+                          '[%s] is empty.', single_entry)
diff --git a/releasenotes/notes/patch-use-all-revision-0325eeb0f7871c35.yaml b/releasenotes/notes/patch-use-all-revision-0325eeb0f7871c35.yaml
new file mode 100644
index 00000000..c135845d
--- /dev/null
+++ b/releasenotes/notes/patch-use-all-revision-0325eeb0f7871c35.yaml
@@ -0,0 +1,5 @@
+---
+fixes:
+  - |
+    Fixes the accounting of quantity values when the
+    ``use_all_resource_revisions`` option is used in the Gnocchi collector.
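
For illustration, below is a minimal standalone sketch of the
revision-collapsing logic this patch implements: keep only the latest
revision per resource ID, but sum the quantities of all revisions into
it. It assumes the aggregates API response shape visible in the diff,
namely a 'group' section carrying 'id' and 'revision_start', and a
nested 'measures' -> 'measures' -> 'aggregated' list holding
(timestamp, granularity, value) tuples. The collapse_revisions helper
and the sample entries are illustrative assumptions, not part of
CloudKitty or of this patch.

import copy


def collapse_revisions(data):
    # Ascending sort by (id, revision_start): the last occurrence of
    # each ID is its newest revision.
    data = sorted(data, key=lambda x: (x["group"]["id"],
                                       x["group"]["revision_start"]))

    # The dict comprehension keeps the last (latest) entry per ID.
    latest_per_id = {d["group"]["id"]: copy.deepcopy(d) for d in data}

    # Zero the quantity of each kept entry, then add every revision's
    # quantity back in, so usage recorded under older revisions of the
    # resource is not lost.
    for entry in latest_per_id.values():
        ts, gran, _ = entry["measures"]["measures"]["aggregated"][0]
        entry["measures"]["measures"]["aggregated"][0] = (ts, gran, 0)

    for element in data:
        _, _, value = element["measures"]["measures"]["aggregated"][0]
        kept = latest_per_id[element["group"]["id"]]
        ts, gran, total = kept["measures"]["measures"]["aggregated"][0]
        kept["measures"]["measures"]["aggregated"][0] = (ts, gran,
                                                         total + value)

    return list(latest_per_id.values())


# Two revisions of the same resource inside one granularity period:
entries = [
    {"group": {"id": "vm-1", "revision_start": "2023-08-14T08:00:00"},
     "measures": {"measures": {"aggregated": [
         ("2023-08-14T08:00:00", 3600, 10.0)]}}},
    {"group": {"id": "vm-1", "revision_start": "2023-08-14T08:30:00"},
     "measures": {"measures": {"aggregated": [
         ("2023-08-14T08:30:00", 3600, 5.0)]}}},
]
print(collapse_revisions(entries))
# One entry for 'vm-1', with the newest revision's attributes and the
# summed quantity 15.0.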