Merge "Create 'use_all_resource_revisions' for Gnocchi collector"

Zuul 2020-12-22 17:18:58 +00:00 committed by Gerrit Code Review
commit 213087869a
5 changed files with 109 additions and 2 deletions


@@ -15,6 +15,7 @@
#
from datetime import timedelta
import requests
import six
from gnocchiclient import auth as gauth
@@ -36,7 +37,6 @@ from cloudkitty import dataframe
from cloudkitty import utils as ck_utils
from cloudkitty.utils import tz as tzutils
LOG = logging.getLogger(__name__)
COLLECTOR_GNOCCHI_OPTS = 'collector_gnocchi'
@@ -115,6 +115,7 @@ GNOCCHI_EXTRA_SCHEMA = {
Required('re_aggregation_method', default='max'):
In(BASIC_AGGREGATION_METHODS),
Required('force_granularity', default=3600): All(int, Range(min=0)),
Required('use_all_resource_revisions', default=True): All(bool),
},
}
@@ -413,6 +414,9 @@ class GnocchiCollector(collector.BaseCollector):
q_filter=q_filter,
)
data = GnocchiCollector.filter_unecessary_measurements(
data, met, metric_name)
resources_info = None
if met['metadata']:
resources_info = self._fetch_resources(
@@ -422,9 +426,13 @@ class GnocchiCollector(collector.BaseCollector):
project_id=project_id,
q_filter=q_filter
)
formated_resources = list()
for d in data:
# Only if aggregates have been found
LOG.debug("Processing entry [%s] for [%s] in timestamp ["
"start=%s, end=%s] and project id [%s]", d,
metric_name, start, end, project_id)
if d['measures']['measures']['aggregated']:
try:
metadata, groupby, qty = self._format_data(
@@ -444,3 +452,40 @@
metadata,
))
return formated_resources
@staticmethod
def filter_unecessary_measurements(data, met, metric_name):
"""Filter unecessary measurements if not 'use_all_resource_revisions'
The option 'use_all_resource_revisions' is useful when using Gnocchi
with the patch introduced in
https://github.com/gnocchixyz/gnocchi/pull/1059.
That patch can cause queries to return more than one entry per
granularity (timespan), according to the revisions a resource has.
This can be problematic when using the 'mutate' option of Cloudkitty.
Therefore, this option ('use_all_resource_revisions') allows operators
to discard all datapoints returned from Gnocchi, but the last one in
the granularity queried by CloudKitty. The default behavior is
maintained, which means, CloudKitty always use all of the data
points returned.
"""
use_all_resource_revisions = \
met['extra_args']['use_all_resource_revisions']
LOG.debug("Configuration use_all_resource_revisions set to [%s] for "
"%s", use_all_resource_revisions, metric_name)
if data and not use_all_resource_revisions:
data.sort(
key=lambda x: (x["group"]["id"], x["group"]["revision_start"]),
reverse=False)
# We just care about the latest entry per resource ID in the
# given time slice (configured granularity in CloudKitty).
single_entries_per_id = {d["group"]["id"]: d for d in
data}.values()
LOG.debug("Replaced list of data points [%s] with [%s] for "
"metric [%s]", data, single_entries_per_id, metric_name)
data = single_entries_per_id
return data
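For illustration only (this sketch is not part of the patch above), the deduplication step can be reproduced standalone with made-up sample data: after an ascending sort on revision_start, rebuilding a dict keyed on the resource id keeps only the latest revision per resource.

import datetime

from dateutil import tz

# Hypothetical sample data shaped like the aggregates entries handled
# above: two revisions of the same resource within one granularity.
data = [
    {"group": {"id": "id-1",
               "revision_start": datetime.datetime(
                   2020, 1, 1, tzinfo=tz.tzutc())}},
    {"group": {"id": "id-1",
               "revision_start": datetime.datetime(
                   2020, 1, 1, 1, 10, tzinfo=tz.tzutc())}},
]

# Ascending sort so the last dict insertion per id is the latest revision.
data.sort(key=lambda x: (x["group"]["id"], x["group"]["revision_start"]))
latest_per_id = {d["group"]["id"]: d for d in data}.values()

# Only the entry with revision_start 2020-01-01 01:10 remains.
print(list(latest_per_id))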


@@ -215,3 +215,50 @@ class GnocchiCollectorAggregationOperationTest(tests.TestCase):
["metric", "metric_one", "rate:mean"],
]
self.do_test(expected_op, extra_args=extra_args)
def test_filter_unecessary_measurements_use_all_datapoints(self):
data = [
{"group":
{
"id": "id-1",
"revision_start": datetime.datetime(
2020, 1, 1, tzinfo=tz.tzutc())}},
{"group":
{"id": "id-1",
"revision_start": datetime.datetime(
2020, 1, 1, 1, 10, 0, tzinfo=tz.tzutc())}}
]
expected_data = data.copy()
metric_name = 'test_metric'
metric = {
'name': metric_name,
'extra_args': {'use_all_resource_revisions': True}}
data_filtered = gnocchi.GnocchiCollector.\
filter_unecessary_measurements(data, metric, metric_name)
self.assertEqual(expected_data, data_filtered)
def test_filter_unecessary_measurements_use_only_last_datapoint(self):
expected_data = {"group": {"id": "id-1",
"revision_start": datetime.datetime(
2020, 1, 1, 1, 10, 0, tzinfo=tz.tzutc())
}}
data = [
{"group": {"id": "id-1", "revision_start": datetime.datetime(
2020, 1, 1, tzinfo=tz.tzutc())}},
expected_data
]
metric_name = 'test_metric'
metric = {'name': metric_name, 'extra_args': {
'use_all_resource_revisions': False}}
data_filtered = gnocchi.GnocchiCollector.\
filter_unecessary_measurements(data, metric, metric_name)
data_filtered = list(data_filtered)
self.assertEqual(1, len(data_filtered))
self.assertEqual(expected_data, data_filtered[0])


@@ -71,7 +71,7 @@ class MetricConfigValidationTest(tests.TestCase):
expected_output['metric_one']['extra_args'] = {
'aggregation_method': 'max', 're_aggregation_method': 'max',
'force_granularity': 3600, 'resource_type': 'res',
'resource_key': 'id'}
'resource_key': 'id', 'use_all_resource_revisions': True}
self.assertEqual(
collector.gnocchi.GnocchiCollector.check_configuration(data),


@@ -279,6 +279,17 @@ Gnocchi
used for metric aggregations. Else, the lowest available granularity will be
used (meaning the granularity covering the longest period).
* ``use_all_resource_revisions``: Defaults to ``True``. This option is useful
when using Gnocchi with the patch introduced via
https://github.com/gnocchixyz/gnocchi/pull/1059. That patch can cause queries
to return more than one entry per granularity (timespan), according to the
revisions a resource has. This can be problematic when using the ``mutate``
option of CloudKitty. This option allows operators to discard all data points
returned from Gnocchi except the last one in the granularity queried by
CloudKitty for a resource ID. The default behavior is maintained, which means
that CloudKitty always uses all of the data points returned. See the example
excerpt below.
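A minimal, hypothetical ``metrics.yml`` excerpt that disables the option for a
single metric (the metric name and the other values are purely illustrative):

.. code-block:: yaml

   metrics:
     cpu:
       unit: instance
       groupby:
         - id
         - project_id
       extra_args:
         resource_type: instance
         use_all_resource_revisions: false
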
Monasca
~~~~~~~


@@ -0,0 +1,4 @@
---
features:
- |
Add the ``use_all_resource_revisions`` option for the Gnocchi collector.