Add groupby options by different timeframes
Introduce new default groupby options: (i) time: to group data hourly; (ii) time-d: to group data by day of the year; (iii) time-w: to group data by week of the year; (iv) time-m: to group data by month; and, (v) time-y: to group data by year. If you have old data in CloudKitty and you wish to use these group by methods, you will need to reprocess the desired timeframe. Story: #2009839 Task: #44438 Depends-On: https://review.opendev.org/c/x/wsme/+/893677 Change-Id: Iad296f54f6701af84e168796aec9b1033a2a8a2d
This commit is contained in:
parent
60077a3cc4
commit
45f5e72472
|
@ -52,7 +52,19 @@ filters:
|
|||
groupby:
|
||||
in: query
|
||||
description: |
|
||||
Optional attributes to group the summary by.
|
||||
Optional attributes to group the summary by. The ``groupby`` elements are
|
||||
defined in the collector YML settings. Therefore, one can group the
|
||||
result using any of the ``groupby`` attributes defined in the collector
|
||||
settings of CloudKitty. Besides those attributes, by default, starting
|
||||
in CloudKitty ``2024.1`` release, we will have the following new groupby
|
||||
options: (i) time: to group data hourly; (ii) time-d: to group data
|
||||
by day of the year; (iii) time-w: to group data by week of the year;
|
||||
(iv) time-m: to group data by month; and, (v) time-y: to group data by
|
||||
year. If you have old data in CloudKitty and you wish to use these
|
||||
group by methods, you will need to reprocess the desired timeframe.
|
||||
The ``groupby`` options ``time-d``, ``time-w``, ``time-m``, and ``time-y`` are the
|
||||
short versions of the following ``groupby`` options ``day_of_the_year``,
|
||||
``week_of_the_year``, ``month``, and ``year`` respectively.
|
||||
type: list of strings
|
||||
required: false
|
||||
|
||||
|
|
|
@ -14,6 +14,7 @@
|
|||
# under the License.
|
||||
#
|
||||
import abc
|
||||
import datetime
|
||||
import fractions
|
||||
|
||||
from oslo_config import cfg
|
||||
|
@ -30,9 +31,9 @@ from voluptuous import Optional
|
|||
from voluptuous import Required
|
||||
from voluptuous import Schema
|
||||
|
||||
from cloudkitty.dataframe import DataPoint
|
||||
from cloudkitty import utils as ck_utils
|
||||
|
||||
|
||||
LOG = logging.getLogger(__name__)
|
||||
|
||||
collect_opts = [
|
||||
|
@ -248,6 +249,30 @@ class BaseCollector(object, metaclass=abc.ABCMeta):
|
|||
|
||||
return name, data
|
||||
|
||||
def _create_data_point(self, unit, qty, price, groupby, metadata, start):
    """Build a DataPoint enriched with the default time groupby attributes.

    Adds ``week_of_the_year``, ``day_of_the_year``, ``month`` and ``year``
    keys (derived from ``start``) to the ``groupby`` mapping, then wraps
    everything in a :class:`DataPoint`.

    :param unit: unit of the measured quantity.
    :param qty: measured quantity.
    :param price: rating of the datapoint.
    :param groupby: mapping of groupby attributes; mutated in place
        (a new dict is created when ``None`` is given).
    :param metadata: metadata mapping for the datapoint.
    :param start: datetime the datapoint refers to; when falsy, the
        current time is used instead.
    :return: the resulting :class:`DataPoint`.
    """
    if not start:
        # NOTE(review): naive local time -- presumably callers normally
        # supply a timezone-aware start; confirm before relying on it.
        start = datetime.datetime.now()
        LOG.debug("Collector [%s]. No start datetime defined for "
                  "datapoint[unit=%s, quantity=%s, price=%s, groupby=%s, "
                  "metadata=%s]. Therefore, we use the current time as "
                  "the start time for this datapoint.",
                  self.collector_name, unit, qty, price, groupby, metadata)

    # "%U": zero-padded week number of the year (Sunday as first day).
    week_of_the_year = start.strftime("%U")
    # str(tm_yday) and str(month) are portable equivalents of the
    # glibc-only "%-j"/"%-m" strftime flags (the "-" padding modifier is
    # not part of the C standard and is unsupported on some platforms);
    # they yield the same unpadded decimal strings.
    day_of_the_year = str(start.timetuple().tm_yday)
    month_of_the_year = str(start.month)
    year = start.strftime("%Y")

    if groupby is None:
        groupby = {}

    groupby['week_of_the_year'] = week_of_the_year
    groupby['day_of_the_year'] = day_of_the_year
    groupby['month'] = month_of_the_year
    groupby['year'] = year

    return DataPoint(unit, qty, price, groupby, metadata)
|
||||
|
||||
|
||||
class InvalidConfiguration(Exception):
|
||||
pass
|
||||
|
|
|
@ -33,7 +33,6 @@ from voluptuous import Schema
|
|||
|
||||
from cloudkitty import collector
|
||||
from cloudkitty.common import custom_session
|
||||
from cloudkitty import dataframe
|
||||
from cloudkitty import utils as ck_utils
|
||||
from cloudkitty.utils import tz as tzutils
|
||||
|
||||
|
@ -517,13 +516,9 @@ class GnocchiCollector(collector.BaseCollector):
|
|||
project_id, start, end, e),
|
||||
)
|
||||
continue
|
||||
formated_resources.append(dataframe.DataPoint(
|
||||
met['unit'],
|
||||
qty,
|
||||
0,
|
||||
groupby,
|
||||
metadata,
|
||||
))
|
||||
point = self._create_data_point(
|
||||
met['unit'], qty, 0, groupby, metadata, start)
|
||||
formated_resources.append(point)
|
||||
return formated_resources
|
||||
|
||||
@staticmethod
|
||||
|
|
|
@ -24,7 +24,6 @@ from voluptuous import Schema
|
|||
|
||||
from cloudkitty import collector
|
||||
from cloudkitty.common import monasca_client as mon_client_utils
|
||||
from cloudkitty import dataframe
|
||||
from cloudkitty import utils as ck_utils
|
||||
|
||||
LOG = logging.getLogger(__name__)
|
||||
|
@ -231,11 +230,8 @@ class MonascaCollector(collector.BaseCollector):
|
|||
if len(d['statistics']):
|
||||
metadata, groupby, qty = self._format_data(
|
||||
met, d, resources_info)
|
||||
formated_resources.append(dataframe.DataPoint(
|
||||
met['unit'],
|
||||
qty,
|
||||
0,
|
||||
groupby,
|
||||
metadata,
|
||||
))
|
||||
|
||||
point = self._create_data_point(
|
||||
met['unit'], qty, 0, groupby, metadata, start)
|
||||
formated_resources.append(point)
|
||||
return formated_resources
|
||||
|
|
|
@ -28,7 +28,6 @@ from cloudkitty import collector
|
|||
from cloudkitty.collector.exceptions import CollectError
|
||||
from cloudkitty.common.prometheus_client import PrometheusClient
|
||||
from cloudkitty.common.prometheus_client import PrometheusResponseError
|
||||
from cloudkitty import dataframe
|
||||
from cloudkitty import utils as ck_utils
|
||||
from cloudkitty.utils import tz as tzutils
|
||||
|
||||
|
@ -243,12 +242,8 @@ class PrometheusCollector(collector.BaseCollector):
|
|||
item,
|
||||
)
|
||||
|
||||
formatted_resources.append(dataframe.DataPoint(
|
||||
self.conf[metric_name]['unit'],
|
||||
qty,
|
||||
0,
|
||||
groupby,
|
||||
metadata,
|
||||
))
|
||||
point = self._create_data_point(self.conf[metric_name]['unit'],
|
||||
qty, 0, groupby, metadata, start)
|
||||
formatted_resources.append(point)
|
||||
|
||||
return formatted_resources
|
||||
|
|
|
@ -177,6 +177,9 @@ class V1StorageAdapter(storage_v2.BaseStorage):
|
|||
storage_gby.append('res_type')
|
||||
elif elem == 'project_id':
|
||||
storage_gby.append('tenant_id')
|
||||
else:
|
||||
LOG.warning("The groupby [%s] is not supported by MySQL "
|
||||
"storage backend.", elem)
|
||||
return ','.join(storage_gby) if storage_gby else None
|
||||
|
||||
def get_tenants(self, begin, end):
|
||||
|
|
|
@ -16,10 +16,14 @@
|
|||
import abc
|
||||
import datetime
|
||||
|
||||
from oslo_log import log as logging
|
||||
|
||||
from oslo_config import cfg
|
||||
|
||||
from cloudkitty import storage_state
|
||||
|
||||
from werkzeug import exceptions as http_exceptions
|
||||
|
||||
|
||||
storage_opts = [
|
||||
cfg.IntOpt(
|
||||
|
@ -33,6 +37,8 @@ storage_opts = [
|
|||
CONF = cfg.CONF
|
||||
CONF.register_opts(storage_opts, 'storage')
|
||||
|
||||
LOG = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class BaseStorage(object, metaclass=abc.ABCMeta):
|
||||
"""Abstract class for v2 storage objects."""
|
||||
|
@ -159,3 +165,35 @@ class BaseStorage(object, metaclass=abc.ABCMeta):
|
|||
# NOTE(lpeschke): This is only kept for v1 storage backward compatibility
|
||||
def get_tenants(self, begin=None, end=None):
|
||||
return storage_state.StateManager().get_tenants(begin, end)
|
||||
|
||||
# Maps the API time-shortcut suffixes to the internal groupby attribute
# names stamped on datapoints at collect time.
TIME_COMMANDS_MAP = {"d": "day_of_the_year", "w": "week_of_the_year",
                     "m": "month", "y": "year"}

def parse_groupby_syntax_to_groupby_elements(self, groupbys):
    """Expand the API ``time-<x>`` groupby shortcuts to their full names.

    ``time-d``, ``time-w``, ``time-m`` and ``time-y`` are translated to
    ``day_of_the_year``, ``week_of_the_year``, ``month`` and ``year``
    respectively (see ``TIME_COMMANDS_MAP``). Any other element, the
    plain ``time`` option included, is passed through unchanged.

    :param groupbys: list of groupby elements from the API, or None.
    :return: the input itself when it is empty/None; otherwise a new
        list with the time shortcuts replaced.
    :raises werkzeug.exceptions.BadRequest: for a ``time-<x>`` element
        whose suffix is not a known timeframe.
    """
    if not groupbys:
        LOG.debug("No groupby to process syntax.")
        return groupbys

    groupbys_parsed = []
    for elem in groupbys:
        time_command = elem.split('-')
        number_of_parts = len(time_command)
        # Only elements whose first dash-separated token is exactly
        # 'time' are shortcuts. The previous substring test
        # ('time' in elem) wrongly captured legitimate collector-defined
        # attributes such as 'lifetime-total' and rejected them with a
        # BadRequest.
        if time_command[0] == 'time':
            if number_of_parts == 2:
                g = self.TIME_COMMANDS_MAP.get(time_command[1])
                if not g:
                    raise http_exceptions.BadRequest(
                        "Invalid groupby time option. There is no "
                        "groupby processing for [%s]." % elem)

                LOG.debug("Replacing API groupby time command [%s] with "
                          "internal groupby command [%s].", elem, g)
                elem = g

            elif number_of_parts > 2:
                LOG.warning("The groupby [%s] command is not expected for "
                            "storage backend [%s]. Therefore, we leave it "
                            "as is.", elem, self)

        groupbys_parsed.append(elem)
    return groupbys_parsed
|
||||
|
|
|
@ -194,6 +194,7 @@ class ElasticsearchStorage(v2_storage.BaseStorage):
|
|||
begin, end = self._local_to_utc(begin or tzutils.get_month_start(),
|
||||
end or tzutils.get_next_month())
|
||||
|
||||
groupby = self.parse_groupby_syntax_to_groupby_elements(groupby)
|
||||
total, docs = self._conn.total(begin, end, metric_types, filters,
|
||||
groupby, custom_fields=custom_fields,
|
||||
offset=offset, limit=limit,
|
||||
|
|
|
@ -394,6 +394,7 @@ class InfluxStorage(v2_storage.BaseStorage):
|
|||
custom_fields="SUM(qty) AS qty, SUM(price) AS rate"):
|
||||
|
||||
begin, end = self._check_begin_end(begin, end)
|
||||
groupby = self.parse_groupby_syntax_to_groupby_elements(groupby)
|
||||
|
||||
total = self._conn.get_total(metric_types, begin, end,
|
||||
custom_fields, groupby, filters)
|
||||
|
|
|
@ -256,14 +256,16 @@ class PrometheusCollectorTest(tests.TestCase):
|
|||
|
||||
def test_format_retrieve(self):
|
||||
expected_name = 'http_requests_total'
|
||||
group_by = {'bar': '', 'foo': '', 'project_id': '',
|
||||
'week_of_the_year': '00', 'day_of_the_year': '1',
|
||||
'month': '1', 'year': '2015'}
|
||||
|
||||
expected_data = [
|
||||
dataframe.DataPoint(
|
||||
'instance', '7', '0',
|
||||
{'bar': '', 'foo': '', 'project_id': ''},
|
||||
'instance', '7', '0', group_by,
|
||||
{'code': '200', 'instance': 'localhost:9090'}),
|
||||
dataframe.DataPoint(
|
||||
'instance', '42', '0',
|
||||
{'bar': '', 'foo': '', 'project_id': ''},
|
||||
'instance', '42', '0', group_by,
|
||||
{'code': '200', 'instance': 'localhost:9090'}),
|
||||
]
|
||||
|
||||
|
|
|
@ -86,3 +86,32 @@ tests:
|
|||
response_json_paths:
|
||||
$.results.`len`: 2
|
||||
$.total: 2
|
||||
|
||||
- name: Get a summary grouped by time-w and project_id
|
||||
url: /v2/summary
|
||||
status: 200
|
||||
query_parameters:
|
||||
groupby: [time-w, project_id]
|
||||
response_json_paths:
|
||||
$.results.`len`: 4
|
||||
$.total: 4
|
||||
|
||||
- name: Get a summary grouped by time-d
|
||||
url: /v2/summary
|
||||
status: 200
|
||||
query_parameters:
|
||||
groupby: [time-d]
|
||||
response_json_paths:
|
||||
$.results.`len`: 2
|
||||
$.total: 2
|
||||
|
||||
- name: Get a summary grouped by time-y
|
||||
url: /v2/summary
|
||||
status: 200
|
||||
query_parameters:
|
||||
groupby: [time-y]
|
||||
response_json_paths:
|
||||
$.results.`len`: 3
|
||||
$.total: 3
|
||||
|
||||
|
||||
|
|
|
@ -16,8 +16,10 @@ import datetime
|
|||
from unittest import mock
|
||||
|
||||
import testscenarios
|
||||
from werkzeug import exceptions as http_exceptions
|
||||
|
||||
from cloudkitty import storage
|
||||
|
||||
from cloudkitty.tests import samples
|
||||
from cloudkitty.tests.storage.v2 import es_utils
|
||||
from cloudkitty.tests.storage.v2 import influx_utils
|
||||
|
@ -340,5 +342,68 @@ class StorageUnitTest(TestCase):
|
|||
|
||||
self.assertEqual(expected_length, retrieved_length)
|
||||
|
||||
def test_parse_groupby_syntax_to_groupby_elements_no_time_groupby(self):
    """Elements without any time shortcut must pass through unchanged."""
    groupby = ["something"]

    out = self.storage.parse_groupby_syntax_to_groupby_elements(groupby)

    self.assertEqual(groupby, out)
|
||||
|
||||
def test_parse_groupby_syntax_to_groupby_elements_time_groupby(self):
    """The plain ``time`` option (no suffix) is kept as-is."""
    groupby = ["something", "time"]

    out = self.storage.parse_groupby_syntax_to_groupby_elements(groupby)

    self.assertEqual(groupby, out)
|
||||
|
||||
def test_parse_groupby_syntax_to_groupby_elements_odd_time(self):
    """A time element with more than one dash is warned about and kept."""
    groupby = ["something", "time-odd-time-element"]

    with mock.patch.object(storage.v2.LOG, 'warning') as log_mock:
        out = self.storage.parse_groupby_syntax_to_groupby_elements(
            groupby)
        # The parser must log the exact warning with the offending
        # element and the storage backend instance.
        log_mock.assert_has_calls([
            mock.call("The groupby [%s] command is not expected for "
                      "storage backend [%s]. Therefore, we leave it as "
                      "is.", "time-odd-time-element", self.storage)])

    self.assertEqual(groupby, out)
|
||||
|
||||
def test_parse_groupby_syntax_to_groupby_elements_wrong_time_frame(self):
    """An unknown timeframe suffix (``time-u``) raises HTTP 400."""
    groupby = ["something", "time-u"]

    expected_message = r"400 Bad Request: Invalid groupby time option. " \
                       r"There is no groupby processing for \[time-u\]."

    self.assertRaisesRegex(
        http_exceptions.BadRequest, expected_message,
        self.storage.parse_groupby_syntax_to_groupby_elements,
        groupby)
|
||||
|
||||
def test_parse_groupby_syntax_to_groupby_elements_all_time_options(self):
    """Every ``time-<x>`` shortcut maps to its long groupby name."""
    groupby = ["something", "time", "time-d", "time-w", "time-m", "time-y"]

    # One debug log call is expected per entry of TIME_COMMANDS_MAP,
    # announcing the shortcut-to-internal-name replacement.
    expected_log_calls = []
    for k, v in storage.v2.BaseStorage.TIME_COMMANDS_MAP.items():
        expected_log_calls.append(
            mock.call("Replacing API groupby time command [%s] with "
                      "internal groupby command [%s].", "time-%s" % k, v))

    with mock.patch.object(storage.v2.LOG, 'debug') as log_debug_mock:
        out = self.storage.parse_groupby_syntax_to_groupby_elements(
            groupby)
        log_debug_mock.assert_has_calls(expected_log_calls)

    # Non-time elements and the bare 'time' option are untouched;
    # the shortcuts come back expanded, order preserved.
    self.assertEqual(["something", "time", "day_of_the_year",
                      "week_of_the_year", "month", "year"], out)
|
||||
|
||||
def test_parse_groupby_syntax_to_groupby_elements_no_groupby(self):
    """``None`` input is returned as-is, with a debug log explaining why."""
    with mock.patch.object(storage.v2.LOG, 'debug') as log_debug_mock:
        out = self.storage.parse_groupby_syntax_to_groupby_elements(None)
        log_debug_mock.assert_has_calls([
            mock.call("No groupby to process syntax.")])

    self.assertIsNone(out)
|
||||
|
||||
|
||||
StorageUnitTest.generate_scenarios()
|
||||
|
|
|
@ -39,9 +39,27 @@ def generate_v2_storage_data(min_length=10,
|
|||
for project_id in project_ids:
|
||||
data = [copy.deepcopy(sample)
|
||||
for i in range(min_length + random.randint(1, 10))]
|
||||
|
||||
first_group = data[:round(len(data)/2)]
|
||||
second_group = data[round(len(data)/2):]
|
||||
|
||||
for elem in first_group:
|
||||
elem['groupby']['year'] = 2022
|
||||
elem['groupby']['week_of_the_year'] = 1
|
||||
elem['groupby']['day_of_the_year'] = 1
|
||||
elem['groupby']['month'] = 10
|
||||
|
||||
for elem in second_group:
|
||||
elem['groupby']['year'] = 2023
|
||||
elem['groupby']['week_of_the_year'] = 2
|
||||
elem['groupby']['day_of_the_year'] = 2
|
||||
elem['groupby']['month'] = 12
|
||||
|
||||
data[0]['groupby']['year'] = 2021
|
||||
for elem in data:
|
||||
elem['groupby']['id'] = uuidutils.generate_uuid()
|
||||
elem['groupby']['project_id'] = project_id
|
||||
|
||||
datapoints += [dataframe.DataPoint(
|
||||
elem['vol']['unit'],
|
||||
elem['vol']['qty'],
|
||||
|
|
|
@ -0,0 +1,10 @@
|
|||
---
|
||||
features:
|
||||
- |
|
||||
Introduce new default groupby options: (i) time: to group data hourly.
|
||||
The actual group by process will depend on the ``period`` parameter. The
|
||||
default value is ``3600``, which represents one hour; (ii) time-d: to
|
||||
group data by day of the year; (iii) time-w: to group data by week of
|
||||
the year; (iv) time-m: to group data by month; and, (v) time-y: to group
|
||||
data by year. If you have old data in CloudKitty and you wish to use
|
||||
these group by methods, you will need to reprocess the desired timeframe.
|
Loading…
Reference in New Issue