diff --git a/api-ref/source/v2/summary/summary_parameters.yml b/api-ref/source/v2/summary/summary_parameters.yml index 49d7fdc0..de3c6e4e 100644 --- a/api-ref/source/v2/summary/summary_parameters.yml +++ b/api-ref/source/v2/summary/summary_parameters.yml @@ -52,7 +52,19 @@ filters: groupby: in: query description: | - Optional attributes to group the summary by. + Optional attributes to group the summary by. The ``groupby`` elements are + defined in the collector YML settings. Therefore, one can group the + result using any of the ``groupby`` attributes defined in the collector + settings of CloudKitty. Besides those attributes, by default, starting + in the CloudKitty ``2024.1`` release, we will have the following new groupby + options: (i) time: to group data hourly; (ii) time-d: to group data + by day of the year; (iii) time-w: to group data by week of the year; + (iv) time-m: to group data by month; and, (v) time-y: to group data by + year. If you have old data in CloudKitty and you wish to use these + group by methods, you will need to reprocess the desired timeframe. + The ``groupby`` options ``time-d``, ``time-w``, ``time-m``, ``time-y`` are the + short versions of the following ``groupby`` options ``day_of_the_year``, + ``week_of_the_year``, ``month``, and ``year`` respectively. type: list of strings required: false diff --git a/cloudkitty/collector/__init__.py b/cloudkitty/collector/__init__.py index 827d66ec..171c00a3 100644 --- a/cloudkitty/collector/__init__.py +++ b/cloudkitty/collector/__init__.py @@ -14,6 +14,7 @@ # under the License. 
def _create_data_point(self, unit, qty, price, groupby, metadata, start):
    """Build a ``DataPoint`` tagged with calendar-based groupby attributes.

    The ``week_of_the_year``, ``day_of_the_year``, ``month`` and ``year``
    entries are derived from ``start`` and added to the groupby handed to
    ``DataPoint``. All four values are strings; day and month are not
    zero-padded, the week number is (``"00"``-``"53"``).

    :param unit: Forwarded unchanged to ``DataPoint``.
    :param qty: Forwarded unchanged to ``DataPoint``.
    :param price: Forwarded unchanged to ``DataPoint``.
    :param groupby: Groupby attributes of the data point, or ``None``.
                    The mapping passed in is not modified.
    :type groupby: dict
    :param metadata: Forwarded unchanged to ``DataPoint``.
    :param start: Timestamp the time attributes are computed from. When
                  falsy, the current time is used instead.
    :type start: datetime.datetime
    :returns: The ``DataPoint`` built from the arguments and the extended
              groupby.
    """
    if not start:
        # NOTE(review): this fallback is a naive local timestamp; confirm
        # whether a timezone-aware value is expected here.
        start = datetime.datetime.now()
        LOG.debug("Collector [%s]. No start datetime defined for "
                  "datapoint[unit=%s, quantity=%s, price=%s, groupby=%s, "
                  "metadata=%s]. Therefore, we use the current time as "
                  "the start time for this datapoint.",
                  self.collector_name, unit, qty, price, groupby, metadata)

    # Work on a copy so the dict owned by the caller is never altered.
    groupby = dict(groupby) if groupby is not None else {}

    # The "%-j" / "%-m" strftime flags are a glibc extension and raise
    # ValueError on platforms that lack it, so the unpadded day and month
    # are taken from the datetime attributes instead.
    groupby['week_of_the_year'] = start.strftime("%U")
    groupby['day_of_the_year'] = str(start.timetuple().tm_yday)
    groupby['month'] = str(start.month)
    groupby['year'] = start.strftime("%Y")

    return DataPoint(unit, qty, price, groupby, metadata)
metadata, start) + formated_resources.append(point) return formated_resources @staticmethod diff --git a/cloudkitty/collector/monasca.py b/cloudkitty/collector/monasca.py index 4b11c087..ce5d5389 100644 --- a/cloudkitty/collector/monasca.py +++ b/cloudkitty/collector/monasca.py @@ -24,7 +24,6 @@ from voluptuous import Schema from cloudkitty import collector from cloudkitty.common import monasca_client as mon_client_utils -from cloudkitty import dataframe from cloudkitty import utils as ck_utils LOG = logging.getLogger(__name__) @@ -231,11 +230,8 @@ class MonascaCollector(collector.BaseCollector): if len(d['statistics']): metadata, groupby, qty = self._format_data( met, d, resources_info) - formated_resources.append(dataframe.DataPoint( - met['unit'], - qty, - 0, - groupby, - metadata, - )) + + point = self._create_data_point( + met['unit'], qty, 0, groupby, metadata, start) + formated_resources.append(point) return formated_resources diff --git a/cloudkitty/collector/prometheus.py b/cloudkitty/collector/prometheus.py index e23cd6a7..5f6e95d6 100644 --- a/cloudkitty/collector/prometheus.py +++ b/cloudkitty/collector/prometheus.py @@ -28,7 +28,6 @@ from cloudkitty import collector from cloudkitty.collector.exceptions import CollectError from cloudkitty.common.prometheus_client import PrometheusClient from cloudkitty.common.prometheus_client import PrometheusResponseError -from cloudkitty import dataframe from cloudkitty import utils as ck_utils from cloudkitty.utils import tz as tzutils @@ -243,12 +242,8 @@ class PrometheusCollector(collector.BaseCollector): item, ) - formatted_resources.append(dataframe.DataPoint( - self.conf[metric_name]['unit'], - qty, - 0, - groupby, - metadata, - )) + point = self._create_data_point(self.conf[metric_name]['unit'], + qty, 0, groupby, metadata, start) + formatted_resources.append(point) return formatted_resources diff --git a/cloudkitty/storage/__init__.py b/cloudkitty/storage/__init__.py index 2b66dfe9..8001908d 100644 --- 
# Maps the suffix of a ``time-<suffix>`` API groupby option to the name of
# the groupby attribute it is replaced with.
TIME_COMMANDS_MAP = {"d": "day_of_the_year", "w": "week_of_the_year",
                     "m": "month", "y": "year"}

def parse_groupby_syntax_to_groupby_elements(self, groupbys):
    """Translate ``time-<x>`` API groupby options to internal attributes.

    Only options that start with ``time-`` are treated as time commands.
    Every other element, including the plain ``time`` option and
    attributes that merely contain the word (``uptime-d``,
    ``start-time``), is returned untouched.

    :param groupbys: Groupby options received from the API, or a falsy
                     value when there is nothing to group by.
    :type groupbys: list of str
    :returns: ``groupbys`` itself when it is falsy; otherwise a new list
              in which each ``time-<x>`` option is replaced with its
              ``TIME_COMMANDS_MAP`` value.
    :raises werkzeug.exceptions.BadRequest: If a ``time-<x>`` option uses
        a suffix that is not a key of ``TIME_COMMANDS_MAP``.
    """
    if not groupbys:
        LOG.debug("No groupby to process syntax.")
        return groupbys

    groupbys_parsed = []
    for elem in groupbys:
        # A substring test ('time' in elem) would also catch regular
        # attributes such as "uptime-d" and rewrite or reject them.
        if elem.startswith('time-'):
            time_command = elem.split('-')
            if len(time_command) == 2:
                g = self.TIME_COMMANDS_MAP.get(time_command[1])
                if not g:
                    raise http_exceptions.BadRequest(
                        "Invalid groupby time option. There is no "
                        "groupby processing for [%s]." % elem)

                LOG.debug("Replacing API groupby time command [%s] with "
                          "internal groupby command [%s].", elem, g)
                elem = g
            else:
                # More than one dash: not a command handled here, so it
                # is passed on unchanged.
                LOG.warning("The groupby [%s] command is not expected for "
                            "storage backend [%s]. Therefore, we leave it "
                            "as is.", elem, self)

        groupbys_parsed.append(elem)
    return groupbys_parsed
def test_parse_groupby_syntax_to_groupby_elements_no_time_groupby(self):
    """A groupby without any time option comes back unchanged."""
    requested = ["something"]

    parsed = self.storage.parse_groupby_syntax_to_groupby_elements(
        requested)

    self.assertEqual(requested, parsed)

def test_parse_groupby_syntax_to_groupby_elements_time_groupby(self):
    """The plain ``time`` option is not rewritten."""
    requested = ["something", "time"]

    parsed = self.storage.parse_groupby_syntax_to_groupby_elements(
        requested)

    self.assertEqual(requested, parsed)

def test_parse_groupby_syntax_to_groupby_elements_odd_time(self):
    """A time option with several dashes is kept and a warning logged."""
    requested = ["something", "time-odd-time-element"]
    expected_warning = mock.call(
        "The groupby [%s] command is not expected for "
        "storage backend [%s]. Therefore, we leave it as "
        "is.", "time-odd-time-element", self.storage)

    with mock.patch.object(storage.v2.LOG, 'warning') as warning_mock:
        parsed = self.storage.parse_groupby_syntax_to_groupby_elements(
            requested)
        warning_mock.assert_has_calls([expected_warning])

    self.assertEqual(requested, parsed)

def test_parse_groupby_syntax_to_groupby_elements_wrong_time_frame(self):
    """An unknown ``time-<x>`` suffix is rejected with a 400 error."""
    requested = ["something", "time-u"]
    expected_message = (
        r"400 Bad Request: Invalid groupby time option. "
        r"There is no groupby processing for \[time-u\].")

    with self.assertRaisesRegex(http_exceptions.BadRequest,
                                expected_message):
        self.storage.parse_groupby_syntax_to_groupby_elements(requested)

def test_parse_groupby_syntax_to_groupby_elements_all_time_options(self):
    """Each supported ``time-<x>`` option maps to its internal name."""
    requested = [
        "something", "time", "time-d", "time-w", "time-m", "time-y"]
    time_commands = storage.v2.BaseStorage.TIME_COMMANDS_MAP
    expected_debug_calls = [
        mock.call("Replacing API groupby time command [%s] with "
                  "internal groupby command [%s].",
                  "time-%s" % suffix, attribute)
        for suffix, attribute in time_commands.items()]

    with mock.patch.object(storage.v2.LOG, 'debug') as debug_mock:
        parsed = self.storage.parse_groupby_syntax_to_groupby_elements(
            requested)
        debug_mock.assert_has_calls(expected_debug_calls)

    expected = ["something", "time", "day_of_the_year",
                "week_of_the_year", "month", "year"]
    self.assertEqual(expected, parsed)

def test_parse_groupby_syntax_to_groupby_elements_no_groupby(self):
    """``None`` is returned as is and the situation is logged."""
    expected_debug = mock.call("No groupby to process syntax.")

    with mock.patch.object(storage.v2.LOG, 'debug') as debug_mock:
        parsed = self.storage.parse_groupby_syntax_to_groupby_elements(
            None)
        debug_mock.assert_has_calls([expected_debug])

    self.assertIsNone(parsed)
data[0]['groupby']['year'] = 2021 for elem in data: elem['groupby']['id'] = uuidutils.generate_uuid() elem['groupby']['project_id'] = project_id + datapoints += [dataframe.DataPoint( elem['vol']['unit'], elem['vol']['qty'], diff --git a/releasenotes/notes/support-group-by-timeframes-1247aa336916f3b6.yaml b/releasenotes/notes/support-group-by-timeframes-1247aa336916f3b6.yaml new file mode 100644 index 00000000..62ca30b9 --- /dev/null +++ b/releasenotes/notes/support-group-by-timeframes-1247aa336916f3b6.yaml @@ -0,0 +1,10 @@ +--- +features: + - | + Introduce new default groupby options: (i) time: to group data hourly. + The actual group by process will depend on the ``period`` parameter. The + default value is ``3600``, which represents one hour; (ii) time-d: to + group data by day of the year; (iii) time-w: to group data by week of + the year; (iv) time-m: to group data by month; and, (v) time-y: to group + data by year. If you have old data in CloudKitty and you wish to use + these group by methods, you will need to reprocess the desired timeframe.