Add groupby options by different timeframes
Introduce new default groupby options: (i) time: to group data hourly; (ii) time-d: to group data by day of the year; (iii) time-w: to group data by week of the year; (iv) time-m: to group data by month; and, (v) time-y: to group data by year. If you have old data in CloudKitty and you wish to use these group by methods, you will need to reprocess the desired timeframe. Story: #2009839 Task: #44438 Depends-On: https://review.opendev.org/c/x/wsme/+/893677 Change-Id: Iad296f54f6701af84e168796aec9b1033a2a8a2d
This commit is contained in:
parent
60077a3cc4
commit
45f5e72472
|
@ -52,7 +52,19 @@ filters:
|
|||
groupby:
|
||||
in: query
|
||||
description: |
|
||||
Optional attributes to group the summary by.
|
||||
Optional attributes to group the summary by. The ``groupby`` elements are
|
||||
defined in the collector YML settings. Therefore, one can group the
|
||||
result using any of the ``groupby`` attributes defined in the collector
|
||||
settings of CloudKitty. Besides those attributes, by default, starting
|
||||
in CloudKitty ``2024.1`` release, we will have the following new groupby
|
||||
options: (i) time: to group data hourly; (ii) time-d: to group data
|
||||
by day of the year; (iii) time-w: to group data by week of the year;
|
||||
(iv) time-m: to group data by month; and, (v) time-y: to group data by
|
||||
year. If you have old data in CloudKitty and you wish to use these
|
||||
group by methods, you will need to reprocess the desired timeframe.
|
||||
The ``groupby`` options ``time-d``, ``time-w``, ``time-m``, and ``time-y`` are the
|
||||
short versions of the following ``groupby`` options ``day_of_the_year``,
|
||||
``week_of_the_year``, ``month``, and ``year`` respectively.
|
||||
type: list of strings
|
||||
required: false
|
||||
|
||||
|
|
|
@ -14,6 +14,7 @@
|
|||
# under the License.
|
||||
#
|
||||
import abc
|
||||
import datetime
|
||||
import fractions
|
||||
|
||||
from oslo_config import cfg
|
||||
|
@ -30,9 +31,9 @@ from voluptuous import Optional
|
|||
from voluptuous import Required
|
||||
from voluptuous import Schema
|
||||
|
||||
from cloudkitty.dataframe import DataPoint
|
||||
from cloudkitty import utils as ck_utils
|
||||
|
||||
|
||||
LOG = logging.getLogger(__name__)
|
||||
|
||||
collect_opts = [
|
||||
|
@ -248,6 +249,30 @@ class BaseCollector(object, metaclass=abc.ABCMeta):
|
|||
|
||||
return name, data
|
||||
|
||||
def _create_data_point(self, unit, qty, price, groupby, metadata, start):
    """Build a DataPoint enriched with the default time groupby attributes.

    Adds ``week_of_the_year``, ``day_of_the_year``, ``month`` and ``year``
    keys (derived from ``start``) to the ``groupby`` mapping, then wraps
    everything in a :class:`DataPoint`.

    :param unit: unit of the measured quantity.
    :param qty: measured quantity.
    :param price: rating of the datapoint.
    :param groupby: mapping of groupby attributes; mutated in place
        (a new dict is created when ``None`` is given).
    :param metadata: metadata mapping for the datapoint.
    :param start: datetime the datapoint refers to; when falsy, the
        current time is used instead.
    :return: the resulting :class:`DataPoint`.
    """
    if not start:
        # NOTE(review): naive local time -- presumably callers normally
        # supply a timezone-aware start; confirm before relying on it.
        start = datetime.datetime.now()
        LOG.debug("Collector [%s]. No start datetime defined for "
                  "datapoint[unit=%s, quantity=%s, price=%s, groupby=%s, "
                  "metadata=%s]. Therefore, we use the current time as "
                  "the start time for this datapoint.",
                  self.collector_name, unit, qty, price, groupby, metadata)

    # "%U": zero-padded week number of the year (Sunday as first day).
    week_of_the_year = start.strftime("%U")
    # str(tm_yday) and str(month) are portable equivalents of the
    # glibc-only "%-j"/"%-m" strftime flags (the "-" padding modifier is
    # not part of the C standard and is unsupported on some platforms);
    # they yield the same unpadded decimal strings.
    day_of_the_year = str(start.timetuple().tm_yday)
    month_of_the_year = str(start.month)
    year = start.strftime("%Y")

    if groupby is None:
        groupby = {}

    groupby['week_of_the_year'] = week_of_the_year
    groupby['day_of_the_year'] = day_of_the_year
    groupby['month'] = month_of_the_year
    groupby['year'] = year

    return DataPoint(unit, qty, price, groupby, metadata)
|
||||
|
||||
|
||||
class InvalidConfiguration(Exception):
|
||||
pass
|
||||
|
|
|
@ -33,7 +33,6 @@ from voluptuous import Schema
|
|||
|
||||
from cloudkitty import collector
|
||||
from cloudkitty.common import custom_session
|
||||
from cloudkitty import dataframe
|
||||
from cloudkitty import utils as ck_utils
|
||||
from cloudkitty.utils import tz as tzutils
|
||||
|
||||
|
@ -517,13 +516,9 @@ class GnocchiCollector(collector.BaseCollector):
|
|||
project_id, start, end, e),
|
||||
)
|
||||
continue
|
||||
formated_resources.append(dataframe.DataPoint(
|
||||
met['unit'],
|
||||
qty,
|
||||
0,
|
||||
groupby,
|
||||
metadata,
|
||||
))
|
||||
point = self._create_data_point(
|
||||
met['unit'], qty, 0, groupby, metadata, start)
|
||||
formated_resources.append(point)
|
||||
return formated_resources
|
||||
|
||||
@staticmethod
|
||||
|
|
|
@ -24,7 +24,6 @@ from voluptuous import Schema
|
|||
|
||||
from cloudkitty import collector
|
||||
from cloudkitty.common import monasca_client as mon_client_utils
|
||||
from cloudkitty import dataframe
|
||||
from cloudkitty import utils as ck_utils
|
||||
|
||||
LOG = logging.getLogger(__name__)
|
||||
|
@ -231,11 +230,8 @@ class MonascaCollector(collector.BaseCollector):
|
|||
if len(d['statistics']):
|
||||
metadata, groupby, qty = self._format_data(
|
||||
met, d, resources_info)
|
||||
formated_resources.append(dataframe.DataPoint(
|
||||
met['unit'],
|
||||
qty,
|
||||
0,
|
||||
groupby,
|
||||
metadata,
|
||||
))
|
||||
|
||||
point = self._create_data_point(
|
||||
met['unit'], qty, 0, groupby, metadata, start)
|
||||
formated_resources.append(point)
|
||||
return formated_resources
|
||||
|
|
|
@ -28,7 +28,6 @@ from cloudkitty import collector
|
|||
from cloudkitty.collector.exceptions import CollectError
|
||||
from cloudkitty.common.prometheus_client import PrometheusClient
|
||||
from cloudkitty.common.prometheus_client import PrometheusResponseError
|
||||
from cloudkitty import dataframe
|
||||
from cloudkitty import utils as ck_utils
|
||||
from cloudkitty.utils import tz as tzutils
|
||||
|
||||
|
@ -243,12 +242,8 @@ class PrometheusCollector(collector.BaseCollector):
|
|||
item,
|
||||
)
|
||||
|
||||
formatted_resources.append(dataframe.DataPoint(
|
||||
self.conf[metric_name]['unit'],
|
||||
qty,
|
||||
0,
|
||||
groupby,
|
||||
metadata,
|
||||
))
|
||||
point = self._create_data_point(self.conf[metric_name]['unit'],
|
||||
qty, 0, groupby, metadata, start)
|
||||
formatted_resources.append(point)
|
||||
|
||||
return formatted_resources
|
||||
|
|
|
@ -177,6 +177,9 @@ class V1StorageAdapter(storage_v2.BaseStorage):
|
|||
storage_gby.append('res_type')
|
||||
elif elem == 'project_id':
|
||||
storage_gby.append('tenant_id')
|
||||
else:
|
||||
LOG.warning("The groupby [%s] is not supported by MySQL "
|
||||
"storage backend.", elem)
|
||||
return ','.join(storage_gby) if storage_gby else None
|
||||
|
||||
def get_tenants(self, begin, end):
|
||||
|
|
|
@ -16,10 +16,14 @@
|
|||
import abc
|
||||
import datetime
|
||||
|
||||
from oslo_log import log as logging
|
||||
|
||||
from oslo_config import cfg
|
||||
|
||||
from cloudkitty import storage_state
|
||||
|
||||
from werkzeug import exceptions as http_exceptions
|
||||
|
||||
|
||||
storage_opts = [
|
||||
cfg.IntOpt(
|
||||
|
@ -33,6 +37,8 @@ storage_opts = [
|
|||
CONF = cfg.CONF
|
||||
CONF.register_opts(storage_opts, 'storage')
|
||||
|
||||
LOG = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class BaseStorage(object, metaclass=abc.ABCMeta):
|
||||
"""Abstract class for v2 storage objects."""
|
||||
|
@ -159,3 +165,35 @@ class BaseStorage(object, metaclass=abc.ABCMeta):
|
|||
# NOTE(lpeschke): This is only kept for v1 storage backward compatibility
|
||||
def get_tenants(self, begin=None, end=None):
|
||||
return storage_state.StateManager().get_tenants(begin, end)
|
||||
|
||||
# Maps the API time-shortcut suffixes to the internal groupby attribute
# names stamped on datapoints at collect time.
TIME_COMMANDS_MAP = {"d": "day_of_the_year", "w": "week_of_the_year",
                     "m": "month", "y": "year"}

def parse_groupby_syntax_to_groupby_elements(self, groupbys):
    """Expand the API ``time-<x>`` groupby shortcuts to their full names.

    ``time-d``, ``time-w``, ``time-m`` and ``time-y`` are translated to
    ``day_of_the_year``, ``week_of_the_year``, ``month`` and ``year``
    respectively (see ``TIME_COMMANDS_MAP``). Any other element, the
    plain ``time`` option included, is passed through unchanged.

    :param groupbys: list of groupby elements from the API, or None.
    :return: the input itself when it is empty/None; otherwise a new
        list with the time shortcuts replaced.
    :raises werkzeug.exceptions.BadRequest: for a ``time-<x>`` element
        whose suffix is not a known timeframe.
    """
    if not groupbys:
        LOG.debug("No groupby to process syntax.")
        return groupbys

    groupbys_parsed = []
    for elem in groupbys:
        time_command = elem.split('-')
        number_of_parts = len(time_command)
        # Only elements whose first dash-separated token is exactly
        # 'time' are shortcuts. The previous substring test
        # ('time' in elem) wrongly captured legitimate collector-defined
        # attributes such as 'lifetime-total' and rejected them with a
        # BadRequest.
        if time_command[0] == 'time':
            if number_of_parts == 2:
                g = self.TIME_COMMANDS_MAP.get(time_command[1])
                if not g:
                    raise http_exceptions.BadRequest(
                        "Invalid groupby time option. There is no "
                        "groupby processing for [%s]." % elem)

                LOG.debug("Replacing API groupby time command [%s] with "
                          "internal groupby command [%s].", elem, g)
                elem = g

            elif number_of_parts > 2:
                LOG.warning("The groupby [%s] command is not expected for "
                            "storage backend [%s]. Therefore, we leave it "
                            "as is.", elem, self)

        groupbys_parsed.append(elem)
    return groupbys_parsed
|
||||
|
|
|
@ -194,6 +194,7 @@ class ElasticsearchStorage(v2_storage.BaseStorage):
|
|||
begin, end = self._local_to_utc(begin or tzutils.get_month_start(),
|
||||
end or tzutils.get_next_month())
|
||||
|
||||
groupby = self.parse_groupby_syntax_to_groupby_elements(groupby)
|
||||
total, docs = self._conn.total(begin, end, metric_types, filters,
|
||||
groupby, custom_fields=custom_fields,
|
||||
offset=offset, limit=limit,
|
||||
|
|
|
@ -394,6 +394,7 @@ class InfluxStorage(v2_storage.BaseStorage):
|
|||
custom_fields="SUM(qty) AS qty, SUM(price) AS rate"):
|
||||
|
||||
begin, end = self._check_begin_end(begin, end)
|
||||
groupby = self.parse_groupby_syntax_to_groupby_elements(groupby)
|
||||
|
||||
total = self._conn.get_total(metric_types, begin, end,
|
||||
custom_fields, groupby, filters)
|
||||
|
|
|
@ -256,14 +256,16 @@ class PrometheusCollectorTest(tests.TestCase):
|
|||
|
||||
def test_format_retrieve(self):
|
||||
expected_name = 'http_requests_total'
|
||||
group_by = {'bar': '', 'foo': '', 'project_id': '',
|
||||
'week_of_the_year': '00', 'day_of_the_year': '1',
|
||||
'month': '1', 'year': '2015'}
|
||||
|
||||
expected_data = [
|
||||
dataframe.DataPoint(
|
||||
'instance', '7', '0',
|
||||
{'bar': '', 'foo': '', 'project_id': ''},
|
||||
'instance', '7', '0', group_by,
|
||||
{'code': '200', 'instance': 'localhost:9090'}),
|
||||
dataframe.DataPoint(
|
||||
'instance', '42', '0',
|
||||
{'bar': '', 'foo': '', 'project_id': ''},
|
||||
'instance', '42', '0', group_by,
|
||||
{'code': '200', 'instance': 'localhost:9090'}),
|
||||
]
|
||||
|
||||
|
|
|
@ -86,3 +86,32 @@ tests:
|
|||
response_json_paths:
|
||||
$.results.`len`: 2
|
||||
$.total: 2
|
||||
|
||||
- name: Get a summary grouped by time-w and project_id
|
||||
url: /v2/summary
|
||||
status: 200
|
||||
query_parameters:
|
||||
groupby: [time-w, project_id]
|
||||
response_json_paths:
|
||||
$.results.`len`: 4
|
||||
$.total: 4
|
||||
|
||||
- name: Get a summary grouped by time-d
|
||||
url: /v2/summary
|
||||
status: 200
|
||||
query_parameters:
|
||||
groupby: [time-d]
|
||||
response_json_paths:
|
||||
$.results.`len`: 2
|
||||
$.total: 2
|
||||
|
||||
- name: Get a summary grouped by time-y
|
||||
url: /v2/summary
|
||||
status: 200
|
||||
query_parameters:
|
||||
groupby: [time-y]
|
||||
response_json_paths:
|
||||
$.results.`len`: 3
|
||||
$.total: 3
|
||||
|
||||
|
||||
|
|
|
@ -16,8 +16,10 @@ import datetime
|
|||
from unittest import mock
|
||||
|
||||
import testscenarios
|
||||
from werkzeug import exceptions as http_exceptions
|
||||
|
||||
from cloudkitty import storage
|
||||
|
||||
from cloudkitty.tests import samples
|
||||
from cloudkitty.tests.storage.v2 import es_utils
|
||||
from cloudkitty.tests.storage.v2 import influx_utils
|
||||
|
@ -340,5 +342,68 @@ class StorageUnitTest(TestCase):
|
|||
|
||||
self.assertEqual(expected_length, retrieved_length)
|
||||
|
||||
def test_parse_groupby_syntax_to_groupby_elements_no_time_groupby(self):
    """Elements without any time shortcut must pass through unchanged."""
    groupby = ["something"]

    out = self.storage.parse_groupby_syntax_to_groupby_elements(groupby)

    self.assertEqual(groupby, out)
|
||||
|
||||
def test_parse_groupby_syntax_to_groupby_elements_time_groupby(self):
    """The plain ``time`` option (no suffix) is kept as-is."""
    groupby = ["something", "time"]

    out = self.storage.parse_groupby_syntax_to_groupby_elements(groupby)

    self.assertEqual(groupby, out)
|
||||
|
||||
def test_parse_groupby_syntax_to_groupby_elements_odd_time(self):
    """A time element with more than one dash is warned about and kept."""
    groupby = ["something", "time-odd-time-element"]

    with mock.patch.object(storage.v2.LOG, 'warning') as log_mock:
        out = self.storage.parse_groupby_syntax_to_groupby_elements(
            groupby)
        # The parser must log the exact warning with the offending
        # element and the storage backend instance.
        log_mock.assert_has_calls([
            mock.call("The groupby [%s] command is not expected for "
                      "storage backend [%s]. Therefore, we leave it as "
                      "is.", "time-odd-time-element", self.storage)])

    self.assertEqual(groupby, out)
|
||||
|
||||
def test_parse_groupby_syntax_to_groupby_elements_wrong_time_frame(self):
    """An unknown timeframe suffix (``time-u``) raises HTTP 400."""
    groupby = ["something", "time-u"]

    expected_message = r"400 Bad Request: Invalid groupby time option. " \
                       r"There is no groupby processing for \[time-u\]."

    self.assertRaisesRegex(
        http_exceptions.BadRequest, expected_message,
        self.storage.parse_groupby_syntax_to_groupby_elements,
        groupby)
|
||||
|
||||
def test_parse_groupby_syntax_to_groupby_elements_all_time_options(self):
    """Every ``time-<x>`` shortcut maps to its long groupby name."""
    groupby = ["something", "time", "time-d", "time-w", "time-m", "time-y"]

    # One debug log call is expected per entry of TIME_COMMANDS_MAP,
    # announcing the shortcut-to-internal-name replacement.
    expected_log_calls = []
    for k, v in storage.v2.BaseStorage.TIME_COMMANDS_MAP.items():
        expected_log_calls.append(
            mock.call("Replacing API groupby time command [%s] with "
                      "internal groupby command [%s].", "time-%s" % k, v))

    with mock.patch.object(storage.v2.LOG, 'debug') as log_debug_mock:
        out = self.storage.parse_groupby_syntax_to_groupby_elements(
            groupby)
        log_debug_mock.assert_has_calls(expected_log_calls)

    # Non-time elements and the bare 'time' option are untouched;
    # the shortcuts come back expanded, order preserved.
    self.assertEqual(["something", "time", "day_of_the_year",
                      "week_of_the_year", "month", "year"], out)
|
||||
|
||||
def test_parse_groupby_syntax_to_groupby_elements_no_groupby(self):
    """``None`` input is returned as-is, with a debug log explaining why."""
    with mock.patch.object(storage.v2.LOG, 'debug') as log_debug_mock:
        out = self.storage.parse_groupby_syntax_to_groupby_elements(None)
        log_debug_mock.assert_has_calls([
            mock.call("No groupby to process syntax.")])

    self.assertIsNone(out)
|
||||
|
||||
|
||||
StorageUnitTest.generate_scenarios()
|
||||
|
|
|
@ -39,9 +39,27 @@ def generate_v2_storage_data(min_length=10,
|
|||
for project_id in project_ids:
|
||||
data = [copy.deepcopy(sample)
|
||||
for i in range(min_length + random.randint(1, 10))]
|
||||
|
||||
first_group = data[:round(len(data)/2)]
|
||||
second_group = data[round(len(data)/2):]
|
||||
|
||||
for elem in first_group:
|
||||
elem['groupby']['year'] = 2022
|
||||
elem['groupby']['week_of_the_year'] = 1
|
||||
elem['groupby']['day_of_the_year'] = 1
|
||||
elem['groupby']['month'] = 10
|
||||
|
||||
for elem in second_group:
|
||||
elem['groupby']['year'] = 2023
|
||||
elem['groupby']['week_of_the_year'] = 2
|
||||
elem['groupby']['day_of_the_year'] = 2
|
||||
elem['groupby']['month'] = 12
|
||||
|
||||
data[0]['groupby']['year'] = 2021
|
||||
for elem in data:
|
||||
elem['groupby']['id'] = uuidutils.generate_uuid()
|
||||
elem['groupby']['project_id'] = project_id
|
||||
|
||||
datapoints += [dataframe.DataPoint(
|
||||
elem['vol']['unit'],
|
||||
elem['vol']['qty'],
|
||||
|
|
|
@ -0,0 +1,10 @@
|
|||
---
|
||||
features:
|
||||
- |
|
||||
Introduce new default groupby options: (i) time: to group data hourly.
|
||||
The actual group by process will depend on the ``period`` parameter. The
|
||||
default value is ``3600``, which represents one hour; (ii) time-d: to
|
||||
group data by day of the year; (iii) time-w: to group data by week of
|
||||
the year; (iv) time-m: to group data by month; and, (v) time-y: to group
|
||||
data by year. If you have old data in CloudKitty and you wish to use
|
||||
these group by methods, you will need to reprocess the desired timeframe.
|
Loading…
Reference in New Issue