Merge "Add groupby options by different timeframes"

This commit is contained in:
Zuul 2023-11-27 15:22:12 +00:00 committed by Gerrit Code Review
commit bf208c517e
14 changed files with 220 additions and 30 deletions

View File

@ -52,7 +52,19 @@ filters:
groupby:
in: query
description: |
Optional attributes to group the summary by.
Optional attributes to group the summary by. The ``groupby`` elements are
defined in the collector YAML settings; therefore, one can group the
result by any of the ``groupby`` attributes defined in the collector
settings of CloudKitty. In addition to those attributes, starting with
the CloudKitty ``2024.1`` release, the following groupby options are
available by default: (i) ``time``, to group data hourly; (ii) ``time-d``,
to group data by day of the year; (iii) ``time-w``, to group data by week
of the year; (iv) ``time-m``, to group data by month; and (v) ``time-y``,
to group data by year. If you have old data in CloudKitty and wish to use
these groupby options, you will need to reprocess the desired timeframe.
The ``groupby`` options ``time-d``, ``time-w``, ``time-m``, and ``time-y``
are shorthand for ``day_of_the_year``, ``week_of_the_year``, ``month``,
and ``year``, respectively.
type: list of strings
required: false
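For illustration, a summary request using the new time-based options could look like the line below; the host and port are placeholders, and encoding the list as a repeated query parameter is an assumption about the client, not part of this change:

GET http://cloudkitty-api:8889/v2/summary?groupby=time-m&groupby=project_id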

View File

@ -14,6 +14,7 @@
# under the License.
#
import abc
import datetime
import fractions
from oslo_config import cfg
@ -30,9 +31,9 @@ from voluptuous import Optional
from voluptuous import Required
from voluptuous import Schema
from cloudkitty.dataframe import DataPoint
from cloudkitty import utils as ck_utils
LOG = logging.getLogger(__name__)
collect_opts = [
@ -248,6 +249,30 @@ class BaseCollector(object, metaclass=abc.ABCMeta):
return name, data
def _create_data_point(self, unit, qty, price, groupby, metadata, start):
if not start:
start = datetime.datetime.now()
LOG.debug("Collector [%s]. No start datetime defined for "
"datapoint[unit=%s, quantity=%s, price=%s, groupby=%s, "
"metadata=%s]. Therefore, we use the current time as "
"the start time for this datapoint.",
self.collector_name, unit, qty, price, groupby, metadata)
week_of_the_year = start.strftime("%U")
day_of_the_year = start.strftime("%-j")
month_of_the_year = start.strftime("%-m")
year = start.strftime("%Y")
if groupby is None:
groupby = {}
groupby['week_of_the_year'] = week_of_the_year
groupby['day_of_the_year'] = day_of_the_year
groupby['month'] = month_of_the_year
groupby['year'] = year
return DataPoint(unit, qty, price, groupby, metadata)
class InvalidConfiguration(Exception):
pass
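As a minimal sketch of the time attributes ``_create_data_point`` derives, assuming a glibc-style ``strftime`` (the non-padded ``%-j`` and ``%-m`` directives are platform dependent) and a fixed start of 2023-11-27:

import datetime

start = datetime.datetime(2023, 11, 27)
time_groupby = {
    'week_of_the_year': start.strftime("%U"),   # '48', Sunday-based week number
    'day_of_the_year': start.strftime("%-j"),   # '331', non-padded day of the year
    'month': start.strftime("%-m"),             # '11', non-padded month
    'year': start.strftime("%Y"),               # '2023'
}
print(time_groupby)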

View File

@ -33,7 +33,6 @@ from voluptuous import Schema
from cloudkitty import collector
from cloudkitty.common import custom_session
from cloudkitty import dataframe
from cloudkitty import utils as ck_utils
from cloudkitty.utils import tz as tzutils
@ -518,13 +517,9 @@ class GnocchiCollector(collector.BaseCollector):
project_id, start, end, e),
)
continue
formated_resources.append(dataframe.DataPoint(
met['unit'],
qty,
0,
groupby,
metadata,
))
point = self._create_data_point(
met['unit'], qty, 0, groupby, metadata, start)
formated_resources.append(point)
return formated_resources
@staticmethod

View File

@ -24,7 +24,6 @@ from voluptuous import Schema
from cloudkitty import collector
from cloudkitty.common import monasca_client as mon_client_utils
from cloudkitty import dataframe
from cloudkitty import utils as ck_utils
LOG = logging.getLogger(__name__)
@ -231,11 +230,8 @@ class MonascaCollector(collector.BaseCollector):
if len(d['statistics']):
metadata, groupby, qty = self._format_data(
met, d, resources_info)
formated_resources.append(dataframe.DataPoint(
met['unit'],
qty,
0,
groupby,
metadata,
))
point = self._create_data_point(
met['unit'], qty, 0, groupby, metadata, start)
formated_resources.append(point)
return formated_resources

View File

@ -28,7 +28,6 @@ from cloudkitty import collector
from cloudkitty.collector.exceptions import CollectError
from cloudkitty.common.prometheus_client import PrometheusClient
from cloudkitty.common.prometheus_client import PrometheusResponseError
from cloudkitty import dataframe
from cloudkitty import utils as ck_utils
from cloudkitty.utils import tz as tzutils
@ -243,12 +242,8 @@ class PrometheusCollector(collector.BaseCollector):
item,
)
formatted_resources.append(dataframe.DataPoint(
self.conf[metric_name]['unit'],
qty,
0,
groupby,
metadata,
))
point = self._create_data_point(self.conf[metric_name]['unit'],
qty, 0, groupby, metadata, start)
formatted_resources.append(point)
return formatted_resources

View File

@ -177,6 +177,9 @@ class V1StorageAdapter(storage_v2.BaseStorage):
storage_gby.append('res_type')
elif elem == 'project_id':
storage_gby.append('tenant_id')
else:
LOG.warning("The groupby [%s] is not supported by MySQL "
"storage backend.", elem)
return ','.join(storage_gby) if storage_gby else None
def get_tenants(self, begin, end):

View File

@ -16,10 +16,14 @@
import abc
import datetime
from oslo_log import log as logging
from oslo_config import cfg
from cloudkitty import storage_state
from werkzeug import exceptions as http_exceptions
storage_opts = [
cfg.IntOpt(
@ -33,6 +37,8 @@ storage_opts = [
CONF = cfg.CONF
CONF.register_opts(storage_opts, 'storage')
LOG = logging.getLogger(__name__)
class BaseStorage(object, metaclass=abc.ABCMeta):
"""Abstract class for v2 storage objects."""
@ -159,3 +165,35 @@ class BaseStorage(object, metaclass=abc.ABCMeta):
# NOTE(lpeschke): This is only kept for v1 storage backward compatibility
def get_tenants(self, begin=None, end=None):
return storage_state.StateManager().get_tenants(begin, end)
TIME_COMMANDS_MAP = {"d": "day_of_the_year", "w": "week_of_the_year",
"m": "month", "y": "year"}
def parse_groupby_syntax_to_groupby_elements(self, groupbys):
if not groupbys:
LOG.debug("No groupby to process syntax.")
return groupbys
groupbys_parsed = []
for elem in groupbys:
if 'time' in elem:
time_command = elem.split('-')
number_of_parts = len(time_command)
if number_of_parts == 2:
g = self.TIME_COMMANDS_MAP.get(time_command[1])
if not g:
raise http_exceptions.BadRequest(
"Invalid groupby time option. There is no "
"groupby processing for [%s]." % elem)
LOG.debug("Replacing API groupby time command [%s] with "
"internal groupby command [%s].", elem, g)
elem = g
elif number_of_parts > 2:
LOG.warning("The groupby [%s] command is not expected for "
"storage backend [%s]. Therefore, we leave it "
"as is.", elem, self)
groupbys_parsed.append(elem)
return groupbys_parsed
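For example, based on the behaviour exercised by the unit tests below, the parsing turns the short API options into the internal groupby attributes while leaving everything else untouched (shown here as a plain call sketch, without the ``self`` receiver):

parse_groupby_syntax_to_groupby_elements(["project_id", "time", "time-m"])
# -> ["project_id", "time", "month"]

An unknown suffix such as ``time-u`` raises a ``400 Bad Request``, while a value with more than one ``-`` (for example ``time-odd-time-element``) is passed through unchanged with a warning.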

View File

@ -194,6 +194,7 @@ class ElasticsearchStorage(v2_storage.BaseStorage):
begin, end = self._local_to_utc(begin or tzutils.get_month_start(),
end or tzutils.get_next_month())
groupby = self.parse_groupby_syntax_to_groupby_elements(groupby)
total, docs = self._conn.total(begin, end, metric_types, filters,
groupby, custom_fields=custom_fields,
offset=offset, limit=limit,

View File

@ -394,6 +394,7 @@ class InfluxStorage(v2_storage.BaseStorage):
custom_fields="SUM(qty) AS qty, SUM(price) AS rate"):
begin, end = self._check_begin_end(begin, end)
groupby = self.parse_groupby_syntax_to_groupby_elements(groupby)
total = self._conn.get_total(metric_types, begin, end,
custom_fields, groupby, filters)

View File

@ -256,14 +256,16 @@ class PrometheusCollectorTest(tests.TestCase):
def test_format_retrieve(self):
expected_name = 'http_requests_total'
group_by = {'bar': '', 'foo': '', 'project_id': '',
'week_of_the_year': '00', 'day_of_the_year': '1',
'month': '1', 'year': '2015'}
expected_data = [
dataframe.DataPoint(
'instance', '7', '0',
{'bar': '', 'foo': '', 'project_id': ''},
'instance', '7', '0', group_by,
{'code': '200', 'instance': 'localhost:9090'}),
dataframe.DataPoint(
'instance', '42', '0',
{'bar': '', 'foo': '', 'project_id': ''},
'instance', '42', '0', group_by,
{'code': '200', 'instance': 'localhost:9090'}),
]

View File

@ -86,3 +86,32 @@ tests:
response_json_paths:
$.results.`len`: 2
$.total: 2
- name: Get a summary grouped by time-w and project_id
url: /v2/summary
status: 200
query_parameters:
groupby: [time-w, project_id]
response_json_paths:
$.results.`len`: 4
$.total: 4
- name: Get a summary grouped by time-d
url: /v2/summary
status: 200
query_parameters:
groupby: [time-d]
response_json_paths:
$.results.`len`: 2
$.total: 2
- name: Get a summary grouped by time-y
url: /v2/summary
status: 200
query_parameters:
groupby: [time-y]
response_json_paths:
$.results.`len`: 3
$.total: 3

View File

@ -16,8 +16,10 @@ import datetime
from unittest import mock
import testscenarios
from werkzeug import exceptions as http_exceptions
from cloudkitty import storage
from cloudkitty.tests import samples
from cloudkitty.tests.storage.v2 import es_utils
from cloudkitty.tests.storage.v2 import influx_utils
@ -340,5 +342,68 @@ class StorageUnitTest(TestCase):
self.assertEqual(expected_length, retrieved_length)
def test_parse_groupby_syntax_to_groupby_elements_no_time_groupby(self):
groupby = ["something"]
out = self.storage.parse_groupby_syntax_to_groupby_elements(groupby)
self.assertEqual(groupby, out)
def test_parse_groupby_syntax_to_groupby_elements_time_groupby(self):
groupby = ["something", "time"]
out = self.storage.parse_groupby_syntax_to_groupby_elements(groupby)
self.assertEqual(groupby, out)
def test_parse_groupby_syntax_to_groupby_elements_odd_time(self):
groupby = ["something", "time-odd-time-element"]
with mock.patch.object(storage.v2.LOG, 'warning') as log_mock:
out = self.storage.parse_groupby_syntax_to_groupby_elements(
groupby)
log_mock.assert_has_calls([
mock.call("The groupby [%s] command is not expected for "
"storage backend [%s]. Therefore, we leave it as "
"is.", "time-odd-time-element", self.storage)])
self.assertEqual(groupby, out)
def test_parse_groupby_syntax_to_groupby_elements_wrong_time_frame(self):
groupby = ["something", "time-u"]
expected_message = r"400 Bad Request: Invalid groupby time option. " \
r"There is no groupby processing for \[time-u\]."
self.assertRaisesRegex(
http_exceptions.BadRequest, expected_message,
self.storage.parse_groupby_syntax_to_groupby_elements,
groupby)
def test_parse_groupby_syntax_to_groupby_elements_all_time_options(self):
groupby = ["something", "time", "time-d", "time-w", "time-m", "time-y"]
expected_log_calls = []
for k, v in storage.v2.BaseStorage.TIME_COMMANDS_MAP.items():
expected_log_calls.append(
mock.call("Replacing API groupby time command [%s] with "
"internal groupby command [%s].", "time-%s" % k, v))
with mock.patch.object(storage.v2.LOG, 'debug') as log_debug_mock:
out = self.storage.parse_groupby_syntax_to_groupby_elements(
groupby)
log_debug_mock.assert_has_calls(expected_log_calls)
self.assertEqual(["something", "time", "day_of_the_year",
"week_of_the_year", "month", "year"], out)
def test_parse_groupby_syntax_to_groupby_elements_no_groupby(self):
with mock.patch.object(storage.v2.LOG, 'debug') as log_debug_mock:
out = self.storage.parse_groupby_syntax_to_groupby_elements(None)
log_debug_mock.assert_has_calls([
mock.call("No groupby to process syntax.")])
self.assertIsNone(out)
StorageUnitTest.generate_scenarios()

View File

@ -39,9 +39,27 @@ def generate_v2_storage_data(min_length=10,
for project_id in project_ids:
data = [copy.deepcopy(sample)
for i in range(min_length + random.randint(1, 10))]
first_group = data[:round(len(data)/2)]
second_group = data[round(len(data)/2):]
for elem in first_group:
elem['groupby']['year'] = 2022
elem['groupby']['week_of_the_year'] = 1
elem['groupby']['day_of_the_year'] = 1
elem['groupby']['month'] = 10
for elem in second_group:
elem['groupby']['year'] = 2023
elem['groupby']['week_of_the_year'] = 2
elem['groupby']['day_of_the_year'] = 2
elem['groupby']['month'] = 12
data[0]['groupby']['year'] = 2021
for elem in data:
elem['groupby']['id'] = uuidutils.generate_uuid()
elem['groupby']['project_id'] = project_id
datapoints += [dataframe.DataPoint(
elem['vol']['unit'],
elem['vol']['qty'],

View File

@ -0,0 +1,10 @@
---
features:
- |
Introduce new default groupby options: (i) ``time``, to group data
hourly; the actual grouping depends on the ``period`` parameter, whose
default value is ``3600`` (one hour); (ii) ``time-d``, to group data by
day of the year; (iii) ``time-w``, to group data by week of the year;
(iv) ``time-m``, to group data by month; and (v) ``time-y``, to group
data by year. If you have old data in CloudKitty and wish to use these
groupby options, you will need to reprocess the desired timeframe.
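For context, the hourly ``time`` grouping follows the collect ``period`` option, which can be adjusted in ``cloudkitty.conf`` if a different granularity is wanted. The snippet below is illustrative only; the value shown is simply the default:

[collect]
# Collect/rating period in seconds; the hourly "time" groupby follows
# this granularity. 3600 (one hour) is the default.
period = 3600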