Support grouping by timestamp in GET /v2/summary
Work items: * Updated Elasticsearch and InfluxDB v2 storage backends in order to support grouping on time. * Added a "dt_from_ts()" function to "cloudkitty.tzutils", along with unit tests. This function converts an epoch timestamp to a timezone-aware datetime object. * Added unit tests for GET /v2/summary to validate grouping by time works as expected. * Updated the API documentation. Change-Id: Ia01fced0bdb3a9b389a89d56f02029b9456781c2 Story: 2006730 Task: 37153
This commit is contained in:
parent
33edfb483d
commit
0d8a636755
|
@ -184,6 +184,9 @@ class ElasticsearchStorage(v2_storage.BaseStorage):
|
||||||
# Means we had a composite aggregation
|
# Means we had a composite aggregation
|
||||||
if 'key' in doc.keys():
|
if 'key' in doc.keys():
|
||||||
for key, value in doc['key'].items():
|
for key, value in doc['key'].items():
|
||||||
|
if key == 'begin' or key == 'end':
|
||||||
|
# Elasticsearch returns ts in milliseconds
|
||||||
|
value = tzutils.dt_from_ts(value // 1000)
|
||||||
output[key] = value
|
output[key] = value
|
||||||
return output
|
return output
|
||||||
|
|
||||||
|
|
|
@ -92,14 +92,18 @@ class ElasticsearchClient(object):
|
||||||
{'term': {'metadata.' + k: v}}]
|
{'term': {'metadata.' + k: v}}]
|
||||||
return should
|
return should
|
||||||
|
|
||||||
@staticmethod
|
def _build_composite(self, groupby):
|
||||||
def _build_composite(groupby):
|
|
||||||
if not groupby:
|
if not groupby:
|
||||||
return []
|
return []
|
||||||
sources = []
|
sources = []
|
||||||
for elem in groupby:
|
for elem in groupby:
|
||||||
if elem == 'type':
|
if elem == 'type':
|
||||||
sources.append({'type': {'terms': {'field': 'type'}}})
|
sources.append({'type': {'terms': {'field': 'type'}}})
|
||||||
|
elif elem == 'time':
|
||||||
|
# Not doing a date_histogram aggregation because we don't know
|
||||||
|
# the period
|
||||||
|
sources.append({'begin': {'terms': {'field': 'start'}}})
|
||||||
|
sources.append({'end': {'terms': {'field': 'end'}}})
|
||||||
else:
|
else:
|
||||||
sources.append({elem: {'terms': {'field': 'groupby.' + elem}}})
|
sources.append({elem: {'terms': {'field': 'groupby.' + elem}}})
|
||||||
|
|
||||||
|
|
|
@ -65,19 +65,27 @@ CONF.register_opts(influx_storage_opts, INFLUX_STORAGE_GROUP)
|
||||||
PERIOD_FIELD_NAME = '__ck_collect_period'
|
PERIOD_FIELD_NAME = '__ck_collect_period'
|
||||||
|
|
||||||
|
|
||||||
|
def _sanitized_groupby(groupby):
|
||||||
|
forbidden = ('time',)
|
||||||
|
return [g for g in groupby if g not in forbidden] if groupby else []
|
||||||
|
|
||||||
|
|
||||||
class InfluxClient(object):
|
class InfluxClient(object):
|
||||||
"""Classe used to ease interaction with InfluxDB"""
|
"""Classe used to ease interaction with InfluxDB"""
|
||||||
|
|
||||||
def __init__(self, chunk_size=500, autocommit=True):
|
def __init__(self, chunk_size=500, autocommit=True, default_period=3600):
|
||||||
"""Creates an InfluxClient object.
|
"""Creates an InfluxClient object.
|
||||||
|
|
||||||
:param chunk_size: Size after which points should be pushed.
|
:param chunk_size: Size after which points should be pushed.
|
||||||
:param autocommit: Set to false to disable autocommit
|
:param autocommit: Set to false to disable autocommit
|
||||||
|
:param default_period: Placeholder for the period in case it can't
|
||||||
|
be determined.
|
||||||
"""
|
"""
|
||||||
self._conn = self._get_influx_client()
|
self._conn = self._get_influx_client()
|
||||||
self._chunk_size = chunk_size
|
self._chunk_size = chunk_size
|
||||||
self._autocommit = autocommit
|
self._autocommit = autocommit
|
||||||
self._retention_policy = CONF.storage_influxdb.retention_policy
|
self._retention_policy = CONF.storage_influxdb.retention_policy
|
||||||
|
self._default_period = default_period
|
||||||
self._points = []
|
self._points = []
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
|
@ -185,7 +193,13 @@ class InfluxClient(object):
|
||||||
query += self._get_type_query(types)
|
query += self._get_type_query(types)
|
||||||
|
|
||||||
if groupby:
|
if groupby:
|
||||||
groupby_query = '"' + '","'.join(groupby) + '"'
|
groupby_query = ''
|
||||||
|
if 'time' in groupby:
|
||||||
|
groupby_query += 'time(' + str(self._default_period) + 's)'
|
||||||
|
groupby_query += ',' if groupby else ''
|
||||||
|
if groupby:
|
||||||
|
groupby_query += '"' + '","'.join(
|
||||||
|
_sanitized_groupby(groupby)) + '"'
|
||||||
query += ' GROUP BY ' + groupby_query
|
query += ' GROUP BY ' + groupby_query
|
||||||
|
|
||||||
query += ';'
|
query += ';'
|
||||||
|
@ -243,7 +257,7 @@ class InfluxStorage(v2_storage.BaseStorage):
|
||||||
def __init__(self, *args, **kwargs):
|
def __init__(self, *args, **kwargs):
|
||||||
super(InfluxStorage, self).__init__(*args, **kwargs)
|
super(InfluxStorage, self).__init__(*args, **kwargs)
|
||||||
self._default_period = kwargs.get('period') or CONF.collect.period
|
self._default_period = kwargs.get('period') or CONF.collect.period
|
||||||
self._conn = InfluxClient()
|
self._conn = InfluxClient(default_period=self._default_period)
|
||||||
|
|
||||||
def init(self):
|
def init(self):
|
||||||
policy = CONF.storage_influxdb.retention_policy
|
policy = CONF.storage_influxdb.retention_policy
|
||||||
|
@ -326,8 +340,11 @@ class InfluxStorage(v2_storage.BaseStorage):
|
||||||
def delete(self, begin=None, end=None, filters=None):
|
def delete(self, begin=None, end=None, filters=None):
|
||||||
self._conn.delete(begin, end, filters)
|
self._conn.delete(begin, end, filters)
|
||||||
|
|
||||||
@staticmethod
|
def _get_total_elem(self, begin, end, groupby, series_groupby, point):
|
||||||
def _get_total_elem(begin, end, groupby, series_groupby, point):
|
if groupby and 'time' in groupby:
|
||||||
|
begin = tzutils.dt_from_iso(point['time'])
|
||||||
|
period = point.get(PERIOD_FIELD_NAME) or self._default_period
|
||||||
|
end = tzutils.add_delta(begin, datetime.timedelta(seconds=period))
|
||||||
output = {
|
output = {
|
||||||
'begin': begin,
|
'begin': begin,
|
||||||
'end': end,
|
'end': end,
|
||||||
|
@ -335,7 +352,7 @@ class InfluxStorage(v2_storage.BaseStorage):
|
||||||
'rate': point['price'],
|
'rate': point['price'],
|
||||||
}
|
}
|
||||||
if groupby:
|
if groupby:
|
||||||
for group in groupby:
|
for group in _sanitized_groupby(groupby):
|
||||||
output[group] = series_groupby.get(group, '')
|
output[group] = series_groupby.get(group, '')
|
||||||
return output
|
return output
|
||||||
|
|
||||||
|
@ -353,12 +370,18 @@ class InfluxStorage(v2_storage.BaseStorage):
|
||||||
output = []
|
output = []
|
||||||
for (series_name, series_groupby), points in total.items():
|
for (series_name, series_groupby), points in total.items():
|
||||||
for point in points:
|
for point in points:
|
||||||
output.append(self._get_total_elem(
|
# NOTE(peschk_l): InfluxDB returns all timestamps for a given
|
||||||
begin, end,
|
# period and interval, even those with no data. This filters
|
||||||
groupby,
|
# out periods with no data
|
||||||
series_groupby,
|
if point['qty'] is not None and point['price'] is not None:
|
||||||
point))
|
output.append(self._get_total_elem(
|
||||||
|
tzutils.utc_to_local(begin),
|
||||||
|
tzutils.utc_to_local(end),
|
||||||
|
groupby,
|
||||||
|
series_groupby,
|
||||||
|
point))
|
||||||
|
|
||||||
|
groupby = _sanitized_groupby(groupby)
|
||||||
if groupby:
|
if groupby:
|
||||||
output.sort(key=lambda x: [x[group] for group in groupby])
|
output.sort(key=lambda x: [x[group] for group in groupby])
|
||||||
return {
|
return {
|
||||||
|
|
|
@ -68,3 +68,21 @@ tests:
|
||||||
response_json_paths:
|
response_json_paths:
|
||||||
$.results.`len`: 0
|
$.results.`len`: 0
|
||||||
$.total: 0
|
$.total: 0
|
||||||
|
|
||||||
|
- name: Get a summary grouped by time
|
||||||
|
url: /v2/summary
|
||||||
|
status: 200
|
||||||
|
query_parameters:
|
||||||
|
groupby: [time]
|
||||||
|
response_json_paths:
|
||||||
|
$.results.`len`: 1
|
||||||
|
$.total: 1
|
||||||
|
|
||||||
|
- name: Get a summary grouped by time and project_id
|
||||||
|
url: /v2/summary
|
||||||
|
status: 200
|
||||||
|
query_parameters:
|
||||||
|
groupby: [time, project_id]
|
||||||
|
response_json_paths:
|
||||||
|
$.results.`len`: 2
|
||||||
|
$.total: 2
|
||||||
|
|
|
@ -17,6 +17,7 @@ import functools
|
||||||
|
|
||||||
from influxdb import resultset
|
from influxdb import resultset
|
||||||
|
|
||||||
|
from cloudkitty.storage.v2.influx import _sanitized_groupby
|
||||||
from cloudkitty.storage.v2.influx import InfluxClient
|
from cloudkitty.storage.v2.influx import InfluxClient
|
||||||
|
|
||||||
|
|
||||||
|
@ -63,6 +64,12 @@ class FakeInfluxClient(InfluxClient):
|
||||||
break
|
break
|
||||||
valid = True
|
valid = True
|
||||||
for tag in serie['tags'].keys():
|
for tag in serie['tags'].keys():
|
||||||
|
if tag == 'time':
|
||||||
|
if point['time'].isoformat() != serie['values'][0][0]:
|
||||||
|
valid = False
|
||||||
|
break
|
||||||
|
else:
|
||||||
|
continue
|
||||||
if tag not in point['tags'].keys() or \
|
if tag not in point['tags'].keys() or \
|
||||||
point['tags'][tag] != serie['tags'][tag]:
|
point['tags'][tag] != serie['tags'][tag]:
|
||||||
valid = False
|
valid = False
|
||||||
|
@ -74,10 +81,11 @@ class FakeInfluxClient(InfluxClient):
|
||||||
if target_serie is None:
|
if target_serie is None:
|
||||||
target_serie = copy.deepcopy(self.total_series_sample)
|
target_serie = copy.deepcopy(self.total_series_sample)
|
||||||
if groupby:
|
if groupby:
|
||||||
target_serie['tags'] = {k: point['tags'][k] for k in groupby}
|
target_serie['tags'] = {k: point['tags'][k] for k in
|
||||||
|
_sanitized_groupby(groupby)}
|
||||||
else:
|
else:
|
||||||
target_serie['tags'] = {}
|
target_serie['tags'] = {}
|
||||||
target_serie['values'] = [['1970-01-01T00:00:00Z', 0, 0]]
|
target_serie['values'] = [[point['time'].isoformat(), 0, 0]]
|
||||||
series.append(target_serie)
|
series.append(target_serie)
|
||||||
return target_serie
|
return target_serie
|
||||||
|
|
||||||
|
|
|
@ -16,6 +16,7 @@ import datetime
|
||||||
import unittest
|
import unittest
|
||||||
|
|
||||||
from dateutil import tz
|
from dateutil import tz
|
||||||
|
import mock
|
||||||
from oslo_utils import timeutils
|
from oslo_utils import timeutils
|
||||||
|
|
||||||
from cloudkitty import tzutils
|
from cloudkitty import tzutils
|
||||||
|
@ -132,3 +133,15 @@ class TestTZUtils(unittest.TestCase):
|
||||||
two = datetime.datetime(2019, 3, 31, 3,
|
two = datetime.datetime(2019, 3, 31, 3,
|
||||||
tzinfo=tz.gettz('Europe/Paris'))
|
tzinfo=tz.gettz('Europe/Paris'))
|
||||||
self.assertEqual(tzutils.diff_seconds(two, one), 3600)
|
self.assertEqual(tzutils.diff_seconds(two, one), 3600)
|
||||||
|
|
||||||
|
def test_cloudkitty_dt_from_ts_as_utc(self):
|
||||||
|
ts = 1569902400
|
||||||
|
dt = datetime.datetime(2019, 10, 1, 4, tzinfo=tz.UTC)
|
||||||
|
self.assertEqual(dt, tzutils.dt_from_ts(ts, as_utc=True))
|
||||||
|
|
||||||
|
def test_cloudkitty_dt_from_ts_local_tz(self):
|
||||||
|
ts = 1569902400
|
||||||
|
timezone = tz.gettz('Europe/Paris')
|
||||||
|
dt = datetime.datetime(2019, 10, 1, 6, tzinfo=timezone)
|
||||||
|
with mock.patch.object(tzutils, '_LOCAL_TZ', new=timezone):
|
||||||
|
self.assertEqual(dt, tzutils.dt_from_ts(ts))
|
||||||
|
|
|
@ -84,6 +84,14 @@ def dt_from_iso(time_str, as_utc=False):
|
||||||
tz.UTC if as_utc else _LOCAL_TZ).replace(microsecond=0)
|
tz.UTC if as_utc else _LOCAL_TZ).replace(microsecond=0)
|
||||||
|
|
||||||
|
|
||||||
|
def dt_from_ts(ts, as_utc=False):
|
||||||
|
"""Parses a timezone-aware datetime object from an epoch timestamp.
|
||||||
|
|
||||||
|
Returns the object as being from the local timezone.
|
||||||
|
"""
|
||||||
|
return datetime.datetime.fromtimestamp(ts, tz.UTC if as_utc else _LOCAL_TZ)
|
||||||
|
|
||||||
|
|
||||||
def add_delta(dt, delta):
|
def add_delta(dt, delta):
|
||||||
"""Adds a timedelta to a datetime object.
|
"""Adds a timedelta to a datetime object.
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,33 @@
|
||||||
|
{
|
||||||
|
"total": 232,
|
||||||
|
"columns": [
|
||||||
|
"begin",
|
||||||
|
"end",
|
||||||
|
"qty",
|
||||||
|
"rate",
|
||||||
|
"project_id"
|
||||||
|
],
|
||||||
|
"results": [
|
||||||
|
[
|
||||||
|
"2019-10-01T06:00:00+02:00",
|
||||||
|
"2019-10-01T07:00:00+02:00",
|
||||||
|
3.5533905029296875,
|
||||||
|
1.7766952514648438,
|
||||||
|
"84631866b2d84db49b29828052bdc287"
|
||||||
|
],
|
||||||
|
[
|
||||||
|
"2019-10-01T07:00:00+02:00",
|
||||||
|
"2019-10-01T08:00:00+02:00",
|
||||||
|
3.5533905029296875,
|
||||||
|
1.7766952514648438,
|
||||||
|
"84631866b2d84db49b29828052bdc287"
|
||||||
|
],
|
||||||
|
[
|
||||||
|
"2019-10-01T08:00:00+02:00",
|
||||||
|
"2019-10-01T09:00:00+02:00",
|
||||||
|
3.5533905029296875,
|
||||||
|
1.7766952514648438,
|
||||||
|
"84631866b2d84db49b29828052bdc287"
|
||||||
|
]
|
||||||
|
]
|
||||||
|
}
|
|
@ -65,6 +65,11 @@ the columns for each element of ``results``. The columns are the four mandatory
|
||||||
(``begin``, ``end``, ``qty``, ``rate``) along with each attribute the result is
|
(``begin``, ``end``, ``qty``, ``rate``) along with each attribute the result is
|
||||||
grouped by.
|
grouped by.
|
||||||
|
|
||||||
|
.. note:: It is also possible to group data by time, in order to obtain timeseries.
|
||||||
|
In order to do this, group by ``time``. No extra column will be added,
|
||||||
|
but you'll get one entry per collect period in the queried timeframe.
|
||||||
|
See examples below.
|
||||||
|
|
||||||
.. rest_parameters:: summary/summary_parameters.yml
|
.. rest_parameters:: summary/summary_parameters.yml
|
||||||
|
|
||||||
- begin: begin_resp
|
- begin: begin_resp
|
||||||
|
@ -75,6 +80,15 @@ grouped by.
|
||||||
Response Example
|
Response Example
|
||||||
----------------
|
----------------
|
||||||
|
|
||||||
|
Grouping by time and project_id:
|
||||||
|
|
||||||
|
.. code-block:: shell
|
||||||
|
|
||||||
|
curl "http://cloudkitty-api:8889/v2/summary?groupby=time&groupby=project_id&limit=3"
|
||||||
|
|
||||||
|
.. literalinclude:: ./api_samples/summary/summary_get_groupby_time.json
|
||||||
|
:language: javascript
|
||||||
|
|
||||||
.. code-block:: shell
|
.. code-block:: shell
|
||||||
|
|
||||||
curl "http://cloudkitty-api:8889/v2/summary?filters=project_id%3Afe9c35372db6420089883805b37a34af&groupby=type&groupby=project_id"
|
curl "http://cloudkitty-api:8889/v2/summary?filters=project_id%3Afe9c35372db6420089883805b37a34af&groupby=type&groupby=project_id"
|
||||||
|
|
|
@ -0,0 +1,6 @@
|
||||||
|
---
|
||||||
|
upgrade:
|
||||||
|
- |
|
||||||
|
It is now possible to group v2 summaries by timestamp. In order to do this,
|
||||||
|
the ``time`` parameter must be specified in the ``groupby`` list:
|
||||||
|
``cloudkitty summary get -g time,type``.
|
Loading…
Reference in New Issue