Don't allow duplicate timestamps in carbonara series

If the calling code provides timestamp/value pairs with duplicate
timestamps, pandas will raise a ValueError when trying to operate on the
index of the data set. We avoid this problem by removing duplicates in
the timeseries, keeping the last value seen for each timestamp. The
assumption being made here is that a single metric can't have two
different measurements at the same time; that would be a violation of
physics.

The change is in place for both __init__ and set_values to ensure
that the internal representation of the timeseries is consistent no
matter how it is created.

Change-Id: Ib293714e26eca6b5de7cd4a2307569ab3d8a9c43
Closes-Bug: #1475329
This commit is contained in:
Chris Dent 2015-07-21 17:38:05 +00:00
parent 2e96d9c4a0
commit 262e8454f9
2 changed files with 30 additions and 2 deletions

View File

@ -65,9 +65,15 @@ class SerializableMixin(object):
class TimeSerie(SerializableMixin):
"""A representation of series of a timestamp with a value.
Duplicate timestamps are not allowed and will be filtered to use the
last in the group when the TimeSerie is created or extended.
"""
def __init__(self, timestamps=None, values=None):
self.ts = pandas.Series(values, timestamps).sort_index()
self.ts = pandas.Series(values, timestamps).groupby(
level=0).last().sort_index()
def __eq__(self, other):
return (isinstance(other, TimeSerie)
@ -77,7 +83,8 @@ class TimeSerie(SerializableMixin):
return self.ts[key]
def set_values(self, values):
t = pandas.Series(*reversed(list(zip(*values))))
t = pandas.Series(*reversed(list(zip(*values)))).groupby(
level=0).last()
self.ts = t.combine_first(self.ts).sort_index()
def __len__(self):

View File

@ -74,6 +74,27 @@ class TestBoundTimeSerie(base.BaseTestCase):
(datetime.datetime(2014, 1, 1, 12, 0, 10), 4)])
self.assertEqual(2, len(ts))
def test_duplicate_timestamps(self):
ts = carbonara.BoundTimeSerie(
[datetime.datetime(2014, 1, 1, 12, 0, 0),
datetime.datetime(2014, 1, 1, 12, 0, 9),
datetime.datetime(2014, 1, 1, 12, 0, 9)],
[10, 5, 23])
self.assertEqual(2, len(ts))
self.assertEqual(10.0, ts[0])
self.assertEqual(23.0, ts[1])
ts.set_values([(datetime.datetime(2014, 1, 1, 13, 0, 10), 3),
(datetime.datetime(2014, 1, 1, 13, 0, 11), 9),
(datetime.datetime(2014, 1, 1, 13, 0, 11), 8),
(datetime.datetime(2014, 1, 1, 13, 0, 11), 7),
(datetime.datetime(2014, 1, 1, 13, 0, 11), 4)])
self.assertEqual(4, len(ts))
self.assertEqual(10.0, ts[0])
self.assertEqual(23.0, ts[1])
self.assertEqual(3.0, ts[2])
self.assertEqual(4.0, ts[3])
class TestAggregatedTimeSerie(base.BaseTestCase):
@staticmethod