Don't allow duplicate timestamps in carbonara series
If the calling code provides timestamp/value pairs with duplicate timestamps, pandas will raise a ValueError when trying to operate on the index of the data set. We avoid this problem by removing duplicates from the timeseries, keeping the last value supplied for each timestamp. The assumption being made here is that a single metric can't have two different measurements at the same time; that would be a violation of physics. The change is in place for both __init__ and set_values to ensure that the internal representation of the timeseries is consistent no matter how it is created. Change-Id: Ib293714e26eca6b5de7cd4a2307569ab3d8a9c43 Closes-Bug: #1475329
This commit is contained in:
parent
2e96d9c4a0
commit
262e8454f9
|
@ -65,9 +65,15 @@ class SerializableMixin(object):
|
|||
|
||||
|
||||
class TimeSerie(SerializableMixin):
|
||||
"""A representation of series of a timestamp with a value.
|
||||
|
||||
Duplicate timestamps are not allowed and will be filtered to use the
|
||||
last in the group when the TimeSerie is created or extended.
|
||||
"""
|
||||
|
||||
def __init__(self, timestamps=None, values=None):
|
||||
self.ts = pandas.Series(values, timestamps).sort_index()
|
||||
self.ts = pandas.Series(values, timestamps).groupby(
|
||||
level=0).last().sort_index()
|
||||
|
||||
def __eq__(self, other):
|
||||
return (isinstance(other, TimeSerie)
|
||||
|
@ -77,7 +83,8 @@ class TimeSerie(SerializableMixin):
|
|||
return self.ts[key]
|
||||
|
||||
def set_values(self, values):
|
||||
t = pandas.Series(*reversed(list(zip(*values))))
|
||||
t = pandas.Series(*reversed(list(zip(*values)))).groupby(
|
||||
level=0).last()
|
||||
self.ts = t.combine_first(self.ts).sort_index()
|
||||
|
||||
def __len__(self):
|
||||
|
|
|
@ -74,6 +74,27 @@ class TestBoundTimeSerie(base.BaseTestCase):
|
|||
(datetime.datetime(2014, 1, 1, 12, 0, 10), 4)])
|
||||
self.assertEqual(2, len(ts))
|
||||
|
||||
def test_duplicate_timestamps(self):
|
||||
ts = carbonara.BoundTimeSerie(
|
||||
[datetime.datetime(2014, 1, 1, 12, 0, 0),
|
||||
datetime.datetime(2014, 1, 1, 12, 0, 9),
|
||||
datetime.datetime(2014, 1, 1, 12, 0, 9)],
|
||||
[10, 5, 23])
|
||||
self.assertEqual(2, len(ts))
|
||||
self.assertEqual(10.0, ts[0])
|
||||
self.assertEqual(23.0, ts[1])
|
||||
|
||||
ts.set_values([(datetime.datetime(2014, 1, 1, 13, 0, 10), 3),
|
||||
(datetime.datetime(2014, 1, 1, 13, 0, 11), 9),
|
||||
(datetime.datetime(2014, 1, 1, 13, 0, 11), 8),
|
||||
(datetime.datetime(2014, 1, 1, 13, 0, 11), 7),
|
||||
(datetime.datetime(2014, 1, 1, 13, 0, 11), 4)])
|
||||
self.assertEqual(4, len(ts))
|
||||
self.assertEqual(10.0, ts[0])
|
||||
self.assertEqual(23.0, ts[1])
|
||||
self.assertEqual(3.0, ts[2])
|
||||
self.assertEqual(4.0, ts[3])
|
||||
|
||||
|
||||
class TestAggregatedTimeSerie(base.BaseTestCase):
|
||||
@staticmethod
|
||||
|
|
Loading…
Reference in New Issue