From ceb96144cc72674c8429693c11cf9399955811fc Mon Sep 17 00:00:00 2001 From: Matthew Treinish Date: Fri, 11 Dec 2015 15:35:35 -0500 Subject: [PATCH] Properly interpolate missing data for test_runs This commit modifies the data interpolation of the test_run_aggregator method used by the per test view. Previously the commit just removed rows post resample and d3 would do a linear interpolation between points for missing data points. However, this is disingenuous because it gives the illusion that there were results for that period. This commit switches to use pandas' index aware interpolation with a limit of 20 (the same as our sample size for the rolling mean and std dev calculations) consecutive interpolated points. This way we show large gaps in the data properly, but smaller gaps are treated like they were previously. Change-Id: I8998faabe4e7fb523e2a8b5cdddb2b9e16e46e47 --- openstack_health/base_aggregator.py | 7 +++++++ openstack_health/test_run_aggregator.py | 22 ++++++++-------------- 2 files changed, 15 insertions(+), 14 deletions(-) diff --git a/openstack_health/base_aggregator.py b/openstack_health/base_aggregator.py index 452efdc6..85c59a8d 100644 --- a/openstack_health/base_aggregator.py +++ b/openstack_health/base_aggregator.py @@ -12,6 +12,13 @@ # License for the specific language governing permissions and limitations # under the License. 
+resample_matrix = { + 'day': 'D', + 'hour': '1H', + 'min': '1T', + 'sec': '1S', +} + class BaseAggregator(object): def _update_datetime_to_fit_resolution(self, diff --git a/openstack_health/test_run_aggregator.py b/openstack_health/test_run_aggregator.py index adc9a935..fb945ca9 100644 --- a/openstack_health/test_run_aggregator.py +++ b/openstack_health/test_run_aggregator.py @@ -15,7 +15,7 @@ import pandas as pd from subunit2sql import read_subunit -from base_aggregator import BaseAggregator +import base_aggregator as base def convert_test_runs_list_to_time_series_dict(test_runs_list, resample): @@ -49,22 +49,16 @@ def convert_test_runs_list_to_time_series_dict(test_runs_list, resample): df['stddev_run_time'] = pd.rolling_std(df['run_time'], 20) # Resample numeric data for the run_time graph from successful runs - resample_matrix = { - 'day': 'D', - 'hour': '1H', - 'min': '1T', - 'sec': '1S', - } numeric_df = df[df['status'] == 'success'].resample( - resample_matrix[resample], how='mean') + base.resample_matrix[resample], how='mean') # Drop duplicate or invalid colums del(numeric_df['run_id']) del(df['run_time']) - del(df['avg_run_time']) - del(df['stddev_run_time']) - - # Drop missing data from the resample - numeric_df = numeric_df.dropna(how='all') + # Interpolate missing data + numeric_df['run_time'] = numeric_df.interpolate(method='time', limit=20) + # Add rolling mean and std dev of run_time to dataframe + numeric_df['avg_run_time'] = pd.rolling_mean(numeric_df['run_time'], 20) + numeric_df['stddev_run_time'] = pd.rolling_std(numeric_df['run_time'], 20) # Convert the dataframes to a dict numeric_dict = dict( @@ -131,7 +125,7 @@ class Counter(object): return (self.passes, self.failures, self.skips) -class TestRunAggregator(BaseAggregator): +class TestRunAggregator(base.BaseAggregator): def __init__(self, test_runs): self.test_runs = test_runs