Properly interpolate missing data for test_runs
This commit modifies the data interpolation of the test_run_aggregator method used by the per-test view. Previously the code just removed rows after the resample, and d3 would linearly interpolate between the remaining points to cover the missing data. However, this is disingenuous because it gives the illusion that there were results for that period. This commit switches to pandas' index-aware interpolation with a limit of 20 consecutive interpolated points (the same as our sample size for the rolling mean and std dev calculations). This way we show large gaps in the data properly, but smaller gaps are treated as they were previously. Change-Id: I8998faabe4e7fb523e2a8b5cdddb2b9e16e46e47
This commit is contained in:
parent
2b17af27d1
commit
ceb96144cc
|
@ -12,6 +12,13 @@
|
|||
# License for the specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
resample_matrix = {
|
||||
'day': 'D',
|
||||
'hour': '1H',
|
||||
'min': '1T',
|
||||
'sec': '1S',
|
||||
}
|
||||
|
||||
|
||||
class BaseAggregator(object):
|
||||
def _update_datetime_to_fit_resolution(self,
|
||||
|
|
|
@ -15,7 +15,7 @@
|
|||
import pandas as pd
|
||||
from subunit2sql import read_subunit
|
||||
|
||||
from base_aggregator import BaseAggregator
|
||||
import base_aggregator as base
|
||||
|
||||
|
||||
def convert_test_runs_list_to_time_series_dict(test_runs_list, resample):
|
||||
|
@ -49,22 +49,16 @@ def convert_test_runs_list_to_time_series_dict(test_runs_list, resample):
|
|||
df['stddev_run_time'] = pd.rolling_std(df['run_time'], 20)
|
||||
|
||||
# Resample numeric data for the run_time graph from successful runs
|
||||
resample_matrix = {
|
||||
'day': 'D',
|
||||
'hour': '1H',
|
||||
'min': '1T',
|
||||
'sec': '1S',
|
||||
}
|
||||
numeric_df = df[df['status'] == 'success'].resample(
|
||||
resample_matrix[resample], how='mean')
|
||||
base.resample_matrix[resample], how='mean')
|
||||
# Drop duplicate or invalid columns
|
||||
del(numeric_df['run_id'])
|
||||
del(df['run_time'])
|
||||
del(df['avg_run_time'])
|
||||
del(df['stddev_run_time'])
|
||||
|
||||
# Drop missing data from the resample
|
||||
numeric_df = numeric_df.dropna(how='all')
|
||||
# Interpolate missing data
|
||||
numeric_df['run_time'] = numeric_df.interpolate(method='time', limit=20)
|
||||
# Add rolling mean and std dev of run_time to dataframe
|
||||
numeric_df['avg_run_time'] = pd.rolling_mean(numeric_df['run_time'], 20)
|
||||
numeric_df['stddev_run_time'] = pd.rolling_std(numeric_df['run_time'], 20)
|
||||
|
||||
# Convert the dataframes to a dict
|
||||
numeric_dict = dict(
|
||||
|
@ -131,7 +125,7 @@ class Counter(object):
|
|||
return (self.passes, self.failures, self.skips)
|
||||
|
||||
|
||||
class TestRunAggregator(BaseAggregator):
|
||||
class TestRunAggregator(base.BaseAggregator):
|
||||
def __init__(self, test_runs):
|
||||
self.test_runs = test_runs
|
||||
|
||||
|
|
Loading…
Reference in New Issue