#!/usr/bin/env python

# Copyright (c) 2016 Hewlett Packard Enterprise Development Company, L.P.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.

import logging
|
|
import voluptuous
|
|
|
|
import monasca_analytics.banana.typeck.type_util as type_util
|
|
import monasca_analytics.component.params as params
|
|
|
|
import monasca_analytics.ldp.base as bt
|
|
import monasca_analytics.ldp.monasca.helpers as helpers
|
|
import monasca_analytics.util.spark_func as fn
|
|
from monasca_analytics.util import validation_utils as vu
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class MonascaAggregateLDP(bt.BaseLDP):
    """Monasca aggregator live data processor.

    Groups the metrics of a DStream by metric name over a tumbling window
    and combines, per timestamp, the values coming from different providers
    (i.e. different dimension sets) using the reducer selected by the
    "func" configuration entry.
    """

    def __init__(self, _id, _config):
        super(MonascaAggregateLDP, self).__init__(_id, _config)
        # Window length (and slide duration) in seconds.
        self._aggregation_period = _config["period"]
        self._reducer_func = MonascaAggregateLDP.select_reducer(_config)
        # Appended to the metric name so aggregated metrics can be told
        # apart from raw ones (e.g. "cpu.idle_perc" -> "cpu.idle_perc_avg").
        self._suffix = "_" + _config["func"]

    @staticmethod
    def validate_config(_config):
        """Validate the given configuration against the expected schema.

        :type _config: dict
        :param _config: Configuration of this aggregator.
        :returns: The validated configuration.
        :raises voluptuous.Invalid: If the configuration is invalid.
        """
        monasca_ag_schema = voluptuous.Schema({
            "module": voluptuous.And(basestring, vu.NoSpaceCharacter()),
            "period": voluptuous.Or(float, int),
            "func": voluptuous.Or(
                "avg",
                "max",
                "sum",
                "min",
                "cnt"
            )
        }, required=True)
        return monasca_ag_schema(_config)

    @staticmethod
    def get_default_config():
        """Return the default configuration: hourly average."""
        return {
            "module": MonascaAggregateLDP.__name__,
            "period": 60.0 * 60.0,
            "func": "avg"
        }

    @staticmethod
    def get_params():
        """Return the descriptors of the configurable parameters."""
        return [
            params.ParamDescriptor('period', type_util.Number(), 60 * 60),
            params.ParamDescriptor(
                'func',
                type_util.Enum(['avg', 'max', 'sum', 'min', 'cnt']),
                'avg'
            )
        ]

    def map_dstream(self, dstream):
        """
        Map the given DStream into a new DStream where metrics
        have been aggregated by name.

        :type dstream: pyspark.streaming.DStream
        :param dstream: DStream
        :return: Returns the stream of aggregated metrics
        """
        red = self._reducer_func
        suf = self._suffix
        agg_period = self._aggregation_period
        # TODO(Joan): Add a filter to only aggregate some metrics
        # TODO(Joan): or particular dimensions
        # Tumbling window (length == slide), keyed by metric name, then
        # each group is collapsed into aggregated metrics.
        return dstream.map(fn.from_json) \
            .window(agg_period, agg_period) \
            .map(lambda metric: (metric["metric"]["name"], metric)) \
            .groupByKey() \
            .flatMapValues(lambda metrics: MonascaAggregateLDP.aggregate(
                metrics,
                red,
                suf
            ))\
            .map(lambda metric_and_name: metric_and_name[1])

    @staticmethod
    def aggregate(all_metrics, reducer, suffix):
        """
        Aggregate values produced by different providers together.

        The metric name is assumed to be the same for all providers.

        :type all_metrics: list[dict]
        :param all_metrics: Values to aggregate mapping to a specific
                            metric name.
        :type reducer: ((float, float) -> float,
                        (float, float, float) -> float)
        :param reducer: Combine the metrics values together
        :type suffix: str
        :param suffix: Suffix to append to the metric name in its combined
                       form.
        :returns: list[dict] -- one aggregated metric per timestamp of the
            provider having the most data points.
        """
        # Collect each provider's metrics separately, keyed by its
        # (frozen) dimension set.
        separated_metrics = {}  # type: dict[frozenset, list[dict]]
        for el in all_metrics:
            key = frozenset(el["metric"]["dimensions"].items())
            separated_metrics.setdefault(key, []).append(el)
        if not separated_metrics:
            # Defensive: groupByKey should never emit an empty group, but
            # indexing separated_metrics[0] below would raise IndexError.
            return []

        # Collect, for every dimension name, the set of values seen
        # across all providers.
        dims = {}
        for metric_dims in separated_metrics.keys():
            for prop, val in dict(metric_dims).items():
                if prop in dims:
                    dims[prop].add(val)
                else:
                    # BUGFIX: was `set(val)`, which builds a set of the
                    # *characters* of the dimension value; we want a
                    # single-element set holding the value itself.
                    dims[prop] = set([val])

        # Sort each provider's metrics chronologically.
        for metric in separated_metrics.values():
            metric.sort(key=lambda v: v["metric"]["timestamp"])

        # Order providers by decreasing number of data points: the richest
        # provider drives the timestamps of the aggregated output.
        separated_metrics = sorted(separated_metrics.values(), key=len)
        separated_metrics.reverse()

        # Compute the new values.
        new_values = []
        all_timestamps = [
            [x["metric"]["timestamp"] for x in l]
            for l in separated_metrics
        ]
        metric_count = len(separated_metrics)
        for index in range(len(separated_metrics[0])):
            # Seed the accumulator with the richest provider's value.
            new_value = reducer[0](
                separated_metrics[0][index]["metric"]["value"],
                metric_count)
            new_timestamp = separated_metrics[0][index]["metric"]["timestamp"]
            # Fold in the other providers' values, interpolated at
            # new_timestamp since their samples are not time-aligned.
            for metric_index in range(1, metric_count):
                new_value = reducer[1](new_value, helpers.interpolate(
                    new_timestamp,
                    separated_metrics[metric_index],
                    all_timestamps[metric_index]
                ), metric_count)
            new_values.append((new_timestamp, new_value))

        # Aggregate the other details:
        metric_name = separated_metrics[0][0]["metric"]["name"] + suffix
        meta = separated_metrics[0][0]["meta"]
        new_metrics = [
            helpers.create_agg_metric(
                metric_name,
                meta,
                dims,
                val[0],
                val[1]
            ) for val in new_values
        ]
        return new_metrics

    @staticmethod
    def select_reducer(_config):
        """Return the (seed, fold) function pair for _config["func"].

        The first element maps the initial value given the provider count;
        the second folds each subsequent (interpolated) value into the
        accumulator.

        :type _config: dict
        :param _config: Configuration containing the "func" entry.
        :raises KeyError: If "func" is not one of the supported names.
        """
        return {
            "avg": (
                lambda m, cnt: m / cnt,
                lambda acc, m, cnt: m / cnt + acc,
            ),
            "max": (
                lambda m, cnt: m,
                lambda acc, m, cnt: max(m, acc),
            ),
            "sum": (
                lambda m, cnt: m,
                lambda acc, m, cnt: m + acc,
            ),
            "min": (
                lambda m, cnt: m,
                lambda acc, m, cnt: min(m, acc),
            ),
            "cnt": (
                lambda m, cnt: m,
                lambda acc, m, cnt: cnt,
            ),
        }[_config["func"]]