#!/usr/bin/env python

# Copyright (c) 2016 Hewlett Packard Enterprise Development Company, L.P.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
|
|
|
|
import abc
|
|
import json
|
|
import logging
|
|
import numpy as np
|
|
import random
|
|
import six
|
|
from six.moves import socketserver
|
|
import threading as th
|
|
import time
|
|
import uuid
|
|
import voluptuous
|
|
|
|
import monasca_analytics.banana.typeck.type_util as type_util
|
|
import monasca_analytics.component.params as params
|
|
|
|
import monasca_analytics.exception.monanas as err
|
|
from monasca_analytics.source import base
|
|
from monasca_analytics.util import validation_utils as vu
|
|
|
|
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class RandomSource(base.BaseSource):
    """A randomly generated data source implementation.

    The source owns a ``ThreadingTCPServer`` whose handler
    (``MonanasTCPHandler``) emits generated alerts, and exposes them to
    Spark through a ``socketTextStream`` bound to the same host and port.
    """

    def __init__(self, _id, _config):
        """Build the source and configure (but do not start) its server.

        :type _id: str
        :param _id: identifier forwarded to the base source
        :type _config: dict
        :param _config: configuration of this source
        :raises: MonanasInitError -- if the server cannot be bound, or if
                 the configured model has no matching ``_generate_*``
                 method
        """
        super(RandomSource, self).__init__(_id, _config)
        try:
            self._configure_server()
        except IOError:
            raise err.MonanasInitError("Address already in use.")
        except AttributeError:
            raise err.MonanasInitError("Invalid generate or validate method.")

    def _configure_server(self):
        """Creates and configures the Server object

        The server object is configured according to
        the configuration of this source module. If anything fails after
        the server has been created, its socket is closed before the
        exception is re-raised so that no file descriptor is leaked.
        """
        source_params = self._config["params"]
        # bind_and_activate is False so that allow_reuse_address can be
        # set before the socket is actually bound.
        self._server = socketserver.ThreadingTCPServer(
            (source_params["host"], source_params["port"]),
            MonanasTCPHandler, False)
        try:
            self._server.generate = getattr(
                self, "_generate_" + source_params["model"]["name"])
            # NOTE: no ``validate`` callable is attached to the server;
            # the wiring below was left disabled.
            # self._server.validate = getattr(
            #     source_model, self._config["validate"])
            self._server.allow_reuse_address = True
            self._server.server_bind()
            self._server.server_activate()
        except Exception:
            # Release the socket created by the constructor.
            self._server.server_close()
            raise
        self._server.terminate = False
        self._server.generate_alerts_per_second = \
            source_params["alerts_per_burst"]
        self._server.generate_idle_time_between_bursts = \
            source_params["idle_time_between_bursts"]
        self._server_thread = th.Thread(target=self._server.serve_forever)
        self._is_server_running = False

    @staticmethod
    def validate_config(_config):
        """Validate a configuration dict against this source's schema.

        :type _config: dict
        :param _config: configuration to validate
        :returns: the value returned by the voluptuous schema call
        :raises: voluptuous.Invalid -- if the configuration does not match
        """
        source_schema = voluptuous.Schema({
            "module": voluptuous.And(six.string_types[0],
                                     vu.NoSpaceCharacter()),
            "params": {
                "host": voluptuous.And(six.string_types[0],
                                       vu.NoSpaceCharacter()),
                "port": int,
                "model": {
                    "name": voluptuous.And(six.string_types[0],
                                           vu.NoSpaceCharacter()),
                    "params": {
                        "origin_types": voluptuous.And([
                            {
                                "origin_type": voluptuous.And(
                                    six.string_types[0],
                                    vu.NoSpaceCharacter()),
                                "weight": voluptuous.And(
                                    voluptuous.Or(int, float),
                                    voluptuous.Range(
                                        min=0, min_included=False)),
                            }
                        ], vu.NotEmptyArray()),
                        voluptuous.Optional("key_causes"): dict
                    }
                },
                "alerts_per_burst": voluptuous.And(
                    int, voluptuous.Range(min=1)),
                "idle_time_between_bursts": voluptuous.And(
                    voluptuous.Or(int, float),
                    voluptuous.Range(min=0, min_included=False))
            }
        }, required=True)
        return source_schema(_config)

    @staticmethod
    def get_default_config():
        """Return a sample configuration for this source.

        :returns: dict with the ``module`` name and default ``params``
        """
        return {
            "module": RandomSource.__name__,
            "params": {
                "host": "localhost",
                "port": 1010,
                "model": {
                    "name": "my_model_name",
                    "params": {
                        "origin_types": [
                            {
                                "origin_type": "my_origin_type",
                                "weight": 1.0
                            }
                        ],
                    }
                },
                "alerts_per_burst": 1,
                "idle_time_between_bursts": 1.0
            }
        }

    @staticmethod
    def get_params():
        """Describe the parameters accepted by this source.

        The descriptor names match the keys found under ``params`` in
        :meth:`get_default_config` and :meth:`validate_config`.

        :returns: list of ``params.ParamDescriptor``
        """
        return [
            params.ParamDescriptor('host', type_util.String(), 'localhost'),
            params.ParamDescriptor('port', type_util.Number(), 1010),
            params.ParamDescriptor('model', type_util.Object({
                'name': type_util.String(),
                'params': type_util.Object({
                    'origin_types': type_util.Object(strict_checking=False)
                })
            })),
            # Must be spelled like the configuration key read by
            # _configure_server ("alerts_per_burst").
            params.ParamDescriptor('alerts_per_burst', type_util.Number(), 1),
            params.ParamDescriptor('idle_time_between_bursts',
                                   type_util.Number(), 1.0),
        ]

    def _start_server(self):
        """Start the serving thread unless it has already been started."""
        if not self._is_server_running:
            self._server_thread.start()
            self._is_server_running = True

    def create_dstream(self, ssc):
        """Dstream object creation

        The _dstream object is created before this source is bound
        to the consumers. It uses a socketTextStream, to read data from
        the ThreadingTCPServer.

        :type ssc: pyspark.streaming.StreamingContext
        :param ssc: Spark Streaming Context
        """
        self._start_server()
        self._dstream = ssc.socketTextStream(
            self._config["params"]["host"],
            self._config["params"]["port"])

    def get_feature_list(self):
        """Not implemented for this source.

        :raises: NotImplementedError -- always
        """
        raise NotImplementedError("This method needs to be implemented")

    def terminate_source(self):
        """Terminates the source with a delay

        Terminates the source with a delay to allow the messages
        being sent by the handler to clear up. The serving loop is then
        asked to stop before the listening socket is closed.
        """
        self._server.terminate = True
        time.sleep(1)
        # shutdown() blocks until serve_forever() returns, so it must only
        # be called when the serving thread was actually started.
        if self._is_server_running:
            self._server.shutdown()
            self._is_server_running = False
        self._server.server_close()
        self._server_thread = None

    def _generate_simple_model(self):
        """Generates an alert based on simple_model.

        :returns: dict whose ``created`` and ``updated`` fields hold the
                  current time in integer milliseconds
        """
        current_time = int(round(time.time() * 1000))
        return {
            "created": current_time,
            "id": str(uuid.uuid4()),
            "origin": str(uuid.uuid4()),
            "origin_type": self._random_origin_type(),
            "data": {},
            "state": "",
            "updated": current_time
        }

    def _random_origin_type(self):
        """Randomizes the origin_type

        :returns: the ``origin_type`` of one configured entry, picked with
                  a probability proportional to its ``weight``
        """
        origin_types = self._config[
            "params"]["model"]["params"]["origin_types"]
        index = self._weighted_choice([o["weight"] for o in origin_types])
        return origin_types[index]["origin_type"]

    def _weighted_choice(self, weights):
        """Gets an index chosen randomly but weighted from a list of weights

        :type weights: list[int | float]
        :param weights: one weight per candidate index
        :returns: int -- the selected index
        :raises: ValueError -- if no index can be selected (empty list or
                 a total weight that is not positive)
        """
        totals = []
        running_total = 0

        for w in weights:
            running_total += w
            totals.append(running_total)

        rnd = random.random() * running_total

        for i, total in enumerate(totals):
            if rnd < total:
                return i

        # Reaching this point used to return None silently, which then
        # failed with an opaque TypeError at the indexing call site.
        raise ValueError(
            "weights must be a non-empty list with a positive total")
|
|
|
|
|
|
@six.add_metaclass(abc.ABCMeta)
class BaseDataSourceGenerator(object):
    """Abstract base for the random data source generators."""

    @abc.abstractmethod
    def __init__(self, _config):
        """Store the configuration and bind the model's generate method.

        ``self.generate`` is resolved by name from the configured model:
        a model called ``X`` maps to the method ``generate_X``.

        :type _config: dict
        :param _config: Configuration of this source
        """
        self._config = _config
        model_name = self._config["params"]["model"]["name"]
        self.generate = getattr(self, "generate_" + model_name)

    @abc.abstractmethod
    def is_burst_over(self):
        """Should return true when all the burst alerts have been generated"""

    def generate_simple_model(self):
        """Build one alert event shaped according to the simple model.

        :returns: dict whose ``created`` and ``updated`` fields both hold
                  the value of ``time.time()`` taken at the start of the
                  call
        """
        timestamp = time.time()
        alert_id = str(uuid.uuid4())
        origin = str(uuid.uuid4())
        origin_type = self._pick_next_type()
        return {
            "created": timestamp,
            "id": alert_id,
            "origin": origin,
            "origin_type": origin_type,
            "data": {},
            "state": "",
            "updated": timestamp
        }

    @abc.abstractmethod
    def _pick_next_type(self):
        """Should return the next type for the simple model generation"""
|
|
|
|
|
|
class LinearlyDependentDataSourceGenerator(BaseDataSourceGenerator):
    """A data source generator where alerts are linearly dependent

    :raises: exception -- if the causal matrix is cyclic
    """

    def __init__(self, config):
        """Build the causal matrix from the ``key_causes`` configuration.

        :type config: dict
        :param config: Configuration of this source; the mapping found at
                       ``params.model.params.key_causes`` associates each
                       feature name with the collection of its causes
        :raises: ValueError -- if ``key_causes`` is empty
        :raises: MonanasCyclicRandomSourceError -- if no ordering of the
                 features can be found (cyclic causality)
        """
        BaseDataSourceGenerator.__init__(self, config)

        # Acyclic causality model
        config_key_causes = self._config[
            "params"]["model"]["params"]["key_causes"]
        if not config_key_causes:
            # An empty model previously made the loop below spin forever.
            raise ValueError("key_causes must define at least one feature.")

        # Create the causal matrix (/graph). The names are copied into a
        # real list because they are indexed and swapped below, which a
        # Python 3 dict view does not support.
        self._features_names = list(config_key_causes)
        n = len(self._features_names)
        self._causal_matrix = np.zeros((n, n), dtype=np.float32)
        for i, row in enumerate(self._features_names):
            causes = config_key_causes[row]
            for j, col in enumerate(self._features_names):
                if col in causes:
                    self._causal_matrix[i, j] = 1

        # Triangulate the causal matrix: repeatedly look for an all-zero
        # row in the remaining sub-matrix, swap it to the front (mirroring
        # the swap on the full matrix and on the feature names), then drop
        # it and continue with the smaller sub-matrix.
        tmp_matrix = np.copy(self._causal_matrix)
        n_t = tmp_matrix.shape[0]
        while n_t > 1:
            for i in range(n_t):
                if not np.any(tmp_matrix[i, :]):
                    tmp_matrix[[i, 0], :] = tmp_matrix[[0, i], :]
                    tmp_matrix[:, [i, 0]] = tmp_matrix[:, [0, i]]
                    # Offsets of the sub-matrix inside the full matrix.
                    k = n - n_t
                    r = i + k
                    self._causal_matrix[
                        [r, k], :] = self._causal_matrix[[k, r], :]
                    self._causal_matrix[
                        :, [r, k]] = self._causal_matrix[:, [k, r]]
                    names = self._features_names
                    names[r], names[k] = names[k], names[r]
                    tmp_matrix = tmp_matrix[1:, 1:]
                    break
            else:
                # No all-zero row is left in the sub-matrix.
                raise err.MonanasCyclicRandomSourceError
            n_t = tmp_matrix.shape[0]

        # Prepare a zero buffer that store the random values generated
        # following the causal model
        self._features_random_value = np.zeros(len(self._features_names))

        # This stack will contains the generated values for one burst (if that
        # make some sense)
        self._features_stack_emitted = []
        logger.debug(
            "Causality Matrix (RandomSource): %s", self._causal_matrix)

    def is_burst_over(self):
        """Tell whether every feature queued for the burst was emitted.

        :returns: bool -- True when the stack of pending features is empty
        """
        return not self._features_stack_emitted

    def _pick_next_type(self):
        """Return the next feature name to emit, refilling when needed.

        When the stack is empty, a Laplace-distributed value is drawn per
        feature, each value is increased by the matching entry of the
        product of the causal matrix with the current values, and the
        result is floored. Each feature is then queued ``abs(value)``
        times. Drawing is repeated until at least one feature is queued.

        :returns: the feature name popped from the end of the stack
        """
        while not self._features_stack_emitted:
            # Generate more features that follows the dag defined by the causal
            # matrix
            n = len(self._features_names)
            self._features_random_value = np.random.laplace(size=n)
            for i in range(n):
                # The product is recomputed on every iteration because
                # earlier entries have just been updated in place.
                self._features_random_value[i] += np.dot(
                    self._causal_matrix, self._features_random_value)[i]

            self._features_random_value = np.floor(self._features_random_value)
            for i in range(n):
                nb = abs(int(self._features_random_value[i]))
                if nb > 0:
                    feature = self._features_names[i]
                    self._features_stack_emitted.extend([feature] * nb)
        return self._features_stack_emitted.pop()
|
|
|
|
|
|
class UncorrelatedDataSourceGenerator(BaseDataSourceGenerator):
    """A data source generator where alert item are not correlated.

    Each item has a unique probability to be generated.
    """

    def __init__(self, config):
        """Initialise the generator with an empty burst counter.

        :type config: dict
        :param config: Configuration of this source (stored as
                       ``self._config`` by the base constructor)
        """
        BaseDataSourceGenerator.__init__(self, config)
        self.accumulated_alerts = 0

    def is_burst_over(self):
        """Tell whether the configured number of alerts has been reached.

        The counter is reset to zero as soon as the burst is reported as
        over, so the next call starts a new burst.

        :returns: bool -- True when at least ``alerts_per_burst`` types
                  were picked since the last reset
        """
        # ">=" rather than "==": if several alerts are picked between two
        # checks the counter may step past the threshold.
        is_over = self.accumulated_alerts >= self._config[
            "params"]["alerts_per_burst"]
        if is_over:
            self.accumulated_alerts = 0
        return is_over

    def _pick_next_type(self):
        """Pick the origin type of the next alert and count it.

        :returns: the ``origin_type`` of one configured entry, picked with
                  a probability proportional to its ``weight``
        """
        self.accumulated_alerts += 1
        origin_types = self._config[
            "params"]["model"]["params"]["origin_types"]
        index = UncorrelatedDataSourceGenerator._weighted_choice(
            [o["weight"] for o in origin_types])
        return origin_types[index]["origin_type"]

    @staticmethod
    def _weighted_choice(weights):
        """Gets an index chosen randomly but weighted from a list of weights

        :type weights: list[int | float]
        :param weights: one weight per candidate index
        :returns: int -- the selected index
        :raises: ValueError -- if no index can be selected (empty list or
                 a total weight that is not positive)
        """
        totals = []
        running_total = 0

        for w in weights:
            running_total += w
            totals.append(running_total)

        rnd = random.random() * running_total

        for i, total in enumerate(totals):
            if rnd < total:
                return i

        # Reaching this point used to return None silently, which then
        # failed with an opaque TypeError at the indexing call site.
        raise ValueError(
            "weights must be a non-empty list with a positive total")
|
|
|
|
|
|
class MonanasTCPHandler(socketserver.BaseRequestHandler):
    """A TCP server handler for the alert generation."""

    def handle(self):
        """Stream generated alerts to the client until told to stop.

        On every iteration one alert is produced by ``server.generate``,
        optionally passed through ``server.validate`` when the server
        carries such a callable, serialised as one JSON document followed
        by a newline, and written to the connection. The handler then
        sleeps for ``server.generate_idle_time_between_bursts`` seconds.
        The loop ends once ``server.terminate`` becomes true.
        """
        while not self.server.terminate:
            alert = self.server.generate()

            try:
                # The server may have no validator attached; in that case
                # the generated alert is sent unchanged.
                validate = getattr(self.server, "validate", None)
                if validate is not None:
                    alert = validate(alert)
                # Sockets carry bytes: encode explicitly, and use sendall
                # so that a partial write cannot truncate a line.
                payload = (json.dumps(alert) + "\n").encode("utf-8")
                self.request.sendall(payload)
            except voluptuous.Invalid:
                logger.warning("Invalid schema for generated alerts.")

            time.sleep(self.server.generate_idle_time_between_bursts)
|