# monasca-analytics/monasca_analytics/source/randoms.py
# 404 lines, 14 KiB, Python
#!/usr/bin/env python
# Copyright (c) 2016 Hewlett Packard Enterprise Development Company, L.P.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import abc
import json
import logging
import numpy as np
import random
import six
from six.moves import socketserver
import threading as th
import time
import uuid
import voluptuous
import monasca_analytics.banana.typeck.type_util as type_util
import monasca_analytics.component.params as params
import monasca_analytics.exception.monanas as err
from monasca_analytics.source import base
from monasca_analytics.util import validation_utils as vu
logger = logging.getLogger(__name__)
class RandomSource(base.BaseSource):
    """A randomly generated data source implementation.

    Runs a ThreadingTCPServer in a background thread that emits randomly
    generated alert events; Spark then consumes them through a
    socketTextStream bound to the same host/port.
    """

    def __init__(self, _id, _config):
        """RandomSource constructor.

        :type _id: str
        :param _id: identifier of this source.
        :type _config: dict
        :param _config: configuration of this source.
        :raises: err.MonanasInitError -- if the server socket cannot be
            bound, or the configured model name has no matching
            ``_generate_<name>`` method on this class.
        """
        super(RandomSource, self).__init__(_id, _config)
        try:
            self._configure_server()
        except IOError:
            raise err.MonanasInitError("Address already in use.")
        except AttributeError:
            raise err.MonanasInitError("Invalid generate or validate method.")

    def _configure_server(self):
        """Creates and configures the Server object

        The server object is configured according to
        the configuration of this source module.
        """
        # bind_and_activate=False: binding is done manually below so
        # that allow_reuse_address can be set first.
        self._server = socketserver.ThreadingTCPServer(
            (self._config["params"]["host"],
             self._config["params"]["port"]),
            MonanasTCPHandler, False)
        # Resolve the generator from the configured model name, e.g.
        # "simple_model" -> self._generate_simple_model. A bad name
        # raises AttributeError, caught in __init__.
        self._server.generate = getattr(
            self, "_generate_" +
            self._config["params"]["model"]["name"])
        # NOTE(review): validate is never assigned (below is the dead
        # assignment); MonanasTCPHandler.handle() calls
        # self.server.validate -- confirm how it gets set before serving.
        # self._server.validate = getattr(
        #     source_model, self._config["validate"])
        self._server.allow_reuse_address = True
        self._server.server_bind()
        self._server.server_activate()
        self._server.terminate = False
        self._server.generate_alerts_per_second =\
            self._config["params"]["alerts_per_burst"]
        self._server.generate_idle_time_between_bursts =\
            self._config["params"]["idle_time_between_bursts"]
        self._server_thread = th.Thread(target=self._server.serve_forever)
        self._is_server_running = False

    @staticmethod
    def validate_config(_config):
        """Validate the configuration dict against the source schema.

        :type _config: dict
        :param _config: configuration to validate.
        :raises: voluptuous.Invalid -- if the configuration is invalid.
        """
        source_schema = voluptuous.Schema({
            "module": voluptuous.And(six.string_types[0],
                                     vu.NoSpaceCharacter()),
            "params": {
                "host": voluptuous.And(six.string_types[0],
                                       vu.NoSpaceCharacter()),
                "port": int,
                "model": {
                    "name": voluptuous.And(six.string_types[0],
                                           vu.NoSpaceCharacter()),
                    "params": {
                        "origin_types": voluptuous.And([
                            {
                                "origin_type": voluptuous.And(
                                    six.string_types[0],
                                    vu.NoSpaceCharacter()),
                                "weight": voluptuous.And(
                                    voluptuous.Or(int, float),
                                    voluptuous.Range(
                                        min=0, min_included=False)),
                            }
                        ], vu.NotEmptyArray()),
                        voluptuous.Optional("key_causes"): dict
                    }
                },
                "alerts_per_burst": voluptuous.And(
                    int, voluptuous.Range(min=1)),
                "idle_time_between_bursts": voluptuous.And(
                    voluptuous.Or(int, float),
                    voluptuous.Range(min=0, min_included=False))
            }
        }, required=True)
        return source_schema(_config)

    @staticmethod
    def get_default_config():
        """Return a default configuration accepted by validate_config."""
        return {
            "module": RandomSource.__name__,
            "params": {
                "host": "localhost",
                "port": 1010,
                "model": {
                    "name": "my_model_name",
                    "params": {
                        "origin_types": [
                            {
                                "origin_type": "my_origin_type",
                                "weight": 1.0
                            }
                        ],
                    }
                },
                "alerts_per_burst": 1,
                "idle_time_between_bursts": 1.0
            }
        }

    @staticmethod
    def get_params():
        """Return the typed parameter descriptors for this source."""
        return [
            params.ParamDescriptor('host', type_util.String(), 'localhost'),
            params.ParamDescriptor('port', type_util.Number(), 1010),
            params.ParamDescriptor('model', type_util.Object({
                'name': type_util.String(),
                'params': type_util.Object({
                    'origin_types': type_util.Object(strict_checking=False)
                })
            })),
            # Fixed: was 'alert_per_burst', which did not match the
            # 'alerts_per_burst' key used by validate_config,
            # get_default_config and _configure_server.
            params.ParamDescriptor('alerts_per_burst',
                                   type_util.Number(), 1),
            params.ParamDescriptor('idle_time_between_bursts',
                                   type_util.Number(), 1.0),
        ]

    def _start_server(self):
        """Start the TCP server thread once; later calls are no-ops."""
        if not self._is_server_running:
            self._server_thread.start()
            self._is_server_running = True

    def create_dstream(self, ssc):
        """Dstream object creation

        The _dstream object is created before this source is bound
        to the consumers. It uses a socketTextStream, to read data from
        the ThreadingTCPServer.

        :type ssc: pyspark.streaming.StreamingContext
        :param ssc: Spark Streaming Context
        """
        self._start_server()
        self._dstream = ssc.socketTextStream(
            self._config["params"]["host"],
            self._config["params"]["port"])

    def get_feature_list(self):
        """Not supported by this source."""
        raise NotImplementedError("This method needs to be implemented")

    def terminate_source(self):
        """Terminates the source with a delay

        Terminates the source with a delay to allow the messages
        being sent by the handler to clear up.
        """
        self._server.terminate = True
        # Give the handler thread time to notice the terminate flag and
        # flush in-flight messages before closing the socket.
        time.sleep(1)
        self._server.server_close()
        self._server_thread = None

    def _generate_simple_model(self):
        """Generates an alert based on simple_model.

        :rtype: dict
        :returns: alert event with millisecond timestamps and a
            randomly weighted origin_type.
        """
        current_time = int(round(time.time() * 1000))
        return {
            "created": current_time,
            "id": str(uuid.uuid4()),
            "origin": str(uuid.uuid4()),
            "origin_type": self._random_origin_type(),
            "data": {},
            "state": "",
            "updated": current_time
        }

    def _random_origin_type(self):
        """Randomizes the origin_type according to configured weights."""
        origin_types = self._config[
            "params"]["model"]["params"]["origin_types"]
        return origin_types[self._weighted_choice(
            [o["weight"] for o in origin_types])]["origin_type"]

    def _weighted_choice(self, weights):
        """Gets an index chosen randomly but weighted from a list of weights

        :type weights: list[float]
        :param weights: positive weights, one per candidate.
        :rtype: int
        :returns: index into ``weights`` drawn with probability
            proportional to its weight.
        """
        totals = []
        running_total = 0
        for w in weights:
            running_total += w
            totals.append(running_total)
        rnd = random.random() * running_total
        for i, total in enumerate(totals):
            if rnd < total:
                return i
@six.add_metaclass(abc.ABCMeta)
class BaseDataSourceGenerator(object):
    """An interface for random data source generators."""

    @abc.abstractmethod
    def __init__(self, _config):
        """BaseDataSourceGenerator constructor.

        :type _config: dict
        :param _config: Configuration of this source
        """
        self._config = _config
        # Bind the concrete generator picked by the configured model
        # name, e.g. "simple_model" -> self.generate_simple_model.
        model_name = self._config["params"]["model"]["name"]
        self.generate = getattr(self, "generate_" + model_name)

    @abc.abstractmethod
    def is_burst_over(self):
        """Should return true when all the burst alerts have been generated"""
        pass

    def generate_simple_model(self):
        """Generate alert event that are shaped according to the simple model
        """
        now = time.time()
        identifier = str(uuid.uuid4())
        origin = str(uuid.uuid4())
        return {
            "created": now,
            "id": identifier,
            "origin": origin,
            "origin_type": self._pick_next_type(),
            "data": {},
            "state": "",
            "updated": now
        }

    @abc.abstractmethod
    def _pick_next_type(self):
        """Should return the next type for the simple model generation"""
        pass
class LinearlyDependentDataSourceGenerator(BaseDataSourceGenerator):
    """A data source generator where alerts are linearly dependent

    :raises: exception -- if the causal matrix is cyclic
    """

    def __init__(self, config):
        """LinearlyDependentDataSourceGenerator constructor.

        :type config: dict
        :param config: configuration; must contain the ``key_causes``
            mapping (feature name -> list of names that cause it).
        :raises: err.MonanasCyclicRandomSourceError -- if the causal
            graph defined by ``key_causes`` contains a cycle.
        """
        BaseDataSourceGenerator.__init__(self, config)
        # Acyclic causality model
        config_key_causes = self._config[
            "params"]["model"]["params"]["key_causes"]
        # Create the causal matrix (/graph).
        # list(...) is required: on Python 3, dict.keys() returns a view
        # that supports neither indexing (used just below) nor the swap
        # assignment done during triangulation.
        self._features_names = list(config_key_causes.keys())
        n = len(self._features_names)
        self._causal_matrix = np.zeros((n, n), dtype=np.float32)
        for i in range(n):
            for j in range(n):
                row = self._features_names[i]
                col = self._features_names[j]
                if col in config_key_causes[row]:
                    self._causal_matrix[i, j] = 1
        # Triangulate the causal matrix: repeatedly move a cause-free row
        # to the front of the remaining sub-matrix (topological sort).
        # If no such row exists, the causal graph is cyclic.
        tmp_matrix = np.copy(self._causal_matrix)
        n_t = tmp_matrix.shape[0]
        while n_t != 1:
            for i in range(n_t):
                if np.all(tmp_matrix[i, :] == np.zeros(n_t)):
                    tmp_matrix[[i, 0], :] = tmp_matrix[[0, i], :]
                    tmp_matrix[:, [i, 0]] = tmp_matrix[:, [0, i]]
                    # Mirror the swap on the full matrix and the name
                    # list (offset k maps sub-matrix to full indices).
                    k = n - n_t
                    r = i + k
                    self._causal_matrix[
                        [r, k], :] = self._causal_matrix[[k, r], :]
                    self._causal_matrix[
                        :, [r, k]] = self._causal_matrix[:, [k, r]]
                    self._features_names[r], self._features_names[
                        k] = self._features_names[k], self._features_names[r]
                    tmp_matrix = tmp_matrix[1:, 1:]
                    break
                if i == n_t - 1:
                    raise err.MonanasCyclicRandomSourceError
            n_t = tmp_matrix.shape[0]
        # Prepare a zero buffer that store the random values generated
        # following the causal model
        self._features_random_value = np.zeros(len(self._features_names))
        # This stack will contains the generated values for one burst (if that
        # make some sense)
        self._features_stack_emitted = []
        logger.debug(
            "Causality Matrix (RandomSource): {0}".format(
                self._causal_matrix))

    def is_burst_over(self):
        """Return True once every alert queued for this burst was popped."""
        return len(self._features_stack_emitted) == 0

    def _pick_next_type(self):
        """Pop the next feature name, refilling the stack when empty."""
        while len(self._features_stack_emitted) == 0:
            # Generate more features that follows the dag defined by the causal
            # matrix
            n = len(self._features_names)
            self._features_random_value = np.random.laplace(size=n)
            # Propagate causal influence row by row; the matrix was
            # triangulated, so already-updated rows feed into later ones.
            for i in range(n):
                self._features_random_value[
                    i] += np.dot(self._causal_matrix,
                                 self._features_random_value)[i]
            self._features_random_value = np.floor(self._features_random_value)
            for i in range(n):
                nb = np.abs(int(self._features_random_value[i]))
                if nb > 0:
                    feature = self._features_names[i]
                    self._features_stack_emitted.extend(
                        [feature for _ in range(nb)])
        return self._features_stack_emitted.pop()
class UncorrelatedDataSourceGenerator(BaseDataSourceGenerator):
    """A data source generator where alert item are not correlated.

    Each item has a unique probability to be generated.
    """

    def __init__(self, config):
        BaseDataSourceGenerator.__init__(self, config)
        # Number of alerts emitted so far in the current burst.
        self.accumulated_alerts = 0
        self._config = config

    def is_burst_over(self):
        """Return True (and reset the counter) once the burst is complete."""
        target = self._config["params"]["alerts_per_burst"]
        burst_done = self.accumulated_alerts == target
        if burst_done:
            self.accumulated_alerts = 0
        return burst_done

    def _pick_next_type(self):
        """Draw one origin_type at random, weighted by configuration."""
        self.accumulated_alerts += 1
        candidates = self._config[
            "params"]["model"]["params"]["origin_types"]
        weights = [entry["weight"] for entry in candidates]
        chosen = UncorrelatedDataSourceGenerator._weighted_choice(weights)
        return candidates[chosen]["origin_type"]

    @staticmethod
    def _weighted_choice(weights):
        """Gets an index chosen randomly but weighted from a list of weights"""
        cumulative = []
        acc = 0
        for weight in weights:
            acc += weight
            cumulative.append(acc)
        threshold = random.random() * acc
        for idx, bound in enumerate(cumulative):
            if threshold < bound:
                return idx
class MonanasTCPHandler(socketserver.BaseRequestHandler):
    """A TCP server handler for the alert generation."""

    def handle(self):
        """Handles the incoming messages.

        Repeatedly generates alerts, validates them and writes them to
        the client socket as newline-delimited JSON, until the server's
        ``terminate`` flag is set. Invalid alerts are logged and skipped.
        """
        accumulated_alerts = 0
        # NOTE(review): self.server.validate is never assigned in
        # RandomSource._configure_server (the assignment is commented
        # out there) -- confirm it is set before this handler runs.
        while not self.server.terminate:  # 'True and' was redundant
            alert = self.server.generate()
            try:
                validated_alert = self.server.validate(alert)
                # Encode explicitly: socket.send() requires bytes on
                # Python 3 (on Python 2 this is a harmless round-trip).
                payload = (json.dumps(validated_alert) + "\n").encode("utf-8")
                self.request.send(payload)
                accumulated_alerts += 1
            except voluptuous.Invalid:
                # logger.warn is deprecated in favor of logger.warning.
                logger.warning("Invalid schema for generated alerts.")
            time.sleep(self.server.generate_idle_time_between_bursts)