terracotta/terracotta/locals/manager.py

# Copyright 2012 Anton Beloglazov
# Copyright 2015 Huawei Technologies Co. Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

""" The main local manager module.

The local manager component is deployed on every compute host and is
invoked periodically to determine when it is necessary to reallocate VM
instances from the host. First of all, it reads from the local storage
the historical data on the resource usage by VMs stored by the data
collector. Then, the local manager invokes the underload detection
algorithm specified in the configuration to determine whether the
host is underloaded. If the host is underloaded, the local manager
sends a request to the global manager's REST API to migrate all the
VMs from the host and switch the host to the sleep mode.

If the host is not underloaded, the local manager proceeds to invoking
the specified in the configuration overload detection algorithm. If
the host is overloaded, the local manager invokes the configured VM
selection algorithm to select the VMs to migrate from the host. Once
the VMs to migrate from the host are selected, the local manager sends
a request to the global manager's REST API to migrate the selected VMs
from the host.

Similarly to the global manager, the local manager can be configured
to use specific underload detection, overload detection, and VM
selection algorithm using the configuration file discussed further in
the paper.

Underload detection is done by a specified in the configuration
underload detection algorithm (algorithm_underload_detection). The
algorithm has a pre-defined interface, which allows substituting
different implementations of the algorithm. The configured algorithm
is invoked by the local manager and accepts historical data on the
resource usage by VMs running on the host as an input. An underload
detection algorithm returns a decision of whether the host is
underloaded.

Overload detection is done by a specified in the configuration
overload detection algorithm (algorithm_overload_detection). Similarly
to underload detection, all overload detection algorithms implement a
pre-defined interface to enable configuration-driven substitution of
different implementations. The configured algorithm is invoked by the
local manager and accepts historical data on the resource usage by VMs
running on the host as an input. An overload detection algorithm
returns a decision of whether the host is overloaded.

If a host is overloaded, it is necessary to select VMs to migrate from
the host to avoid performance degradation. This is done by a specified
in the configuration VM selection algorithm (algorithm_vm_selection).
Similarly to underload and overload detection algorithms, different VM
selection algorithms can be plugged in according to the configuration.
A VM selection algorithm accepts historical data on the resource usage
by VMs running on the host and returns a set of VMs to migrate from
the host.

The local manager will be implemented as a Linux daemon running in the
background and every local_manager_interval seconds checking whether
some VMs should be migrated from the host. Every time interval, the
local manager performs the following steps:

1. Read the data on resource usage by the VMs running on the host from
   the <local_data_directory>/vm directory.

2. Call the function specified in the algorithm_underload_detection
   configuration option and pass the data on the resource usage by the
   VMs, as well as the frequency of the CPU as arguments.

3. If the host is underloaded, send a request to the REST API of the
   global manager and pass a list of the UUIDs of all the VMs
   currently running on the host in the vm_uuids parameter, as well as
   the reason for migration as being 0.

4. If the host is not underloaded, call the function specified in the
   algorithm_overload_detection configuration option and pass the data
   on the resource usage by the VMs, as well as the frequency of the
   host's CPU as arguments.

5. If the host is overloaded, call the function specified in the
   algorithm_vm_selection configuration option and pass the data on
   the resource usage by the VMs, as well as the frequency of the
   host's CPU as arguments.

6. If the host is overloaded, send a request to the REST API of the
   global manager and pass a list of the UUIDs of the VMs selected by
   the VM selection algorithm in the vm_uuids parameter, as well as
   the reason for migration as being 1.

7. Schedule the next execution after local_manager_interval seconds.
"""

from hashlib import sha1
import libvirt
import os
import time

from oslo_config import cfg
from oslo_log import log as logging

from terracotta import common
from terracotta.openstack.common import periodic_task
from terracotta.openstack.common import threadgroup
from terracotta.utils import db_utils


# Global oslo.config configuration object used for every option lookup below.
CONF = cfg.CONF
# Module-level logger named after this module.
LOG = logging.getLogger(__name__)


class LocalManager(periodic_task.PeriodicTasks):
    def __init__(self):
        """ Build the manager state and register the periodic task timer.

        The state dictionary is created by init_state() first; afterwards
        a dynamic timer that drives run_periodic_tasks is added to a new
        thread group stored in self.tg.
        """
        super(LocalManager, self).__init__()
        self.state = self.init_state()

        # Keyword arguments forwarded to the dynamic timer.
        timer_options = {
            'initial_delay': None,
            'periodic_interval_max': 1,
            'context': None,
        }
        self.tg = threadgroup.ThreadGroup()
        self.tg.add_dynamic_timer(self.run_periodic_tasks, **timer_options)

    def init_state(self):
        """ Initialize a dict for storing the state of the local manager.

        Opens a read-only libvirt connection, computes the CPU capacity
        usable by VMs (total physical CPU MHz scaled by the
        host_cpu_usable_by_vms option), initializes the database and
        stores SHA-1 hex digests of the configured admin credentials.

        :return: A dictionary containing the initial state of the local manager.
         :rtype: dict

        :raises OSError: If the connection to the hypervisor cannot be opened.
        """
        def hashed(value):
            # hashlib accepts only bytes, so text values are encoded first;
            # values that already are bytes are hashed unchanged.
            if not isinstance(value, bytes):
                value = value.encode('utf-8')
            return sha1(value).hexdigest()

        vir_connection = libvirt.openReadOnly(None)
        if vir_connection is None:
            message = 'Failed to open a connection to the hypervisor'
            LOG.critical(message)
            raise OSError(message)

        physical_cpu_mhz_total = int(
            common.physical_cpu_mhz_total(vir_connection) *
            CONF.host_cpu_usable_by_vms)
        return {'previous_time': 0.,
                'vir_connection': vir_connection,
                'db': db_utils.init_db(),
                'physical_cpu_mhz_total': physical_cpu_mhz_total,
                'hostname': vir_connection.getHostname(),
                'hashed_username': hashed(CONF.os_admin_user),
                'hashed_password': hashed(CONF.os_admin_password)}

    @periodic_task.periodic_task
    def execute(self, context=None):
        """ Execute an iteration of the local manager.

        1. Read the data on resource usage by the VMs running on the host from
           the <local_data_directory>/vm directory and drop the VMs whose RAM
           cannot be obtained from libvirt.

        2. Convert the VM and host CPU MHz histories into the host's CPU
           utilization history. The iteration is skipped when there is no
           VM data or no utilization data yet.

        3. On the first complete iteration, create the underload detection,
           overload detection and VM selection functions using the factories
           and parameters configured in the local_manager options group, and
           cache them in the state.

        4. Invoke the underload detection and then the overload detection
           function on the host's CPU utilization history. Both are invoked
           on every iteration and each one receives and returns its own
           state.

        5. If the host is underloaded, log it. Otherwise, if the host is
           overloaded, invoke the VM selection function on the VM CPU and
           RAM data and log the selected VM UUIDs.

        Sending the migration request to the global manager is not
        implemented yet (see the TODO notes below).

        :param context: Optional context handed over by the periodic task
                        runner; it is not used. __init__ registers the timer
                        with context=None, so the parameter has to be
                        accepted for the task to be callable.
        """
        LOG.info('Started an iteration')
        state = self.state

        vm_path = common.build_local_vm_path(CONF.local_data_directory)
        vm_cpu_mhz = self.get_local_vm_data(vm_path)
        vm_ram = self.get_ram(state['vir_connection'], vm_cpu_mhz.keys())
        vm_cpu_mhz = self.cleanup_vm_data(vm_cpu_mhz, vm_ram.keys())

        if not vm_cpu_mhz:
            LOG.info('Skipped an iteration')
            return

        host_path = common.build_local_host_path(CONF.local_data_directory)
        host_cpu_mhz = self.get_local_host_data(host_path)

        # Pass a real list: the conversion concatenates the VM histories
        # with the host history, which a dict view does not support.
        host_cpu_utilization = self.vm_mhz_to_percentage(
            list(vm_cpu_mhz.values()),
            host_cpu_mhz,
            state['physical_cpu_mhz_total'])
        LOG.debug('The total physical CPU Mhz: %s',
                  state['physical_cpu_mhz_total'])
        LOG.debug('VM CPU MHz: %s', vm_cpu_mhz)
        LOG.debug('Host CPU MHz: %s', host_cpu_mhz)
        LOG.debug('CPU utilization: %s', host_cpu_utilization)

        if not host_cpu_utilization:
            LOG.info('Not enough data yet - skipping to the next iteration')
            LOG.info('Skipped an iteration')
            return

        time_step = CONF.data_collector_interval
        migration_time = common.calculate_migration_time(
            vm_ram, CONF.network_migration_bandwidth)

        # The algorithm functions are created once and then reused from the
        # state on all subsequent iterations.
        if 'underload_detection' not in state:
            underload_detection_params = common.parse_parameters(
                CONF.local_manager.algorithm_underload_detection_parameters)
            state['underload_detection'] = common.call_function_by_name(
                CONF.local_manager.algorithm_underload_detection_factory,
                [time_step,
                 migration_time,
                 underload_detection_params])
            state['underload_detection_state'] = {}

            overload_detection_params = common.parse_parameters(
                CONF.local_manager.algorithm_overload_detection_parameters)
            state['overload_detection'] = common.call_function_by_name(
                CONF.local_manager.algorithm_overload_detection_factory,
                [time_step,
                 migration_time,
                 overload_detection_params])
            state['overload_detection_state'] = {}

            vm_selection_params = common.parse_parameters(
                CONF.local_manager.algorithm_vm_selection_parameters)
            state['vm_selection'] = common.call_function_by_name(
                CONF.local_manager.algorithm_vm_selection_factory,
                [time_step,
                 migration_time,
                 vm_selection_params])
            state['vm_selection_state'] = {}

        underload_detection = state['underload_detection']
        overload_detection = state['overload_detection']
        vm_selection = state['vm_selection']

        LOG.info('Started underload detection')
        underload, state['underload_detection_state'] = underload_detection(
            host_cpu_utilization, state['underload_detection_state'])
        LOG.info('Completed underload detection')

        LOG.info('Started overload detection')
        overload, state['overload_detection_state'] = overload_detection(
            host_cpu_utilization, state['overload_detection_state'])
        LOG.info('Completed overload detection')

        if underload:
            LOG.info('Underload detected')
            # TODO(xylan): send rpc message to global manager
        elif overload:
            LOG.info('Overload detected')

            LOG.info('Started VM selection')
            vm_uuids, state['vm_selection_state'] = vm_selection(
                vm_cpu_mhz, vm_ram, state['vm_selection_state'])
            LOG.info('Completed VM selection')

            LOG.info('Selected VMs to migrate: %s', vm_uuids)
            # TODO(xylan): send rpc message to global manager
        else:
            LOG.info('No underload or overload detected')

        LOG.info('Completed an iteration')
        self.state = state


    def get_local_vm_data(self, path):
        """ Read the data about VMs from the local storage.

        :param path: A path to read VM UUIDs from.
        :return: A map of VM UUIDs onto the corresponing CPU MHz values.
        """
        result = {}
        for uuid in os.listdir(path):
            with open(os.path.join(path, uuid), 'r') as f:
                result[uuid] = [int(x) for x in f.read().strip().splitlines()]
        return result


    def get_local_host_data(self, path):
        """ Read the data about the host from the local storage.

        :param path: A path to read the host data from.
        :return: A history of the host CPU usage in MHz.

        """
        if not os.access(path, os.F_OK):
            return []
        with open(path, 'r') as f:
            result = [int(x) for x in f.read().strip().splitlines()]
        return result


    def cleanup_vm_data(self, vm_data, uuids):
        """ Remove records for the VMs that are not in the list of UUIDs.

        :param vm_data: A map of VM UUIDs to some data.
        :param uuids: A list of VM UUIDs.
        :return: The cleaned up map of VM UUIDs to data.

        """
        for uuid, _ in vm_data.items():
            if uuid not in uuids:
                del vm_data[uuid]
        return vm_data


    def get_ram(self, vir_connection, vm_ids):
        """ Get the maximum RAM for a set of VM UUIDs.

        :param vir_connection: A libvirt connection object.
        :param vm_ids: A list of VM UUIDs.
        :return: The maximum RAM for the VM UUIDs.
        """
        vms_ram = {}
        for uuid in vm_ids:
            ram = self.get_max_ram(vir_connection, uuid)
            if ram:
                vms_ram[uuid] = ram

        return vms_ram


    def get_max_ram(self, vir_connection, uuid):
        """ Get the max RAM allocated to a VM UUID using libvirt.

        :param vir_connection: A libvirt connection object.
        :param uuid: The UUID of a VM.
        :return: The maximum RAM of the VM in MB.
        """
        try:
            domain = vir_connection.lookupByUUIDString(uuid)
            return domain.maxMemory() / 1024
        except libvirt.libvirtError:
            return None


    def vm_mhz_to_percentage(self, vm_mhz_history, host_mhz_history,
                             physical_cpu_mhz):
        """ Convert VM CPU utilization to the host's CPU utilization.

        :param vm_mhz_history: A list of CPU utilization histories of VMs in MHz.
        :param host_mhz_history: A history if the CPU usage by the host in MHz.
        :param physical_cpu_mhz: The total frequency of the physical CPU in MHz.
        :return: The history of the host's CPU utilization in percentages.
        """
        max_len = max(len(x) for x in vm_mhz_history)
        if len(host_mhz_history) > max_len:
            host_mhz_history = host_mhz_history[-max_len:]
        mhz_history = [[0] * (max_len - len(x)) + x
                       for x in vm_mhz_history + [host_mhz_history]]
        return [float(sum(x)) / physical_cpu_mhz for x in zip(*mhz_history)]