ceilometer-powervm/ceilometer_powervm/compute/virt/powervm/inspector.py

# Copyright 2015, 2016 IBM Corp.
#
# All Rights Reserved.
#
#    Licensed under the Apache License, Version 2.0 (the "License"); you may
#    not use this file except in compliance with the License. You may obtain
#    a copy of the License at
#
#         http://www.apache.org/licenses/LICENSE-2.0
#
#    Unless required by applicable law or agreed to in writing, software
#    distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
#    WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
#    License for the specific language governing permissions and limitations
#    under the License.

import datetime

from oslo_log import log as logging
from pypowervm import adapter as pvm_adpt
from pypowervm.helpers import log_helper as log_hlp
from pypowervm.helpers import vios_busy as vio_hlp
from pypowervm.tasks.monitor import util as pvm_mon_util
from pypowervm.utils import uuid as pvm_uuid
from pypowervm.wrappers import logical_partition as pvm_lpar
from pypowervm.wrappers import managed_system as pvm_ms
from pypowervm.wrappers import network as pvm_net

from ceilometer.compute.virt import inspector as virt_inspector
from ceilometer_powervm.compute.virt.powervm.i18n import _
from ceilometer_powervm.compute.virt.powervm.i18n import _LW

LOG = logging.getLogger(__name__)


class PowerVMInspector(virt_inspector.Inspector):
    """The implementation of the inspector for the PowerVM hypervisor.

    This code requires that it is run on the PowerVM Compute Host directly.
    Utilizes the pypowervm library to gather the instance metrics.
    """

    def __init__(self, conf):
        super(PowerVMInspector, self).__init__(conf)

        # Build the adapter.  May need to attempt the connection multiple times
        # in case the REST server is starting.
        session = pvm_adpt.Session(conn_tries=300)
        self.adpt = pvm_adpt.Adapter(
            session, helpers=[log_hlp.log_helper,
                              vio_hlp.vios_busy_retry_helper])

        # Get the host system UUID
        host_uuid = self._get_host_uuid(self.adpt)

        # Ensure that metrics gathering is running for the host.
        pvm_mon_util.ensure_ltm_monitors(self.adpt, host_uuid)

        # Get the VM Metric Utility
        self.vm_metrics = pvm_mon_util.LparMetricCache(self.adpt, host_uuid)

    @staticmethod
    def _puuid(instance):
        """Derives the PowerVM UUID for an instance.

        :param instance: The OpenStack instance object.
        :return: The PowerVM API's UUID for the instance.
        """
        return pvm_uuid.convert_uuid_to_pvm(instance.id).upper()

    def _get_host_uuid(self, adpt):
        """Returns the Host system's UUID for pypowervm.

        The pypowervm API needs a UUID of the server that it is managing.  This
        method returns the UUID of that host.

        :param adpt: The pypowervm adapter.
        :return: The UUID of the host system.
        """
        hosts = pvm_ms.System.wrap(adpt.read(pvm_ms.System.schema_type))
        if len(hosts) != 1:
            raise Exception(_("Expected exactly one host; found %d."),
                            len(hosts))
        LOG.debug("Host UUID: %s" % hosts[0].uuid)
        return hosts[0].uuid

    def inspect_instance(self, instance, duration=None):
        """Inspect the statistics for an instance.

        :param instance: the target instance
        :param duration: the last 'n' seconds, over which the value should be
               inspected.

               The PowerVM implementation does not make use of the duration
               field.
        :return: the instance statistics
        """

        uuid = self._puuid(instance)
        cur_date, cur_metric = self.vm_metrics.get_latest_metric(uuid)

        # If the current metric is none, then the instance can not be found in
        # the sample set.  An error should be raised.
        if cur_metric is None:
            raise virt_inspector.InstanceNotFoundException(
                _('VM %s not found in PowerVM Metrics Sample.') %
                instance.name)

        cpu_time = (cur_metric.processor.util_cap_proc_cycles +
                    cur_metric.processor.util_uncap_proc_cycles)
        cpu_num = cur_metric.processor.virt_procs

        # The duration is ignored.  There is precedent for this in other
        # inspectors if the platform doesn't support duration.
        #
        # Given the nature of PowerVM to collect samples over coarse periods
        # of time, it does not lend well to duration based collection.
        # Therefore this works by gathering the latest utilization from the
        # samples and ignores the duration.

        # Get the current and previous sample.  Delta is performed between
        # these two.
        prev_date, prev_metric = self.vm_metrics.get_previous_metric(uuid)

        # Get the current data.
        cur_util_cap = cur_metric.processor.util_cap_proc_cycles
        cur_util_uncap = cur_metric.processor.util_uncap_proc_cycles
        cur_idle = cur_metric.processor.idle_proc_cycles
        cur_donated = cur_metric.processor.donated_proc_cycles
        cur_entitled = cur_metric.processor.entitled_proc_cycles

        # Get the previous sample data
        if prev_metric is None:
            # If there is no previous sample, that is either a new VM or is
            # a live migrated system.  A live migrated system will pull all
            # of its metrics with it.  The issue with this is it could have
            # CPU cycles for months of run time.  So we can't really determine
            # the CPU utilization within the last X seconds...because to THIS
            # host it's new (only in the cur_metric).  So we error out, the
            # inspector will use a debug message in the log.
            LOG.warning(_LW("Unable to derive CPU Utilization for VM %s. "
                            "It is either a new VM or was recently migrated. "
                            "It will be collected in the next inspection "
                            "cycle."), instance.name)
            return virt_inspector.InstanceStats(
                cpu_time=cpu_time, cpu_number=cpu_num)

        # Gather the previous metrics
        prev_util_cap = prev_metric.processor.util_cap_proc_cycles
        prev_util_uncap = prev_metric.processor.util_uncap_proc_cycles
        prev_idle = prev_metric.processor.idle_proc_cycles
        prev_donated = prev_metric.processor.donated_proc_cycles
        prev_entitled = prev_metric.processor.entitled_proc_cycles

        # Utilization can be driven by multiple factors on PowerVM.
        # PowerVM has 'entitled' cycles.  These are cycles that, if the VM
        # needs them, they get them no matter what.
        #
        # In terms of how those cycles are returned from the API:
        #   util_cap_proc_cycles - How many cycles from the guaranteed
        #          capacity were used.
        #   util_uncap_proc_cycles - How many cycles were used that were
        #          taken from spare (which is either unused processors cycles
        #          or donated cycles from other VMs).
        #   idle_proc_cycles - How many cycles (as reported by the OS to the
        #          hypervisor) were reported as idle.
        #   donated_proc_cycles - Cycles that were not needed by this VM that
        #          were given to another VM in need of cycles.
        #
        #
        # So the final utilization equation is:
        #   (util cap + util uncap - idle - donated) / entitled
        #
        # It is important to note that idle and donated proc cycles are
        # included in the 'util_cap_proc_cycles'.  That is why they are
        # subtracted out.
        #
        # The interesting aspect of this is that the CPU Utilization can go
        # dramatically above 100% if there are free processors or if the
        # other workloads are in a lull.
        util_cap = cur_util_cap - prev_util_cap
        util_uncap = cur_util_uncap - prev_util_uncap
        idle = cur_idle - prev_idle
        donated = cur_donated - prev_donated
        entitled = cur_entitled - prev_entitled

        # If the entitled is zero, that generally means that the VM has not
        # been started yet (everything else would be zero as well).  So to
        # avoid a divide by zero error, just return 0% in that case.
        util = (float(util_cap + util_uncap - idle - donated) / float(entitled)
                if entitled else 0.0)

        # Utilization is reported as percents.  Therefore, multiply by 100.0
        # to get a readable percentage based format.
        return virt_inspector.InstanceStats(
            cpu_util=util * 100.0, cpu_time=cpu_time, cpu_number=cpu_num)

    @staticmethod
    def mac_for_metric_cna(metric_cna, client_cnas):
        """Finds the mac address for a given metric.

        :param metric_cna: The metric for a given client network adapter (CNA)
        :param client_cnas: The list of wrappers from pypowervm for the CNAs
                            attached to a given instance.
        :return: Mac address of the adapter.  If unable to be found, then None
                 is returned.
        """
        # TODO(thorst) Investigate optimization in pypowervm for this.
        for client_cna in client_cnas:
            if client_cna.loc_code == metric_cna.physical_location:
                # Found the appropriate mac.  The PowerVM format is upper
                # cased without colons.  Convert it.
                mac = client_cna.mac.lower()
                return ':'.join(mac[i:i + 2]
                                for i in range(0, len(mac), 2))
        return None

    def _get_cnas(self, lpar_uuid):
        """Returns the client VM's Network Adapters.

        :param lpar_uuid: The UUID of the VM.
        :return: A list of pypowervm CNA wrappers.
        """
        client_cna_resp = self.adpt.read(
            pvm_lpar.LPAR.schema_type, root_id=lpar_uuid,
            child_type=pvm_net.CNA.schema_type)
        return pvm_net.CNA.wrap(client_cna_resp)

    def inspect_vnics(self, instance):
        """Inspect the vNIC statistics for an instance.

        :param instance: the target instance
        :return: for each vNIC, the number of bytes & packets
                 received and transmitted
        """
        # Get the current and previous sample.  Delta is performed between
        # these two.
        uuid = self._puuid(instance)
        cur_date, cur_metric = self.vm_metrics.get_latest_metric(uuid)

        # If the cur_metric is none, then the instance can not be found in the
        # sample and an error should be raised.
        if cur_metric is None:
            raise virt_inspector.InstanceNotFoundException(
                _('VM %s not found in PowerVM Metrics Sample.') %
                instance.name)

        # If there isn't network information, this is because the Virtual
        # I/O Metrics were turned off.  Have to pass through this method.
        if cur_metric.network is None:
            return

        # Get the network interfaces.  A 'cna' is a Client VM's Network Adapter
        client_cnas = self._get_cnas(uuid)

        for metric_cna in cur_metric.network.cnas:
            # Get the mac, but if it isn't found, then move to the next.  Might
            # have been removed since the last sample.
            mac = self.mac_for_metric_cna(metric_cna, client_cnas)
            if mac is None:
                continue

            # The name will be the location code.  MAC is identified from
            # above.  Others appear libvirt specific.
            #
            # PowerVM doesn't specify drops by receive vs. transmit.  Since we
            # have the client adapter, we assume all are receive drops.
            # There are no error metrics available.
            yield virt_inspector.InterfaceStats(
                name=metric_cna.physical_location,
                mac=mac, fref=None, parameters=None,
                rx_bytes=metric_cna.received_bytes,
                rx_packets=metric_cna.received_packets,
                rx_drop=metric_cna.dropped_packets,
                rx_errors=0,
                tx_bytes=metric_cna.sent_bytes,
                tx_packets=metric_cna.sent_packets,
                tx_drop=0,
                tx_errors=0)

    def inspect_vnic_rates(self, instance, duration=None):
        """Inspect the vNIC rate statistics for an instance.

        :param instance: the target instance
        :param duration: the last 'n' seconds, over which the value should be
               inspected

               The PowerVM implementation does not make use of the duration
               field.
        :return: for each vNIC, the rate of bytes & packets
                 received and transmitted
        """
        # Get the current and previous sample.  Delta is performed between
        # these two.
        uuid = self._puuid(instance)
        cur_date, cur_metric = self.vm_metrics.get_latest_metric(uuid)
        prev_date, prev_metric = self.vm_metrics.get_previous_metric(uuid)

        # If the current is none, then the instance can not be found in the
        # sample and an error should be raised.
        if cur_metric is None:
            raise virt_inspector.InstanceNotFoundException(
                _('VM %s not found in PowerVM Metrics Sample.') %
                instance.name)

        # If there isn't network information, this is because the Virtual
        # I/O Metrics were turned off.  Have to pass through this method.
        if (cur_metric.network is None or prev_metric is None or
                prev_metric.network is None):
            return

        # Get the network interfaces.  A 'cna' is a Client VM's Network Adapter
        client_cnas = self._get_cnas(uuid)

        def find_prev_net(metric_cna):
            """Finds the metric vNIC from the previous sample's vNICs."""
            # If no previous, return None
            if prev_metric is None or prev_metric.network is None:
                return None

            for prev_cna in prev_metric.network.cnas:
                if prev_cna.physical_location == metric_cna.physical_location:
                    return prev_cna

            # Couldn't find a previous.  Maybe the interface was recently
            # added to the instance?  Return None
            return None

        # Need to determine the time delta between the samples.  This is
        # usually 30 seconds from the API, but the metrics will be specific.
        date_delta_num = float((cur_date - prev_date).seconds)

        for metric_cna in cur_metric.network.cnas:
            # Get the mac, but if it isn't found, then move to the next.  Might
            # have been removed since the last sample.
            mac = self.mac_for_metric_cna(metric_cna, client_cnas)
            if mac is None:
                continue

            # Note that here, the previous may be none.  That simply indicates
            # that the adapter was dynamically added to the VM before the
            # previous collection.  Not the migration scenario above.
            # In this case, we can default the base to 0.
            prev = find_prev_net(metric_cna)
            rx_bytes_diff = (metric_cna.received_bytes -
                             (0 if prev is None else prev.received_bytes))
            tx_bytes_diff = (metric_cna.sent_bytes -
                             (0 if prev is None else prev.sent_bytes))

            # Stats are the difference in the bytes, divided by the difference
            # in time between the two samples.
            rx_rate = float(rx_bytes_diff) / float(date_delta_num)
            tx_rate = float(tx_bytes_diff) / float(date_delta_num)

            # The name will be the location code.  MAC is identified from
            # above.  Others appear libvirt specific.
            yield virt_inspector.InterfaceRateStats(
                name=metric_cna.physical_location,
                mac=mac, fref=None, parameters=None,
                rx_bytes_rate=rx_rate, tx_bytes_rate=tx_rate)

    def inspect_disks(self, instance):
        """Inspect the disk statistics for an instance.

        The response is a generator of the values.

        :param instance: the target instance
        :return disk: The Disk indicating the device for the storage device.
        :return stats: The DiskStats indicating the read/write data to the
                       device.
        """
        # Get the current and previous sample.  Delta is performed between
        # these two.
        uuid = self._puuid(instance)
        cur_date, cur_metric = self.vm_metrics.get_latest_metric(uuid)

        # If the cur_metric is none, then the instance can not be found in the
        # sample and an error should be raised.
        if cur_metric is None:
            raise virt_inspector.InstanceNotFoundException(
                _('VM %s not found in PowerVM Metrics Sample.') %
                instance.name)

        # If there isn't storage information, this is because the Virtual
        # I/O Metrics were turned off.  Have to pass through this method.
        if cur_metric.storage is None:
            LOG.debug("Current storage metric was unavailable from the API "
                      "instance %s." % instance.name)
            return

        # Bundle together the SCSI and virtual FC adapters
        adpts = cur_metric.storage.virt_adpts + cur_metric.storage.vfc_adpts

        # Loop through all the storage adapters
        for adpt in adpts:
            # PowerVM only shows the connection (SCSI or FC).  Name after
            # the connection name
            yield virt_inspector.DiskStats(
                device=adpt.name, read_requests=adpt.num_reads,
                read_bytes=adpt.read_bytes, write_requests=adpt.num_writes,
                write_bytes=adpt.write_bytes, errors=0)

    def inspect_disk_iops(self, instance):
        """Inspect the Disk Input/Output operations per second for an instance.

        The response is a generator of the values.

        :param instance: the target instance
        :return disk: The Disk indicating the device for the storage device.
        :return stats: The DiskIOPSStats indicating the I/O operations per
                       second for the device.
        """
        # Get the current and previous sample.  Delta is performed between
        # these two.
        uuid = self._puuid(instance)
        cur_date, cur_metric = self.vm_metrics.get_latest_metric(uuid)
        prev_date, prev_metric = self.vm_metrics.get_previous_metric(uuid)

        # If the cur_metric is none, then the instance can not be found in the
        # sample and an error should be raised.
        if cur_metric is None:
            raise virt_inspector.InstanceNotFoundException(
                _('VM %s not found in PowerVM Metrics Sample.') %
                instance.name)

        # If there isn't storage information, this may be because the Virtual
        # I/O Metrics were turned off.  If the previous metric is unavailable,
        # also have to pass through this method.
        if (cur_metric.storage is None or prev_metric is None or
                prev_metric.storage is None):
            LOG.debug("Current storage metric was unavailable from the API "
                      "instance %s." % instance.name)
            return

        # Need to determine the time delta between the samples.  This is
        # usually 30 seconds from the API, but the metrics will be specific.
        # However, if there is no previous sample, then we have to estimate.
        # Therefore, we estimate 15 seconds - half of the standard 30 seconds.
        date_delta = ((cur_date - prev_date) if prev_date is not None else
                      datetime.timedelta(seconds=15))

        # Bundle together the SCSI and virtual FC adapters
        cur_adpts = (cur_metric.storage.virt_adpts +
                     cur_metric.storage.vfc_adpts)
        prev_adpts = (prev_metric.storage.virt_adpts +
                      prev_metric.storage.vfc_adpts)

        def find_prev(cur_adpt):
            for prev_adpt in prev_adpts:
                if prev_adpt.name == cur_adpt.name:
                    return prev_adpt
            return None

        # Loop through all the storage adapters
        for cur_adpt in cur_adpts:
            # IOPs is the read/write counts of the current - prev divided by
            # second difference between the two, rounded to the integer.  :-)
            cur_ops = cur_adpt.num_reads + cur_adpt.num_writes

            # The previous adapter may be None.  This simply indicates that the
            # adapter was added between the previous sample and this one.  It
            # does not indicate a live migrate scenario like noted above, as
            # the VM itself hasn't moved.
            prev_adpt = find_prev(cur_adpt)
            prev_ops = ((prev_adpt.num_reads + prev_adpt.num_writes)
                        if prev_adpt else 0)
            iops = (cur_ops - prev_ops) // date_delta.seconds

            # PowerVM only shows the connection (SCSI or FC).  Name after
            # the connection name
            yield virt_inspector.DiskIOPSStats(device=cur_adpt.name,
                                               iops_count=iops)