371 lines
16 KiB
Python
371 lines
16 KiB
Python
# Copyright 2014, 2017 IBM Corp.
|
|
#
|
|
# All Rights Reserved.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
|
# not use this file except in compliance with the License. You may obtain
|
|
# a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
|
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
|
# License for the specific language governing permissions and limitations
|
|
# under the License.
|
|
|
|
import math
|
|
from nova.objects import fields
|
|
from oslo_concurrency import lockutils
|
|
from oslo_log import log as logging
|
|
from oslo_serialization import jsonutils
|
|
from pypowervm.tasks.monitor import util as pcm_util
|
|
import subprocess
|
|
|
|
from nova import conf as cfg
|
|
|
|
|
|
# Module-level logger and handle on the global nova configuration.
LOG = logging.getLogger(__name__)
CONF = cfg.CONF

# PowerVM hypervisor info.
# Normally, the hypervisor version is a string in the form of '8.0.0' that is
# converted to an int with nova.virt.utils.convert_version_to_int(); however,
# there isn't currently a mechanism to retrieve the exact version.
# Complicating this is the fact that nova conductor only allows live migration
# from the source host to the destination if the source version is equal to or
# less than the destination version.  PowerVM live migration limitations are
# checked by the PowerVM capabilities flags and not specific version levels.
# For that reason, we'll just publish the major level.
IBM_POWERVM_HYPERVISOR_VERSION = 8

# The types of LPARs that are supported, as
# (architecture, hypervisor type, VM mode) tuples.
POWERVM_SUPPORTED_INSTANCES = [
    (fields.Architecture.PPC64, fields.HVType.PHYP, fields.VMMode.HVM),
    (fields.Architecture.PPC64LE, fields.HVType.PHYP, fields.VMMode.HVM),
]

# JSON-encoded cpu_info published by build_host_resource_from_ms().
HOST_STATS_CPU_INFO = jsonutils.dumps({'vendor': 'ibm', 'arch': 'ppc64'})
|
|
|
|
|
|
def build_host_resource_from_ms(ms_wrapper):
    """Translate a ManagedSystem wrapper into the host resource dictionary.

    The processor figures are derived from the configurable and available
    processor units of the managed system; both the vcpu total and the
    vcpus in use are rounded up to whole numbers.

    :param ms_wrapper: ManagedSystem Entry Wrapper.
    :return: Dictionary of host resources (vcpus, memory, hypervisor
             details, stats and the PCI passthrough device JSON).
    """
    # Processor units: the configurable total and the portion in use.
    total_pu = float(ms_wrapper.proc_units_configurable)
    used_pu = total_pu - float(ms_wrapper.proc_units_avail)

    return {
        'vcpus': int(math.ceil(total_pu)),
        'vcpus_used': int(math.ceil(used_pu)),
        'memory_mb': ms_wrapper.memory_configurable,
        # Used memory is whatever is configurable but no longer free.
        'memory_mb_used': (ms_wrapper.memory_configurable -
                           ms_wrapper.memory_free),
        'hypervisor_type': fields.HVType.PHYP,
        'hypervisor_version': IBM_POWERVM_HYPERVISOR_VERSION,
        'hypervisor_hostname': CONF.host,
        'cpu_info': HOST_STATS_CPU_INFO,
        'numa_topology': None,
        'supported_instances': POWERVM_SUPPORTED_INSTANCES,
        # The fractional processor unit values are published as strings
        # with two decimal places.
        'stats': {
            'proc_units': '%.2f' % total_pu,
            'proc_units_used': '%.2f' % used_pu,
            'memory_region_size': ms_wrapper.memory_region_size,
        },
        'pci_passthrough_devices': _build_pci_json(ms_wrapper),
    }
|
|
|
|
|
|
def _build_pci_json(sys_w):
    """Serialize the host's SR-IOV capacity as pci_passthrough_devices JSON.

    :param sys_w: pypowervm.wrappers.managed_system.System wrapper of the host.
    :return: JSON string representing a list of "PCI passthrough device" dicts,
             See nova.objects.pci_device.PciDevice.
    """
    # Devices are validated by virtue of the network name associated with
    # their label, which must be cleared via an entry in the
    # pci_passthrough_whitelist in the nova.conf.  Each Claim allocates a
    # device and filters it from the list for subsequent claims, so one entry
    # is generated for every VF that could possibly be created on each
    # physical port.  These are NOT real VFs; the real VFs get created on the
    # fly by VNIC.create.
    devices = []
    for adapter in sys_w.asio_config.sriov_adapters:
        for port in adapter.phys_ports:
            for vf_index in range(port.supp_max_lps):
                # Unlabelled ports fall back to the 'default' network.
                network = port.label or 'default'
                pci_address = '*:%d:%d.%d' % (
                    adapter.sriov_adap_id, port.port_id, vf_index)
                devices.append({
                    "physical_network": network,
                    "label": network,
                    "dev_type": fields.PciDeviceType.SRIOV_VF,
                    "address": pci_address,
                    "parent_addr": "*:*:*.*",
                    "vendor_id": "*",
                    "product_id": "*",
                    "numa_node": 1,
                })

    return jsonutils.dumps(devices)
|
|
|
|
|
|
class HostCPUStats(pcm_util.MetricCache):
    """Transforms the PowerVM CPU metrics into the Nova format.

    PowerVM only gathers the CPU statistics once every 30 seconds.  It does
    this to reduce overhead.  There is a function to gather statistics
    quicker, but that can be very expensive.  Therefore, to ensure that the
    client's workload is not impacted, these 'longer term' metrics will be
    used.

    This class builds off of a base pypowervm function where it can obtain
    the samples through a PCM 'cache'.  If a new sample is available, the
    cache pulls the sample.  If it is not, the existing sample is used.

    This can result in multiple, quickly successive calls to the host stats
    returning the same data (because a new sample may not be available yet).

    The class analyzes the data and collapses it down to the format needed by
    the Nova manager.
    """

    def __init__(self, adapter, host_uuid):
        """Creates an instance of the HostCPUStats.

        :param adapter: The pypowervm Adapter.
        :param host_uuid: The UUID of the host CEC to maintain a metrics
                          cache for.
        """
        # This represents the current state of cycles spent on the system.
        # These are used to figure out usage statistics.  As such, they are
        # tied to the start of the nova compute process.
        #
        # - idle: Total idle cycles on the compute host.
        # - kernel: How many cycles the hypervisor has consumed.  Not a
        #           direct analogy to KVM.
        # - user: The amount of time spent by the VMs themselves.
        # - iowait: Not used in PowerVM, but needed for nova.
        # - frequency: The CPU frequency.
        self.tot_data = {'idle': 0, 'kernel': 0, 'user': 0, 'iowait': 0,
                         'frequency': 0}

        # Invoke the parent to seed the metrics.  Don't include VIO - will
        # result in quicker calls.
        super(HostCPUStats, self).__init__(adapter, host_uuid,
                                           include_vio=False)

    @lockutils.synchronized('pvm_host_metrics_get')
    def get_host_cpu_stats(self):
        """Returns the currently known host CPU stats.

        :return: The dictionary (as defined by the compute driver's
                 get_host_cpu_stats) holding the accumulated 'idle',
                 'kernel', 'user' and 'iowait' cycles plus the CPU
                 'frequency'.  The same dictionary object is returned on
                 every call; its values remain at their initial 0 until a
                 sample has been processed by _update_internal_metric.
        """
        # Refresh if needed.  Will no-op if no refresh is required.
        self._refresh_if_needed()

        # The invoking code needs the total cycles for this to work properly.
        # Return the dictionary format of the cycles as derived by the
        # _update_internal_metric method.
        return self.tot_data

    def _update_internal_metric(self):
        """Uses the latest stats from the cache, and parses to Nova format.

        This method is invoked by the parent class after the raw metrics are
        updated.  It adds the idle/kernel/user cycle deltas since the last
        sample onto the running totals in self.tot_data and refreshes the
        stored CPU frequency.
        """
        # If there is no 'new' data (perhaps sampling is not turned on) then
        # leave the totals untouched.
        if self.cur_phyp is None:
            return

        # Compute the cycles spent in FW since last collection.
        fw_cycles_delta = self._get_fw_cycles_delta()

        # Compute the cycles the system spent since last run.
        tot_cycles_delta = self._get_total_cycles_delta()

        # Get the user cycles since last run.
        user_cycles_delta = self._gather_user_cycles_delta()

        # Make sure that the total cycles is higher than the user/fw cycles.
        # Should not happen, but just in case there is any precision loss from
        # CPU data back to system.
        if user_cycles_delta + fw_cycles_delta > tot_cycles_delta:
            LOG.warning(
                "Host CPU Metrics determined that the total cycles reported "
                "was less than the used cycles.  This indicates an issue with "
                "the PCM data.  Please investigate the results.\n"
                "Total Delta Cycles: %(tot_cycles)d\n"
                "User Delta Cycles: %(user_cycles)d\n"
                "Firmware Delta Cycles: %(fw_cycles)d",
                {'tot_cycles': tot_cycles_delta, 'fw_cycles': fw_cycles_delta,
                 'user_cycles': user_cycles_delta})
            # Clamp the total so that the idle delta below comes out as 0
            # rather than negative.
            tot_cycles_delta = user_cycles_delta + fw_cycles_delta

        # Idle is the subtraction of all.
        idle_delta_cycles = (tot_cycles_delta - user_cycles_delta -
                             fw_cycles_delta)

        # The only moving cycles are idle, kernel and user.
        self.tot_data['idle'] += idle_delta_cycles
        self.tot_data['kernel'] += fw_cycles_delta
        self.tot_data['user'] += user_cycles_delta

        # Frequency doesn't accumulate like the others.  It is simply
        # overwritten with the current reading.
        self.tot_data['frequency'] = self._get_cpu_freq()

    def _gather_user_cycles_delta(self):
        """The estimated user cycles of all VMs/VIOSes since last run.

        The sample data includes information about how much CPU has been used
        by workloads and the Virtual I/O Servers.  There is not one global
        counter that can be used to obtain the CPU spent cycles.

        This method will calculate the delta of workload (and I/O Server)
        cycles between the previous sample and the current sample.

        There are edge cases for this however.  If a VM is deleted or migrated
        its cycles will no longer be taken into account.  The algorithm takes
        this into account by building on top of the previous sample's user
        cycles.

        :return: Estimated cycles spent on workload (including VMs and Virtual
                 I/O Server).  This represents the entire server's current
                 'user' load.
        """
        # Current samples should be guaranteed to be there.
        vm_cur_samples = self.cur_phyp.sample.lpars
        vios_cur_samples = self.cur_phyp.sample.vioses

        # The previous samples may not have been there.
        vm_prev_samples, vios_prev_samples = None, None
        if self.prev_phyp is not None:
            vm_prev_samples = self.prev_phyp.sample.lpars
            vios_prev_samples = self.prev_phyp.sample.vioses

        # Gather the delta cycles between the previous and current data sets.
        vm_delta_cycles = self._delta_proc_cycles(vm_cur_samples,
                                                  vm_prev_samples)
        vios_delta_cycles = self._delta_proc_cycles(vios_cur_samples,
                                                    vios_prev_samples)

        return vm_delta_cycles + vios_delta_cycles

    @staticmethod
    def _get_cpu_freq():
        """Reads the CPU clock frequency from /proc/cpuinfo.

        Runs awk to print the third field of the first line of /proc/cpuinfo
        that matches 'clock'.

        :return: The frequency with the 'MHz' suffix removed, truncated to
                 an int.
        :raises subprocess.CalledProcessError: If awk exits non-zero.
        :raises ValueError: If the output is not a number (for example when
                            no 'clock' line was found).
        """
        # The output will be similar to '4116.000000MHz' on a POWER system.
        cmd = ['/usr/bin/awk', '/clock/ {print $3; exit}', '/proc/cpuinfo']
        output = subprocess.check_output(cmd)
        # check_output returns bytes on Python 3, and bytes.rstrip() rejects
        # a str argument.  Decode first so the suffix strip works on both
        # Python 2 and Python 3.
        if isinstance(output, bytes):
            output = output.decode('utf-8')
        return int(float(output.rstrip("MHz\n")))

    def _delta_proc_cycles(self, samples, prev_samples):
        """Sums all the processor delta cycles for a set of VM/VIOS samples.

        This sum is the difference from the last sample to the current sample.

        :param samples: A set of PhypVMSample or PhypViosSample samples.
        :param prev_samples: The set of the previous samples.  May be None.
        :return: The cycles spent on workload across all of the samples.
        """
        # Determine the user cycles spent between the last sample and the
        # current.
        user_cycles = 0
        for lpar_sample in samples:
            prev_sample = self._find_prev_sample(lpar_sample, prev_samples)
            user_cycles += self._delta_user_cycles(lpar_sample, prev_sample)
        return user_cycles

    @staticmethod
    def _delta_user_cycles(cur_sample, prev_sample):
        """Determines the delta of user cycles from the cur and prev sample.

        :param cur_sample: The current sample.
        :param prev_sample: The previous sample.  May be None.
        :return: The difference in cycles between the two samples.  0 is
                 returned when there is no previous sample, or when every
                 counter of the previous sample is 0.
        """
        # If the previous sample for this VM is None it could be one of two
        # conditions.  It could be a new spawn or a live migration.  The
        # cycles from a live migrate are brought over from the previous host.
        # That can disorient the calculation because all of a sudden you could
        # get months of cycles.  Since we can not discern between the two
        # scenarios, we return 0 (effectively throwing the sample out).
        # The next pass through will have the previous sample and will be
        # included.
        if prev_sample is None:
            return 0

        # If the previous sample values are all 0 (happens when VM is just
        # migrated, phyp creates entry for VM with 0 values), then ignore the
        # sample.
        if (prev_sample.processor.util_cap_proc_cycles ==
                prev_sample.processor.util_uncap_proc_cycles ==
                prev_sample.processor.idle_proc_cycles == 0):
            return 0

        # The VM utilization on host is its capped + uncapped - idle cycles.
        # Donated proc cycles should not be considered as these are
        # not guaranteed to be getting utilized by any other lpar on the host.
        prev_amount = (prev_sample.processor.util_cap_proc_cycles +
                       prev_sample.processor.util_uncap_proc_cycles -
                       prev_sample.processor.idle_proc_cycles)
        cur_amount = (cur_sample.processor.util_cap_proc_cycles +
                      cur_sample.processor.util_uncap_proc_cycles -
                      cur_sample.processor.idle_proc_cycles)
        return cur_amount - prev_amount

    @staticmethod
    def _find_prev_sample(sample, prev_samples):
        """Finds the previous VM Sample for a given current sample.

        A previous sample matches when both its id and its name equal those
        of the current sample.

        :param sample: The current sample.
        :param prev_samples: The previous samples to search through.  May be
                             None.
        :return: The previous sample, if it exists.  None otherwise.
        """
        # Will occur if there are no previous samples.
        if prev_samples is None:
            return None
        for prev_sample in prev_samples:
            if prev_sample.id == sample.id and prev_sample.name == sample.name:
                return prev_sample
        return None

    def _get_total_cycles_delta(self):
        """Returns the 'total cycles' on the system since last sample.

        :return: The total delta cycles since the last run.
        """
        sample = self.cur_phyp.sample
        cur_cores = sample.processor.configurable_proc_units
        cur_cycles_per_core = sample.time_based_cycles

        # Without a previous sample the whole current counter is the delta.
        if self.prev_phyp:
            prev_cycles_per_core = self.prev_phyp.sample.time_based_cycles
        else:
            prev_cycles_per_core = 0

        # Get the delta cycles between the cores.
        delta_cycles_per_core = cur_cycles_per_core - prev_cycles_per_core

        # Total cycles since last sample is the 'per cpu' cycles spent
        # times the number of active cores.
        return delta_cycles_per_core * cur_cores

    def _get_fw_cycles_delta(self):
        """Returns the number of cycles spent on firmware since last sample."""
        cur_fw = self.cur_phyp.sample.system_firmware.utilized_proc_cycles
        # Without a previous sample the whole current counter is the delta.
        prev_fw = (self.prev_phyp.sample.system_firmware.utilized_proc_cycles
                   if self.prev_phyp else 0)
        return cur_fw - prev_fw
|