Ensure instance metrics get collected

The Hyper-V metric collection API has the following limitations:
* some metrics get disabled when the instance is rebooted (network,
cpu and memory)
* network metrics cannot be enabled while the vm is stopped
* metrics cannot be collected for passthrough disks
* some times, the metric counters are reset when the vms are rebooted.
We can't do much about this Hyper-V bug, which is unfortunately
affecting the reliability of the collected data.

At the same time, OVS ports and Cinder volumes aren't handled at all.

To overcome those issues, this patch brings the following changes:
* re-enable vm metrics each time the vms boot
* enable vm metrics after live migrations
* enable metrics metrics when binding OVS ports (networking-hyperv
  will handle 'hyperv' ports)
* enable disk metrics when attaching SMB Cinder volumes, logging a
  warning for passthrough disks

Closes-Bug: #1794456
Depends-On: Ibe7409b392b91204f0cea2bdd32c86ec03d06b67
Depends-On: Iaba32ca94b2c09661f150fa4b12b2c7eb83ff6d9

Change-Id: I94f2be20b02531d3ceabdef4272207e37ef34930
This commit is contained in:
Lucian Petrut 2018-09-26 11:27:17 +03:00
parent e2ffcc2f0e
commit 8c44c7a960
12 changed files with 203 additions and 22 deletions

View File

@ -72,6 +72,14 @@ class InstanceEventHandler(object):
instance_state)
utils.spawn_n(self._state_change_callback, virt_event)
should_enable_metrics = (
CONF.hyperv.enable_instance_metrics_collection and
instance_state == constants.HYPERV_VM_STATE_ENABLED)
if should_enable_metrics:
utils.spawn_n(self._vmops.configure_instance_metrics,
instance_name,
enable_network_metrics=True)
utils.spawn_n(self._handle_serial_console_workers,
instance_name, instance_state)

View File

@ -126,6 +126,7 @@ class LiveMigrationOps(object):
LOG.debug("post_live_migration_at_destination called",
instance=instance_ref)
self._vmops.plug_vifs(instance_ref, network_info)
self._vmops.configure_instance_metrics(instance_ref.name)
def check_can_live_migrate_destination(self, ctxt, instance_ref,
src_compute_info, dst_compute_info,

View File

@ -325,8 +325,7 @@ class MigrationOps(object):
self._check_ephemeral_disks(instance, ephemerals, resize_instance)
self._vmops.configure_remotefx(instance, vm_gen, resize_instance)
if CONF.hyperv.enable_instance_metrics_collection:
self._metricsutils.enable_vm_metrics_collection(instance.name)
self._vmops.configure_instance_metrics(instance.name)
def _import_vm(self, instance_dir):
snapshot_dir = self._pathutils.get_instance_snapshot_dir(

View File

@ -22,10 +22,13 @@ import nova.network
from nova.network import model
from nova.network import os_vif_util
import os_vif
from os_win import constants as os_win_const
from os_win import utilsfactory
from oslo_log import log as logging
import compute_hyperv.nova.conf
LOG = logging.getLogger(__name__)
CONF = compute_hyperv.nova.conf.CONF
@ -68,7 +71,9 @@ class HyperVNovaNetworkVIFPlugin(HyperVBaseVIFPlugin):
class HyperVVIFDriver(object):
def __init__(self):
self._metricsutils = utilsfactory.get_metricsutils()
self._netutils = utilsfactory.get_networkutils()
self._vmutils = utilsfactory.get_vmutils()
if nova.network.is_neutron():
self._vif_plugin = HyperVNeutronVIFPlugin()
else:
@ -86,6 +91,10 @@ class HyperVVIFDriver(object):
# before the ovs port is created.
self._netutils.connect_vnic_to_vswitch(CONF.hyperv.vswitch_name,
vif.id)
if CONF.hyperv.enable_instance_metrics_collection:
self._netutils.add_metrics_collection_acls(vif.id)
self.enable_metrics(instance.name, vif.id)
os_vif.plug(vif, instance)
else:
reason = _("Failed to plug virtual interface: "
@ -103,3 +112,26 @@ class HyperVVIFDriver(object):
else:
reason = _("unexpected vif_type=%s") % vif_type
raise exception.VirtualInterfaceUnplugException(reason=reason)
def enable_metrics(self, instance_name, vif_id):
# Hyper-V's metric collection API is extremely inconsistent.
# As opposed to other metrics, network metrics have to be enabled
# whenever the vm starts. Attempting to do so while the vm is shut off
# will fail. Also, this option gets reset when the vm is rebooted.
#
# Note that meter ACLs must already be set on the specified port.
# For "hyperv" ports, this is handled by networking-hyperv, while
# for OVS ports, we're doing it on the Nova side.
vm_state = self._vmutils.get_vm_state(instance_name)
if vm_state in [os_win_const.HYPERV_VM_STATE_ENABLED,
os_win_const.HYPERV_VM_STATE_PAUSED]:
LOG.debug("Enabling instance port metrics. "
"Instance name: %(instance_name)s. "
"Port name: %(port_name)s.",
dict(instance_name=instance_name,
port_name=vif_id))
self._metricsutils.enable_port_metrics_collection(vif_id)
else:
LOG.debug("Instance %s is not running. Port metrics will "
"be enabled when the instance starts.",
instance_name)

View File

@ -433,8 +433,7 @@ class VMOps(object):
vif['id'],
vif['address'])
if CONF.hyperv.enable_instance_metrics_collection:
self._metricsutils.enable_vm_metrics_collection(instance_name)
self.configure_instance_metrics(instance_name)
if secure_boot_enabled:
certificate_required = self._requires_certificate(image_meta)
@ -1103,6 +1102,26 @@ class VMOps(object):
for vif in network_info:
self._vif_driver.unplug(instance, vif)
def configure_instance_metrics(self, instance_name,
enable_network_metrics=False):
if not CONF.hyperv.enable_instance_metrics_collection:
LOG.debug("Instance metrics collection is not enabled.")
return
LOG.debug("Enabling instance %s metrics.", instance_name)
# Some metrics (cpu, memory and network) are disabled when the vms are
# powered off. This looks like a Hyper-V bug that we'll have to
# mitigate at the Nova driver level.
self._metricsutils.enable_vm_metrics_collection(instance_name)
# Network metrics are handled separately. The reason is that the vm
# must be running and the ports must be already attached in order to
# be able to enable those metrics.
if enable_network_metrics:
vif_ids = self._vmutils.get_vm_nic_names(instance_name)
for vif_id in vif_ids:
self._vif_driver.enable_metrics(instance_name, vif_id)
def _get_image_serial_port_settings(self, image_meta):
image_props = image_meta['properties']
serial_ports = {}

View File

@ -399,6 +399,7 @@ class BaseVolumeDriver(object):
self._diskutils = utilsfactory.get_diskutils()
self._vmutils = utilsfactory.get_vmutils()
self._migrutils = utilsfactory.get_migrationutils()
self._metricsutils = utilsfactory.get_metricsutils()
@property
def _connector(self):
@ -488,6 +489,21 @@ class BaseVolumeDriver(object):
ctrller_path,
slot)
self._configure_disk_metrics(disk_path)
def _configure_disk_metrics(self, disk_path):
if not CONF.hyperv.enable_instance_metrics_collection:
return
if self._is_block_dev:
LOG.warning("Hyper-V does not support collecting metrics for "
"passthrough disks (e.g. iSCSI/FC).")
return
LOG.debug("Enabling disk metrics: %s.", disk_path)
self._metricsutils.enable_disk_metrics_collection(
disk_path, is_physical=self._is_block_dev)
def detach_volume(self, connection_info, instance_name):
if self._migrutils.planned_vm_exists(instance_name):
LOG.warning("Instance %s is a Planned VM, cannot detach "

View File

@ -76,16 +76,23 @@ class EventHandlerTestCase(test_base.HyperVBaseTestCase):
@mock.patch.object(eventhandler.InstanceEventHandler, '_get_virt_event')
@mock.patch.object(utils, 'spawn_n')
def test_emit_event(self, mock_spawn, mock_get_event):
state = constants.HYPERV_VM_STATE_ENABLED
self.flags(enable_instance_metrics_collection=True,
group="hyperv")
self._event_handler._emit_event(mock.sentinel.instance_name,
mock.sentinel.instance_uuid,
mock.sentinel.instance_state)
state)
virt_event = mock_get_event.return_value
mock_spawn.assert_has_calls(
[mock.call(self._state_change_callback, virt_event),
mock.call(self._event_handler._vmops.configure_instance_metrics,
mock.sentinel.instance_name,
enable_network_metrics=True),
mock.call(self._event_handler._handle_serial_console_workers,
mock.sentinel.instance_name,
mock.sentinel.instance_state)])
state)])
def test_handle_serial_console_instance_running(self):
self._event_handler._handle_serial_console_workers(

View File

@ -45,6 +45,7 @@ class LiveMigrationOpsTestCase(test_base.HyperVBaseTestCase):
self.context = 'fake_context'
self._livemigrops = livemigrationops.LiveMigrationOps()
self._pathutils = self._livemigrops._pathutils
self._vmops = self._livemigrops._vmops
def _test_live_migration(self, side_effect=None,
shared_storage=False,
@ -239,9 +240,13 @@ class LiveMigrationOpsTestCase(test_base.HyperVBaseTestCase):
mock.sentinel.dest_comp_info)
def test_post_live_migration_at_destination(self):
mock_instance = fake_instance.fake_instance_obj(self.context)
self._livemigrops.post_live_migration_at_destination(
self.context, mock.sentinel.instance,
self.context, mock_instance,
network_info=mock.sentinel.NET_INFO,
block_migration=mock.sentinel.BLOCK_INFO)
self._livemigrops._vmops.plug_vifs.assert_called_once_with(
mock.sentinel.instance, mock.sentinel.NET_INFO)
mock_instance, mock.sentinel.NET_INFO)
self._vmops.configure_instance_metrics.assert_called_once_with(
mock_instance.name)

View File

@ -381,8 +381,6 @@ class MigrationOpsTestCase(test_base.HyperVBaseTestCase):
mock_update_disk_image_paths,
mock_check_and_update_disks,
mock_check_eph_disks):
self.flags(enable_instance_metrics_collection=True,
group='hyperv')
block_device_info = {'ephemerals': mock.sentinel.ephemerals}
mock_instance = fake_instance.fake_instance_obj(self.context)
@ -417,9 +415,8 @@ class MigrationOpsTestCase(test_base.HyperVBaseTestCase):
self._migrationops._vmops.configure_remotefx.assert_called_once_with(
mock_instance, get_image_vm_gen.return_value,
mock.sentinel.resize_instance)
mock_enable = (
self._migrationops._metricsutils.enable_vm_metrics_collection)
mock_enable.assert_called_once_with(mock_instance.name)
self._vmops.configure_instance_metrics.assert_called_once_with(
mock_instance.name)
def test_import_vm(self):
self._migrationops._import_vm(mock.sentinel.instance_dir)

View File

@ -14,9 +14,11 @@
# License for the specific language governing permissions and limitations
# under the License.
import ddt
import mock
from nova import exception
from nova.network import model
from os_win import constants as os_win_const
import compute_hyperv.nova.conf
from compute_hyperv.nova import vif
@ -41,12 +43,17 @@ class HyperVNovaNetworkVIFPluginTestCase(test_base.HyperVBaseTestCase):
'fake_vswitch_name', mock.sentinel.fake_id)
@ddt.ddt
class HyperVVIFDriverTestCase(test_base.HyperVBaseTestCase):
def setUp(self):
super(HyperVVIFDriverTestCase, self).setUp()
self.vif_driver = vif.HyperVVIFDriver()
self.vif_driver._vif_plugin = mock.MagicMock()
self._netutils = self.vif_driver._netutils
self._vmutils = self.vif_driver._vmutils
self._metricsutils = self.vif_driver._metricsutils
@mock.patch.object(vif.nova.network, 'is_neutron')
def test_init_neutron(self, mock_is_neutron):
mock_is_neutron.return_value = True
@ -70,11 +77,19 @@ class HyperVVIFDriverTestCase(test_base.HyperVBaseTestCase):
mock.sentinel.instance, vif)
@mock.patch.object(vif, 'os_vif')
@mock.patch.object(vif.HyperVVIFDriver, 'enable_metrics')
@mock.patch.object(vif.os_vif_util, 'nova_to_osvif_instance')
@mock.patch.object(vif.os_vif_util, 'nova_to_osvif_vif')
def test_plug_ovs(self, mock_nova_to_osvif_vif,
mock_nova_to_osvif_instance, mock_os_vif):
mock_nova_to_osvif_instance,
mock_enable_metrics, mock_os_vif):
self.flags(enable_instance_metrics_collection=True,
group='hyperv')
vif = {'type': model.VIF_TYPE_OVS}
osvif_instance = mock_nova_to_osvif_instance.return_value
vif_obj = mock_nova_to_osvif_vif.return_value
self.vif_driver.plug(mock.sentinel.instance, vif)
mock_nova_to_osvif_vif.assert_called_once_with(vif)
@ -82,10 +97,32 @@ class HyperVVIFDriverTestCase(test_base.HyperVBaseTestCase):
mock.sentinel.instance)
connect_vnic = self.vif_driver._netutils.connect_vnic_to_vswitch
connect_vnic.assert_called_once_with(
CONF.hyperv.vswitch_name, mock_nova_to_osvif_vif.return_value.id)
CONF.hyperv.vswitch_name, vif_obj.id)
mock_os_vif.plug.assert_called_once_with(
mock_nova_to_osvif_vif.return_value,
mock_nova_to_osvif_instance.return_value)
vif_obj, osvif_instance)
self._netutils.add_metrics_collection_acls.assert_called_once_with(
vif_obj.id)
mock_enable_metrics.assert_called_once_with(
osvif_instance.name, vif_obj.id)
@ddt.data(True, False)
def test_enable_metrics(self, vm_running):
state = (os_win_const.HYPERV_VM_STATE_ENABLED if vm_running
else os_win_const.HYPERV_VM_STATE_DISABLED)
self._vmutils.get_vm_state.return_value = state
enable_metrics = self._metricsutils.enable_port_metrics_collection
self.vif_driver.enable_metrics(mock.sentinel.instance_name,
mock.sentinel.vif_id)
self._vmutils.get_vm_state.assert_called_once_with(
mock.sentinel.instance_name)
if vm_running:
enable_metrics.assert_called_once_with(mock.sentinel.vif_id)
else:
enable_metrics.assert_not_called()
def test_plug_type_unknown(self):
vif = {'type': mock.sentinel.vif_type}

View File

@ -77,6 +77,8 @@ class VMOpsTestCase(test_base.HyperVBaseTestCase):
self._vmops = vmops.VMOps(virtapi=mock.MagicMock())
self._pathutils = self._vmops._pathutils
self._vmutils = self._vmops._vmutils
self._metricsutils = self._vmops._metricsutils
self._vif_driver = self._vmops._vif_driver
def test_list_instances(self):
mock_instance = mock.MagicMock()
@ -647,6 +649,7 @@ class VMOpsTestCase(test_base.HyperVBaseTestCase):
self.assertEqual([], events)
mock_is_neutron.assert_called_once_with()
@mock.patch.object(vmops.VMOps, 'configure_instance_metrics')
@mock.patch.object(vmops.VMOps, 'update_vm_resources')
@mock.patch.object(vmops.VMOps, '_configure_secure_vm')
@mock.patch.object(vmops.VMOps, '_requires_secure_boot')
@ -666,9 +669,8 @@ class VMOpsTestCase(test_base.HyperVBaseTestCase):
mock_requires_certificate,
mock_requires_secure_boot,
mock_configure_secure_vm,
mock_update_vm_resources):
self.flags(enable_instance_metrics_collection=True,
group='hyperv')
mock_update_vm_resources,
mock_configure_metrics):
root_device_info = mock.sentinel.ROOT_DEV_INFO
block_device_info = {'root_disk': root_device_info, 'ephemerals': [],
'block_device_mapping': []}
@ -713,8 +715,7 @@ class VMOpsTestCase(test_base.HyperVBaseTestCase):
self._vmops._vmutils.create_nic.assert_called_once_with(
mock_instance.name, mock.sentinel.ID, mock.sentinel.ADDRESS)
mock_enable = self._vmops._metricsutils.enable_vm_metrics_collection
mock_enable.assert_called_once_with(mock_instance.name)
mock_configure_metrics.assert_called_once_with(mock_instance.name)
mock_requires_secure_boot.assert_called_once_with(
mock_instance, mock.sentinel.image_meta, mock.sentinel.vm_gen)
mock_requires_certificate.assert_called_once_with(
@ -1721,6 +1722,39 @@ class VMOpsTestCase(test_base.HyperVBaseTestCase):
network_info=mock_network_info)
self._vmops._vif_driver.unplug.assert_has_calls(calls)
@ddt.data({},
{'metrics_enabled': False},
{'enable_network_metrics': False})
@ddt.unpack
def test_configure_instance_metrics(self, metrics_enabled=True,
enable_network_metrics=True):
port_names = ['port1', 'port2']
enable_vm_metrics = self._metricsutils.enable_vm_metrics_collection
self._vmutils.get_vm_nic_names.return_value = port_names
self.flags(enable_instance_metrics_collection=metrics_enabled,
group='hyperv')
self._vmops.configure_instance_metrics(
mock.sentinel.instance_name,
enable_network_metrics=enable_network_metrics)
if metrics_enabled:
enable_vm_metrics.assert_called_once_with(
mock.sentinel.instance_name)
if enable_network_metrics:
self._vmutils.get_vm_nic_names.assert_called_once_with(
mock.sentinel.instance_name)
self._vif_driver.enable_metrics.assert_has_calls(
[mock.call(mock.sentinel.instance_name, port_name)
for port_name in port_names])
else:
enable_vm_metrics.assert_not_called()
if not (metrics_enabled and enable_network_metrics):
self._vif_driver.enable_metrics.assert_not_called()
def _setup_remotefx_mocks(self):
mock_instance = fake_instance.fake_instance_obj(self.context)
mock_instance.flavor.extra_specs = {

View File

@ -539,6 +539,7 @@ class BaseVolumeDriverTestCase(test_base.HyperVBaseTestCase):
self._vmutils = self._base_vol_driver._vmutils
self._migrutils = self._base_vol_driver._migrutils
self._diskutils = self._base_vol_driver._diskutils
self._metricsutils = self._base_vol_driver._metricsutils
self._conn = self._base_vol_driver._conn
@mock.patch.object(connector.InitiatorConnector, 'factory')
@ -673,6 +674,8 @@ class BaseVolumeDriverTestCase(test_base.HyperVBaseTestCase):
else:
self._base_vol_driver._check_san_policy()
@mock.patch.object(volumeops.BaseVolumeDriver,
'_configure_disk_metrics')
@mock.patch.object(volumeops.BaseVolumeDriver,
'_get_disk_res_path')
@mock.patch.object(volumeops.BaseVolumeDriver, '_get_disk_ctrl_and_slot')
@ -684,6 +687,7 @@ class BaseVolumeDriverTestCase(test_base.HyperVBaseTestCase):
mock_connect_volume,
mock_get_disk_ctrl_and_slot,
mock_get_disk_res_path,
mock_configure_metrics,
is_block_dev=True):
connection_info = get_fake_connection_info()
self._base_vol_driver._is_block_dev = is_block_dev
@ -719,6 +723,7 @@ class BaseVolumeDriverTestCase(test_base.HyperVBaseTestCase):
mock_get_disk_ctrl_and_slot.assert_called_once_with(
mock.sentinel.instance_name, mock.sentinel.disk_bus)
mock_validate_host_config.assert_called_once_with()
mock_configure_metrics.assert_called_once_with(mock.sentinel.disk_path)
def test_attach_volume_image_file(self):
self._test_attach_volume(is_block_dev=False)
@ -731,6 +736,27 @@ class BaseVolumeDriverTestCase(test_base.HyperVBaseTestCase):
mock.sentinel.inst_name)
self._vmutils.detach_vm_disk.assert_not_called()
@ddt.data({},
{'metrics_enabled': False},
{'is_block_dev': True})
@ddt.unpack
def test_configure_disk_metrics(self, metrics_enabled=True,
is_block_dev=False):
self.flags(enable_instance_metrics_collection=metrics_enabled,
group='hyperv')
self._base_vol_driver._is_block_dev = is_block_dev
enable_metrics = self._metricsutils.enable_disk_metrics_collection
self._base_vol_driver._configure_disk_metrics(mock.sentinel.disk_path)
if metrics_enabled and not is_block_dev:
enable_metrics.assert_called_once_with(
mock.sentinel.disk_path,
is_physical=is_block_dev)
else:
enable_metrics.assert_not_called()
@ddt.data(True, False)
@mock.patch.object(volumeops.BaseVolumeDriver,
'get_disk_resource_path')