From 8c44c7a960f99b582c733118b7c2d24dcca51b79 Mon Sep 17 00:00:00 2001 From: Lucian Petrut Date: Wed, 26 Sep 2018 11:27:17 +0300 Subject: [PATCH] Ensure instance metrics get collected The Hyper-V metric collection API has the following limitations: * some metrics get disabled when the instance is rebooted (network, cpu and memory) * network metrics cannot be enabled while the vm is stopped * metrics cannot be collected for passthrough disks * some times, the metric counters are reset when the vms are rebooted. We can't do much about this Hyper-V bug, which is unfortunately affecting the reliability of the collected data. At the same time, OVS ports and Cinder volumes aren't handled at all. To overcome those issues, this patch brings the following changes: * re-enable vm metrics each time the vms boot * enable vm metrics after live migrations * enable metrics metrics when binding OVS ports (networking-hyperv will handle 'hyperv' ports) * enable disk metrics when attaching SMB Cinder volumes, logging a warning for passthrough disks Closes-Bug: #1794456 Depends-On: Ibe7409b392b91204f0cea2bdd32c86ec03d06b67 Depends-On: Iaba32ca94b2c09661f150fa4b12b2c7eb83ff6d9 Change-Id: I94f2be20b02531d3ceabdef4272207e37ef34930 --- compute_hyperv/nova/eventhandler.py | 8 ++++ compute_hyperv/nova/livemigrationops.py | 1 + compute_hyperv/nova/migrationops.py | 3 +- compute_hyperv/nova/vif.py | 32 +++++++++++++ compute_hyperv/nova/vmops.py | 23 +++++++++- compute_hyperv/nova/volumeops.py | 16 +++++++ .../tests/unit/test_eventhandler.py | 11 ++++- .../tests/unit/test_livemigrationops.py | 9 +++- .../tests/unit/test_migrationops.py | 7 +-- compute_hyperv/tests/unit/test_vif.py | 45 +++++++++++++++++-- compute_hyperv/tests/unit/test_vmops.py | 44 +++++++++++++++--- compute_hyperv/tests/unit/test_volumeops.py | 26 +++++++++++ 12 files changed, 203 insertions(+), 22 deletions(-) diff --git a/compute_hyperv/nova/eventhandler.py b/compute_hyperv/nova/eventhandler.py index d0916fe4..eb64a4db 100644 --- a/compute_hyperv/nova/eventhandler.py +++ b/compute_hyperv/nova/eventhandler.py @@ -72,6 +72,14 @@ class InstanceEventHandler(object): instance_state) utils.spawn_n(self._state_change_callback, virt_event) + should_enable_metrics = ( + CONF.hyperv.enable_instance_metrics_collection and + instance_state == constants.HYPERV_VM_STATE_ENABLED) + if should_enable_metrics: + utils.spawn_n(self._vmops.configure_instance_metrics, + instance_name, + enable_network_metrics=True) + utils.spawn_n(self._handle_serial_console_workers, instance_name, instance_state) diff --git a/compute_hyperv/nova/livemigrationops.py b/compute_hyperv/nova/livemigrationops.py index b50f4c3a..ceb07be2 100644 --- a/compute_hyperv/nova/livemigrationops.py +++ b/compute_hyperv/nova/livemigrationops.py @@ -126,6 +126,7 @@ class LiveMigrationOps(object): LOG.debug("post_live_migration_at_destination called", instance=instance_ref) self._vmops.plug_vifs(instance_ref, network_info) + self._vmops.configure_instance_metrics(instance_ref.name) def check_can_live_migrate_destination(self, ctxt, instance_ref, src_compute_info, dst_compute_info, diff --git a/compute_hyperv/nova/migrationops.py b/compute_hyperv/nova/migrationops.py index 05b0fdb4..98e77fc3 100644 --- a/compute_hyperv/nova/migrationops.py +++ b/compute_hyperv/nova/migrationops.py @@ -325,8 +325,7 @@ class MigrationOps(object): self._check_ephemeral_disks(instance, ephemerals, resize_instance) self._vmops.configure_remotefx(instance, vm_gen, resize_instance) - if CONF.hyperv.enable_instance_metrics_collection: - self._metricsutils.enable_vm_metrics_collection(instance.name) + self._vmops.configure_instance_metrics(instance.name) def _import_vm(self, instance_dir): snapshot_dir = self._pathutils.get_instance_snapshot_dir( diff --git a/compute_hyperv/nova/vif.py b/compute_hyperv/nova/vif.py index a7e559c2..9bca1686 100644 --- a/compute_hyperv/nova/vif.py +++ b/compute_hyperv/nova/vif.py @@ -22,10 +22,13 @@ import nova.network from nova.network import model from nova.network import os_vif_util import os_vif +from os_win import constants as os_win_const from os_win import utilsfactory +from oslo_log import log as logging import compute_hyperv.nova.conf +LOG = logging.getLogger(__name__) CONF = compute_hyperv.nova.conf.CONF @@ -68,7 +71,9 @@ class HyperVNovaNetworkVIFPlugin(HyperVBaseVIFPlugin): class HyperVVIFDriver(object): def __init__(self): + self._metricsutils = utilsfactory.get_metricsutils() self._netutils = utilsfactory.get_networkutils() + self._vmutils = utilsfactory.get_vmutils() if nova.network.is_neutron(): self._vif_plugin = HyperVNeutronVIFPlugin() else: @@ -86,6 +91,10 @@ class HyperVVIFDriver(object): # before the ovs port is created. self._netutils.connect_vnic_to_vswitch(CONF.hyperv.vswitch_name, vif.id) + if CONF.hyperv.enable_instance_metrics_collection: + self._netutils.add_metrics_collection_acls(vif.id) + self.enable_metrics(instance.name, vif.id) + os_vif.plug(vif, instance) else: reason = _("Failed to plug virtual interface: " @@ -103,3 +112,26 @@ class HyperVVIFDriver(object): else: reason = _("unexpected vif_type=%s") % vif_type raise exception.VirtualInterfaceUnplugException(reason=reason) + + def enable_metrics(self, instance_name, vif_id): + # Hyper-V's metric collection API is extremely inconsistent. + # As opposed to other metrics, network metrics have to be enabled + # whenever the vm starts. Attempting to do so while the vm is shut off + # will fail. Also, this option gets reset when the vm is rebooted. + # + # Note that meter ACLs must already be set on the specified port. + # For "hyperv" ports, this is handled by networking-hyperv, while + # for OVS ports, we're doing it on the Nova side. + vm_state = self._vmutils.get_vm_state(instance_name) + if vm_state in [os_win_const.HYPERV_VM_STATE_ENABLED, + os_win_const.HYPERV_VM_STATE_PAUSED]: + LOG.debug("Enabling instance port metrics. " + "Instance name: %(instance_name)s. " + "Port name: %(port_name)s.", + dict(instance_name=instance_name, + port_name=vif_id)) + self._metricsutils.enable_port_metrics_collection(vif_id) + else: + LOG.debug("Instance %s is not running. Port metrics will " + "be enabled when the instance starts.", + instance_name) diff --git a/compute_hyperv/nova/vmops.py b/compute_hyperv/nova/vmops.py index 5e2bc7e6..0d98947d 100644 --- a/compute_hyperv/nova/vmops.py +++ b/compute_hyperv/nova/vmops.py @@ -433,8 +433,7 @@ class VMOps(object): vif['id'], vif['address']) - if CONF.hyperv.enable_instance_metrics_collection: - self._metricsutils.enable_vm_metrics_collection(instance_name) + self.configure_instance_metrics(instance_name) if secure_boot_enabled: certificate_required = self._requires_certificate(image_meta) @@ -1103,6 +1102,26 @@ class VMOps(object): for vif in network_info: self._vif_driver.unplug(instance, vif) + def configure_instance_metrics(self, instance_name, + enable_network_metrics=False): + if not CONF.hyperv.enable_instance_metrics_collection: + LOG.debug("Instance metrics collection is not enabled.") + return + + LOG.debug("Enabling instance %s metrics.", instance_name) + # Some metrics (cpu, memory and network) are disabled when the vms are + # powered off. This looks like a Hyper-V bug that we'll have to + # mitigate at the Nova driver level. + self._metricsutils.enable_vm_metrics_collection(instance_name) + + # Network metrics are handled separately. The reason is that the vm + # must be running and the ports must be already attached in order to + # be able to enable those metrics. + if enable_network_metrics: + vif_ids = self._vmutils.get_vm_nic_names(instance_name) + for vif_id in vif_ids: + self._vif_driver.enable_metrics(instance_name, vif_id) + def _get_image_serial_port_settings(self, image_meta): image_props = image_meta['properties'] serial_ports = {} diff --git a/compute_hyperv/nova/volumeops.py b/compute_hyperv/nova/volumeops.py index ebe27c4e..c1eff6f9 100644 --- a/compute_hyperv/nova/volumeops.py +++ b/compute_hyperv/nova/volumeops.py @@ -399,6 +399,7 @@ class BaseVolumeDriver(object): self._diskutils = utilsfactory.get_diskutils() self._vmutils = utilsfactory.get_vmutils() self._migrutils = utilsfactory.get_migrationutils() + self._metricsutils = utilsfactory.get_metricsutils() @property def _connector(self): @@ -488,6 +489,21 @@ class BaseVolumeDriver(object): ctrller_path, slot) + self._configure_disk_metrics(disk_path) + + def _configure_disk_metrics(self, disk_path): + if not CONF.hyperv.enable_instance_metrics_collection: + return + + if self._is_block_dev: + LOG.warning("Hyper-V does not support collecting metrics for " + "passthrough disks (e.g. iSCSI/FC).") + return + + LOG.debug("Enabling disk metrics: %s.", disk_path) + self._metricsutils.enable_disk_metrics_collection( + disk_path, is_physical=self._is_block_dev) + def detach_volume(self, connection_info, instance_name): if self._migrutils.planned_vm_exists(instance_name): LOG.warning("Instance %s is a Planned VM, cannot detach " diff --git a/compute_hyperv/tests/unit/test_eventhandler.py b/compute_hyperv/tests/unit/test_eventhandler.py index 04ccafb0..e1a64e1b 100644 --- a/compute_hyperv/tests/unit/test_eventhandler.py +++ b/compute_hyperv/tests/unit/test_eventhandler.py @@ -76,16 +76,23 @@ class EventHandlerTestCase(test_base.HyperVBaseTestCase): @mock.patch.object(eventhandler.InstanceEventHandler, '_get_virt_event') @mock.patch.object(utils, 'spawn_n') def test_emit_event(self, mock_spawn, mock_get_event): + state = constants.HYPERV_VM_STATE_ENABLED + self.flags(enable_instance_metrics_collection=True, + group="hyperv") + self._event_handler._emit_event(mock.sentinel.instance_name, mock.sentinel.instance_uuid, - mock.sentinel.instance_state) + state) virt_event = mock_get_event.return_value mock_spawn.assert_has_calls( [mock.call(self._state_change_callback, virt_event), + mock.call(self._event_handler._vmops.configure_instance_metrics, + mock.sentinel.instance_name, + enable_network_metrics=True), mock.call(self._event_handler._handle_serial_console_workers, mock.sentinel.instance_name, - mock.sentinel.instance_state)]) + state)]) def test_handle_serial_console_instance_running(self): self._event_handler._handle_serial_console_workers( diff --git a/compute_hyperv/tests/unit/test_livemigrationops.py b/compute_hyperv/tests/unit/test_livemigrationops.py index 464ab196..e62f0c0c 100644 --- a/compute_hyperv/tests/unit/test_livemigrationops.py +++ b/compute_hyperv/tests/unit/test_livemigrationops.py @@ -45,6 +45,7 @@ class LiveMigrationOpsTestCase(test_base.HyperVBaseTestCase): self.context = 'fake_context' self._livemigrops = livemigrationops.LiveMigrationOps() self._pathutils = self._livemigrops._pathutils + self._vmops = self._livemigrops._vmops def _test_live_migration(self, side_effect=None, shared_storage=False, @@ -239,9 +240,13 @@ class LiveMigrationOpsTestCase(test_base.HyperVBaseTestCase): mock.sentinel.dest_comp_info) def test_post_live_migration_at_destination(self): + mock_instance = fake_instance.fake_instance_obj(self.context) + self._livemigrops.post_live_migration_at_destination( - self.context, mock.sentinel.instance, + self.context, mock_instance, network_info=mock.sentinel.NET_INFO, block_migration=mock.sentinel.BLOCK_INFO) self._livemigrops._vmops.plug_vifs.assert_called_once_with( - mock.sentinel.instance, mock.sentinel.NET_INFO) + mock_instance, mock.sentinel.NET_INFO) + self._vmops.configure_instance_metrics.assert_called_once_with( + mock_instance.name) diff --git a/compute_hyperv/tests/unit/test_migrationops.py b/compute_hyperv/tests/unit/test_migrationops.py index ddcb8e29..669e1c7e 100644 --- a/compute_hyperv/tests/unit/test_migrationops.py +++ b/compute_hyperv/tests/unit/test_migrationops.py @@ -381,8 +381,6 @@ class MigrationOpsTestCase(test_base.HyperVBaseTestCase): mock_update_disk_image_paths, mock_check_and_update_disks, mock_check_eph_disks): - self.flags(enable_instance_metrics_collection=True, - group='hyperv') block_device_info = {'ephemerals': mock.sentinel.ephemerals} mock_instance = fake_instance.fake_instance_obj(self.context) @@ -417,9 +415,8 @@ class MigrationOpsTestCase(test_base.HyperVBaseTestCase): self._migrationops._vmops.configure_remotefx.assert_called_once_with( mock_instance, get_image_vm_gen.return_value, mock.sentinel.resize_instance) - mock_enable = ( - self._migrationops._metricsutils.enable_vm_metrics_collection) - mock_enable.assert_called_once_with(mock_instance.name) + self._vmops.configure_instance_metrics.assert_called_once_with( + mock_instance.name) def test_import_vm(self): self._migrationops._import_vm(mock.sentinel.instance_dir) diff --git a/compute_hyperv/tests/unit/test_vif.py b/compute_hyperv/tests/unit/test_vif.py index 0820cbaf..385161c4 100644 --- a/compute_hyperv/tests/unit/test_vif.py +++ b/compute_hyperv/tests/unit/test_vif.py @@ -14,9 +14,11 @@ # License for the specific language governing permissions and limitations # under the License. +import ddt import mock from nova import exception from nova.network import model +from os_win import constants as os_win_const import compute_hyperv.nova.conf from compute_hyperv.nova import vif @@ -41,12 +43,17 @@ class HyperVNovaNetworkVIFPluginTestCase(test_base.HyperVBaseTestCase): 'fake_vswitch_name', mock.sentinel.fake_id) +@ddt.ddt class HyperVVIFDriverTestCase(test_base.HyperVBaseTestCase): def setUp(self): super(HyperVVIFDriverTestCase, self).setUp() self.vif_driver = vif.HyperVVIFDriver() self.vif_driver._vif_plugin = mock.MagicMock() + self._netutils = self.vif_driver._netutils + self._vmutils = self.vif_driver._vmutils + self._metricsutils = self.vif_driver._metricsutils + @mock.patch.object(vif.nova.network, 'is_neutron') def test_init_neutron(self, mock_is_neutron): mock_is_neutron.return_value = True @@ -70,11 +77,19 @@ class HyperVVIFDriverTestCase(test_base.HyperVBaseTestCase): mock.sentinel.instance, vif) @mock.patch.object(vif, 'os_vif') + @mock.patch.object(vif.HyperVVIFDriver, 'enable_metrics') @mock.patch.object(vif.os_vif_util, 'nova_to_osvif_instance') @mock.patch.object(vif.os_vif_util, 'nova_to_osvif_vif') def test_plug_ovs(self, mock_nova_to_osvif_vif, - mock_nova_to_osvif_instance, mock_os_vif): + mock_nova_to_osvif_instance, + mock_enable_metrics, mock_os_vif): + self.flags(enable_instance_metrics_collection=True, + group='hyperv') + vif = {'type': model.VIF_TYPE_OVS} + osvif_instance = mock_nova_to_osvif_instance.return_value + vif_obj = mock_nova_to_osvif_vif.return_value + self.vif_driver.plug(mock.sentinel.instance, vif) mock_nova_to_osvif_vif.assert_called_once_with(vif) @@ -82,10 +97,32 @@ class HyperVVIFDriverTestCase(test_base.HyperVBaseTestCase): mock.sentinel.instance) connect_vnic = self.vif_driver._netutils.connect_vnic_to_vswitch connect_vnic.assert_called_once_with( - CONF.hyperv.vswitch_name, mock_nova_to_osvif_vif.return_value.id) + CONF.hyperv.vswitch_name, vif_obj.id) mock_os_vif.plug.assert_called_once_with( - mock_nova_to_osvif_vif.return_value, - mock_nova_to_osvif_instance.return_value) + vif_obj, osvif_instance) + + self._netutils.add_metrics_collection_acls.assert_called_once_with( + vif_obj.id) + mock_enable_metrics.assert_called_once_with( + osvif_instance.name, vif_obj.id) + + @ddt.data(True, False) + def test_enable_metrics(self, vm_running): + state = (os_win_const.HYPERV_VM_STATE_ENABLED if vm_running + else os_win_const.HYPERV_VM_STATE_DISABLED) + self._vmutils.get_vm_state.return_value = state + + enable_metrics = self._metricsutils.enable_port_metrics_collection + + self.vif_driver.enable_metrics(mock.sentinel.instance_name, + mock.sentinel.vif_id) + + self._vmutils.get_vm_state.assert_called_once_with( + mock.sentinel.instance_name) + if vm_running: + enable_metrics.assert_called_once_with(mock.sentinel.vif_id) + else: + enable_metrics.assert_not_called() def test_plug_type_unknown(self): vif = {'type': mock.sentinel.vif_type} diff --git a/compute_hyperv/tests/unit/test_vmops.py b/compute_hyperv/tests/unit/test_vmops.py index 954e8bb9..48c880bd 100644 --- a/compute_hyperv/tests/unit/test_vmops.py +++ b/compute_hyperv/tests/unit/test_vmops.py @@ -77,6 +77,8 @@ class VMOpsTestCase(test_base.HyperVBaseTestCase): self._vmops = vmops.VMOps(virtapi=mock.MagicMock()) self._pathutils = self._vmops._pathutils self._vmutils = self._vmops._vmutils + self._metricsutils = self._vmops._metricsutils + self._vif_driver = self._vmops._vif_driver def test_list_instances(self): mock_instance = mock.MagicMock() @@ -647,6 +649,7 @@ class VMOpsTestCase(test_base.HyperVBaseTestCase): self.assertEqual([], events) mock_is_neutron.assert_called_once_with() + @mock.patch.object(vmops.VMOps, 'configure_instance_metrics') @mock.patch.object(vmops.VMOps, 'update_vm_resources') @mock.patch.object(vmops.VMOps, '_configure_secure_vm') @mock.patch.object(vmops.VMOps, '_requires_secure_boot') @@ -666,9 +669,8 @@ class VMOpsTestCase(test_base.HyperVBaseTestCase): mock_requires_certificate, mock_requires_secure_boot, mock_configure_secure_vm, - mock_update_vm_resources): - self.flags(enable_instance_metrics_collection=True, - group='hyperv') + mock_update_vm_resources, + mock_configure_metrics): root_device_info = mock.sentinel.ROOT_DEV_INFO block_device_info = {'root_disk': root_device_info, 'ephemerals': [], 'block_device_mapping': []} @@ -713,8 +715,7 @@ class VMOpsTestCase(test_base.HyperVBaseTestCase): self._vmops._vmutils.create_nic.assert_called_once_with( mock_instance.name, mock.sentinel.ID, mock.sentinel.ADDRESS) - mock_enable = self._vmops._metricsutils.enable_vm_metrics_collection - mock_enable.assert_called_once_with(mock_instance.name) + mock_configure_metrics.assert_called_once_with(mock_instance.name) mock_requires_secure_boot.assert_called_once_with( mock_instance, mock.sentinel.image_meta, mock.sentinel.vm_gen) mock_requires_certificate.assert_called_once_with( @@ -1721,6 +1722,39 @@ class VMOpsTestCase(test_base.HyperVBaseTestCase): network_info=mock_network_info) self._vmops._vif_driver.unplug.assert_has_calls(calls) + @ddt.data({}, + {'metrics_enabled': False}, + {'enable_network_metrics': False}) + @ddt.unpack + def test_configure_instance_metrics(self, metrics_enabled=True, + enable_network_metrics=True): + port_names = ['port1', 'port2'] + + enable_vm_metrics = self._metricsutils.enable_vm_metrics_collection + self._vmutils.get_vm_nic_names.return_value = port_names + + self.flags(enable_instance_metrics_collection=metrics_enabled, + group='hyperv') + + self._vmops.configure_instance_metrics( + mock.sentinel.instance_name, + enable_network_metrics=enable_network_metrics) + + if metrics_enabled: + enable_vm_metrics.assert_called_once_with( + mock.sentinel.instance_name) + if enable_network_metrics: + self._vmutils.get_vm_nic_names.assert_called_once_with( + mock.sentinel.instance_name) + self._vif_driver.enable_metrics.assert_has_calls( + [mock.call(mock.sentinel.instance_name, port_name) + for port_name in port_names]) + else: + enable_vm_metrics.assert_not_called() + + if not (metrics_enabled and enable_network_metrics): + self._vif_driver.enable_metrics.assert_not_called() + def _setup_remotefx_mocks(self): mock_instance = fake_instance.fake_instance_obj(self.context) mock_instance.flavor.extra_specs = { diff --git a/compute_hyperv/tests/unit/test_volumeops.py b/compute_hyperv/tests/unit/test_volumeops.py index be1cf94e..b8f0405e 100644 --- a/compute_hyperv/tests/unit/test_volumeops.py +++ b/compute_hyperv/tests/unit/test_volumeops.py @@ -539,6 +539,7 @@ class BaseVolumeDriverTestCase(test_base.HyperVBaseTestCase): self._vmutils = self._base_vol_driver._vmutils self._migrutils = self._base_vol_driver._migrutils self._diskutils = self._base_vol_driver._diskutils + self._metricsutils = self._base_vol_driver._metricsutils self._conn = self._base_vol_driver._conn @mock.patch.object(connector.InitiatorConnector, 'factory') @@ -673,6 +674,8 @@ class BaseVolumeDriverTestCase(test_base.HyperVBaseTestCase): else: self._base_vol_driver._check_san_policy() + @mock.patch.object(volumeops.BaseVolumeDriver, + '_configure_disk_metrics') @mock.patch.object(volumeops.BaseVolumeDriver, '_get_disk_res_path') @mock.patch.object(volumeops.BaseVolumeDriver, '_get_disk_ctrl_and_slot') @@ -684,6 +687,7 @@ class BaseVolumeDriverTestCase(test_base.HyperVBaseTestCase): mock_connect_volume, mock_get_disk_ctrl_and_slot, mock_get_disk_res_path, + mock_configure_metrics, is_block_dev=True): connection_info = get_fake_connection_info() self._base_vol_driver._is_block_dev = is_block_dev @@ -719,6 +723,7 @@ class BaseVolumeDriverTestCase(test_base.HyperVBaseTestCase): mock_get_disk_ctrl_and_slot.assert_called_once_with( mock.sentinel.instance_name, mock.sentinel.disk_bus) mock_validate_host_config.assert_called_once_with() + mock_configure_metrics.assert_called_once_with(mock.sentinel.disk_path) def test_attach_volume_image_file(self): self._test_attach_volume(is_block_dev=False) @@ -731,6 +736,27 @@ class BaseVolumeDriverTestCase(test_base.HyperVBaseTestCase): mock.sentinel.inst_name) self._vmutils.detach_vm_disk.assert_not_called() + @ddt.data({}, + {'metrics_enabled': False}, + {'is_block_dev': True}) + @ddt.unpack + def test_configure_disk_metrics(self, metrics_enabled=True, + is_block_dev=False): + self.flags(enable_instance_metrics_collection=metrics_enabled, + group='hyperv') + self._base_vol_driver._is_block_dev = is_block_dev + + enable_metrics = self._metricsutils.enable_disk_metrics_collection + + self._base_vol_driver._configure_disk_metrics(mock.sentinel.disk_path) + + if metrics_enabled and not is_block_dev: + enable_metrics.assert_called_once_with( + mock.sentinel.disk_path, + is_physical=is_block_dev) + else: + enable_metrics.assert_not_called() + @ddt.data(True, False) @mock.patch.object(volumeops.BaseVolumeDriver, 'get_disk_resource_path')