From 9999bce00f5bea5f3e90ab9e16625d4237504bcb Mon Sep 17 00:00:00 2001 From: Stephen Finucane Date: Tue, 16 Oct 2018 17:41:17 +0100 Subject: [PATCH] Fail to live migration if instance has a NUMA topology Live migration is currently totally broken if a NUMA topology is present. This affects everything that's been regrettably stuffed in with NUMA topology including CPU pinning, hugepage support and emulator thread support. Side effects can range from simple unexpected performance hits (due to instances running on the same cores) to complete failures (due to instance cores or huge pages being mapped to CPUs/NUMA nodes that don't exist on the destination host). Until such a time as we resolve these issues, we should alert users to the fact that such issues exist. A workaround option is provided for operators that _really_ need the broken behavior, but it's defaulted to False to highlight the brokenness of this feature to unsuspecting operators. Conflicts: nova/conf/workarounds.py nova/tests/unit/api/openstack/compute/admin_only_action_common.py nova/tests/unit/api/openstack/compute/test_migrate_server.py nova/tests/unit/conductor/tasks/test_live_migrate.py NOTE(stephenfin): stable/rocky conflicts due to removal of 'report_ironic_standard_resource_class_inventory' option and addition of change Iaea1cb4ed93bb98f451de4f993106d7891ca3682 on master. NOTE(stephenfin): stable/queens conflicts due to presence of the 'enable_consoleauth' configuration option and change I83b473e9ba557545b5c186f979e068e442de2424 (Mox to mock) in stable/rocky. A hyperlink is removed from the config option help text as the version of 'oslo.config' used here does not parse help text as rST (bug 1755783). Change-Id: I217fba9138132b107e9d62895d699d238392e761 Signed-off-by: Stephen Finucane Related-bug: #1289064 (cherry picked from commit ae2e5650d14a2c81dd397727d67b60f9b8dd0dd7) (cherry picked from commit 52b89734426253f64b6d4797ba4d849c3020fb52) --- doc/source/admin/adv-config.rst | 2 + doc/source/admin/configuring-migrations.rst | 3 +- doc/source/admin/cpu-topologies.rst | 2 + .../common/numa-live-migration-warning.txt | 10 +++++ doc/source/user/feature-classification.rst | 2 + nova/api/openstack/compute/migrate_server.py | 6 ++- nova/conductor/tasks/live_migrate.py | 26 +++++++++++ nova/conf/workarounds.py | 26 +++++++++++ .../compute/admin_only_action_common.py | 37 ++++++++++------ .../openstack/compute/test_migrate_server.py | 8 ++-- .../unit/conductor/tasks/test_live_migrate.py | 43 ++++++++++++++++++- ...-migration-with-numa-bc710a1bcde25957.yaml | 25 +++++++++++ 12 files changed, 169 insertions(+), 21 deletions(-) create mode 100644 doc/source/common/numa-live-migration-warning.txt create mode 100644 releasenotes/notes/disable-live-migration-with-numa-bc710a1bcde25957.yaml diff --git a/doc/source/admin/adv-config.rst b/doc/source/admin/adv-config.rst index 61c101e8a47b..df0631cd76d7 100644 --- a/doc/source/admin/adv-config.rst +++ b/doc/source/admin/adv-config.rst @@ -20,6 +20,8 @@ vital. In these cases, instances can be expected to deliver near-native performance. The Compute service provides features to improve individual instance for these kind of workloads. +.. include:: /common/numa-live-migration-warning.txt + .. toctree:: :maxdepth: 2 diff --git a/doc/source/admin/configuring-migrations.rst b/doc/source/admin/configuring-migrations.rst index 260001d6904a..b6f13fcfe369 100644 --- a/doc/source/admin/configuring-migrations.rst +++ b/doc/source/admin/configuring-migrations.rst @@ -19,7 +19,8 @@ This document covers live migrations using the .. note:: Not all Compute service hypervisor drivers support live-migration, or - support all live-migration features. + support all live-migration features. Similarly not all compute service + features are supported. Consult :doc:`/user/support-matrix` to determine which hypervisors support live-migration. diff --git a/doc/source/admin/cpu-topologies.rst b/doc/source/admin/cpu-topologies.rst index e3c60d4fbc3a..2f3dbeb19d72 100644 --- a/doc/source/admin/cpu-topologies.rst +++ b/doc/source/admin/cpu-topologies.rst @@ -7,6 +7,8 @@ control over how instances run on hypervisor CPUs and the topology of virtual CPUs available to instances. These features help minimize latency and maximize performance. +.. include:: /common/numa-live-migration-warning.txt + SMP, NUMA, and SMT ~~~~~~~~~~~~~~~~~~ diff --git a/doc/source/common/numa-live-migration-warning.txt b/doc/source/common/numa-live-migration-warning.txt new file mode 100644 index 000000000000..7b695f218830 --- /dev/null +++ b/doc/source/common/numa-live-migration-warning.txt @@ -0,0 +1,10 @@ +.. important:: + + Unless :oslo.config:option:`specifically enabled + `, live migration is not currently + possible for instances with a NUMA topology when using the libvirt driver. + A NUMA topology may be specified explicitly or can be added implicitly due + to the use of CPU pinning or huge pages. Refer to `bug #1289064`__ for more + information. + + __ https://bugs.launchpad.net/nova/+bug/1289064 diff --git a/doc/source/user/feature-classification.rst b/doc/source/user/feature-classification.rst index ceeafe572f5d..db0ce886d973 100644 --- a/doc/source/user/feature-classification.rst +++ b/doc/source/user/feature-classification.rst @@ -65,6 +65,8 @@ create a particular service. It is common for this workloads needing bare metal like performance, i.e. low latency and close to line speed performance. +.. include:: /common/numa-live-migration-warning.txt + .. feature_matrix:: feature-matrix-nfv.ini .. _matrix-hpc: diff --git a/nova/api/openstack/compute/migrate_server.py b/nova/api/openstack/compute/migrate_server.py index a85679ccad33..23c8735281a6 100644 --- a/nova/api/openstack/compute/migrate_server.py +++ b/nova/api/openstack/compute/migrate_server.py @@ -100,7 +100,11 @@ class MigrateServerController(wsgi.Controller): disk_over_commit = strutils.bool_from_string(disk_over_commit, strict=True) - instance = common.get_instance(self.compute_api, context, id) + # NOTE(stephenfin): we need 'numa_topology' because of the + # 'LiveMigrationTask._check_instance_has_no_numa' check in the + # conductor + instance = common.get_instance(self.compute_api, context, id, + expected_attrs=['numa_topology']) try: self.compute_api.live_migrate(context, instance, block_migration, disk_over_commit, host, force, async) diff --git a/nova/conductor/tasks/live_migrate.py b/nova/conductor/tasks/live_migrate.py index 0234b6b9ad04..07b183f70617 100644 --- a/nova/conductor/tasks/live_migrate.py +++ b/nova/conductor/tasks/live_migrate.py @@ -22,6 +22,7 @@ import nova.conf from nova import exception from nova.i18n import _ from nova import objects +from nova.objects import fields as obj_fields from nova.scheduler import utils as scheduler_utils from nova import utils @@ -56,6 +57,7 @@ class LiveMigrationTask(base.TaskBase): def _execute(self): self._check_instance_is_active() + self._check_instance_has_no_numa() self._check_host_is_up(self.source) if should_do_migration_allocation(self.context): @@ -141,6 +143,30 @@ class LiveMigrationTask(base.TaskBase): state=self.instance.power_state, method='live migrate') + def _check_instance_has_no_numa(self): + """Prevent live migrations of instances with NUMA topologies.""" + if not self.instance.numa_topology: + return + + # Only KVM (libvirt) supports NUMA topologies with CPU pinning; + # HyperV's vNUMA feature doesn't allow specific pinning + hypervisor_type = objects.ComputeNode.get_by_host_and_nodename( + self.context, self.source, self.instance.node).hypervisor_type + if hypervisor_type != obj_fields.HVType.KVM: + return + + msg = ('Instance has an associated NUMA topology. ' + 'Instance NUMA topologies, including related attributes ' + 'such as CPU pinning, huge page and emulator thread ' + 'pinning information, are not currently recalculated on ' + 'live migration. See bug #1289064 for more information.' + ) + + if CONF.workarounds.enable_numa_live_migration: + LOG.warning(msg, instance=self.instance) + else: + raise exception.MigrationPreCheckError(reason=msg) + def _check_host_is_up(self, host): service = objects.Service.get_by_compute_host(self.context, host) diff --git a/nova/conf/workarounds.py b/nova/conf/workarounds.py index 11ff37d12ea0..1e346f2e675c 100644 --- a/nova/conf/workarounds.py +++ b/nova/conf/workarounds.py @@ -176,6 +176,32 @@ Related options: * ``compute_driver`` (libvirt) * ``[libvirt]/images_type`` (rbd) * ``instances_path`` +"""), + + cfg.BoolOpt( + 'enable_numa_live_migration', + default=False, + help=""" +Enable live migration of instances with NUMA topologies. + +Live migration of instances with NUMA topologies is disabled by default +when using the libvirt driver. This includes live migration of instances with +CPU pinning or hugepages. CPU pinning and huge page information for such +instances is not currently re-calculated, as noted in bug #1289064. This +means that if instances were already present on the destination host, the +migrated instance could be placed on the same dedicated cores as these +instances or use hugepages allocated for another instance. Alternately, if the +host platforms were not homogeneous, the instance could be assigned to +non-existent cores or be inadvertently split across host NUMA nodes. + +Despite these known issues, there may be cases where live migration is +necessary. By enabling this option, operators that are aware of the issues and +are willing to manually work around them can enable live migration support for +these instances. + +Related options: + +* ``compute_driver``: Only the libvirt driver is affected. """), ] diff --git a/nova/tests/unit/api/openstack/compute/admin_only_action_common.py b/nova/tests/unit/api/openstack/compute/admin_only_action_common.py index f9273b62ac76..a098b659ff93 100644 --- a/nova/tests/unit/api/openstack/compute/admin_only_action_common.py +++ b/nova/tests/unit/api/openstack/compute/admin_only_action_common.py @@ -30,27 +30,36 @@ class CommonMixin(object): self.req = fakes.HTTPRequest.blank('') self.context = self.req.environ['nova.context'] - def _stub_instance_get(self, uuid=None): - if uuid is None: + def _stub_instance_get(self, action=None, uuid=None): + if not uuid: uuid = uuidutils.generate_uuid() instance = fake_instance.fake_instance_obj(self.context, id=1, uuid=uuid, vm_state=vm_states.ACTIVE, task_state=None, launched_at=timeutils.utcnow()) + + expected_attrs = None + if action == '_migrate_live': + expected_attrs = ['numa_topology'] + self.compute_api.get( - self.context, uuid, expected_attrs=None).AndReturn(instance) + self.context, uuid, expected_attrs=expected_attrs).AndReturn( + instance) + return instance - def _stub_instance_get_failure(self, exc_info, uuid=None): - if uuid is None: - uuid = uuidutils.generate_uuid() + def _stub_instance_get_failure(self, action, exc_info, uuid): + expected_attrs = None + if action == '_migrate_live': + expected_attrs = ['numa_topology'] + self.compute_api.get( - self.context, uuid, expected_attrs=None).AndRaise(exc_info) - return uuid + self.context, uuid, expected_attrs=expected_attrs).AndRaise( + exc_info) def _test_non_existing_instance(self, action, body_map=None): uuid = uuidutils.generate_uuid() self._stub_instance_get_failure( - exception.InstanceNotFound(instance_id=uuid), uuid=uuid) + action, exception.InstanceNotFound(instance_id=uuid), uuid=uuid) self.mox.ReplayAll() controller_function = getattr(self.controller, action) @@ -68,7 +77,7 @@ class CommonMixin(object): method = action.replace('_', '') compute_api_args_map = compute_api_args_map or {} - instance = self._stub_instance_get() + instance = self._stub_instance_get(action) args, kwargs = compute_api_args_map.get(action, ((), {})) getattr(self.compute_api, method)(self.context, instance, *args, **kwargs) @@ -92,7 +101,7 @@ class CommonMixin(object): if method is None: method = action.replace('_', '') - instance = self._stub_instance_get() + instance = self._stub_instance_get(action) body = {} compute_api_args_map = {} args, kwargs = compute_api_args_map.get(action, ((), {})) @@ -120,7 +129,7 @@ class CommonMixin(object): if compute_api_args_map is None: compute_api_args_map = {} - instance = self._stub_instance_get() + instance = self._stub_instance_get(action) args, kwargs = compute_api_args_map.get(action, ((), {})) @@ -150,7 +159,7 @@ class CommonMixin(object): method = action.replace('_', '') compute_api_args_map = compute_api_args_map or {} - instance = self._stub_instance_get() + instance = self._stub_instance_get(action) args, kwargs = compute_api_args_map.get(action, ((), {})) getattr(self.compute_api, method)(self.context, instance, *args, @@ -174,7 +183,7 @@ class CommonMixin(object): method = action.replace('_', '') compute_api_args_map = compute_api_args_map or {} - instance = self._stub_instance_get() + instance = self._stub_instance_get(action) args, kwargs = compute_api_args_map.get(action, ((), {})) getattr(self.compute_api, method)(self.context, instance, *args, diff --git a/nova/tests/unit/api/openstack/compute/test_migrate_server.py b/nova/tests/unit/api/openstack/compute/test_migrate_server.py index 8423cfc708c6..af93964a15a4 100644 --- a/nova/tests/unit/api/openstack/compute/test_migrate_server.py +++ b/nova/tests/unit/api/openstack/compute/test_migrate_server.py @@ -138,7 +138,7 @@ class MigrateServerTestsV21(admin_only_action_common.CommonTests): def _test_migrate_live_succeeded(self, param): self.mox.StubOutWithMock(self.compute_api, 'live_migrate') - instance = self._stub_instance_get() + instance = self._stub_instance_get('_migrate_live') self.compute_api.live_migrate(self.context, instance, False, self.disk_over_commit, 'hostname', self.force, self.async) @@ -211,7 +211,7 @@ class MigrateServerTestsV21(admin_only_action_common.CommonTests): check_response=True): self.mox.StubOutWithMock(self.compute_api, 'live_migrate') - instance = self._stub_instance_get(uuid=uuid) + instance = self._stub_instance_get('_migrate_live', uuid=uuid) self.compute_api.live_migrate(self.context, instance, False, self.disk_over_commit, 'hostname', self.force, self.async @@ -438,7 +438,7 @@ class MigrateServerTestsV234(MigrateServerTestsV230): reason="Compute host %(host)s could not be found.", host='hostname') self.mox.StubOutWithMock(self.compute_api, 'live_migrate') - instance = self._stub_instance_get() + instance = self._stub_instance_get('_migrate_live') self.compute_api.live_migrate(self.context, instance, None, self.disk_over_commit, 'hostname', self.force, self.async).AndRaise(exc) @@ -455,7 +455,7 @@ class MigrateServerTestsV234(MigrateServerTestsV230): exc = exception.InvalidHypervisorType( reason="The supplied hypervisor type of is invalid.") self.mox.StubOutWithMock(self.compute_api, 'live_migrate') - instance = self._stub_instance_get() + instance = self._stub_instance_get('_migrate_live') self.compute_api.live_migrate(self.context, instance, None, self.disk_over_commit, 'hostname', self.force, self.async).AndRaise(exc) diff --git a/nova/tests/unit/conductor/tasks/test_live_migrate.py b/nova/tests/unit/conductor/tasks/test_live_migrate.py index fd8ec25a01c7..498939954d7f 100644 --- a/nova/tests/unit/conductor/tasks/test_live_migrate.py +++ b/nova/tests/unit/conductor/tasks/test_live_migrate.py @@ -18,6 +18,7 @@ from nova.compute import power_state from nova.compute import rpcapi as compute_rpcapi from nova.compute import vm_states from nova.conductor.tasks import live_migrate +from nova import context as nova_context from nova import exception from nova import objects from nova.scheduler import client as scheduler_client @@ -38,7 +39,7 @@ fake_selection2 = objects.Selection(service_host="host2", nodename="node2", class LiveMigrationTaskTestCase(test.NoDBTestCase): def setUp(self): super(LiveMigrationTaskTestCase, self).setUp() - self.context = "context" + self.context = nova_context.get_admin_context() self.instance_host = "host" self.instance_uuid = uuids.instance self.instance_image = "image_ref" @@ -52,6 +53,7 @@ class LiveMigrationTaskTestCase(test.NoDBTestCase): self.instance = objects.Instance._from_db_object( self.context, objects.Instance(), db_instance) self.instance.system_metadata = {'image_hw_disk_bus': 'scsi'} + self.instance.numa_topology = None self.destination = "destination" self.block_migration = "bm" self.disk_over_commit = "doc" @@ -176,6 +178,45 @@ class LiveMigrationTaskTestCase(test.NoDBTestCase): self.assertRaises(exception.InstanceInvalidState, self.task._check_instance_is_active) + @mock.patch.object(objects.ComputeNode, 'get_by_host_and_nodename') + def test_check_instance_has_no_numa_passes_no_numa(self, mock_get): + self.flags(enable_numa_live_migration=False, group='workarounds') + self.task.instance.numa_topology = None + mock_get.return_value = objects.ComputeNode( + uuid=uuids.cn1, hypervisor_type='kvm') + self.task._check_instance_has_no_numa() + + @mock.patch.object(objects.ComputeNode, 'get_by_host_and_nodename') + def test_check_instance_has_no_numa_passes_non_kvm(self, mock_get): + self.flags(enable_numa_live_migration=False, group='workarounds') + self.task.instance.numa_topology = objects.InstanceNUMATopology( + cells=[objects.InstanceNUMACell(id=0, cpuset=set([0]), + memory=1024)]) + mock_get.return_value = objects.ComputeNode( + uuid=uuids.cn1, hypervisor_type='xen') + self.task._check_instance_has_no_numa() + + @mock.patch.object(objects.ComputeNode, 'get_by_host_and_nodename') + def test_check_instance_has_no_numa_passes_workaround(self, mock_get): + self.flags(enable_numa_live_migration=True, group='workarounds') + self.task.instance.numa_topology = objects.InstanceNUMATopology( + cells=[objects.InstanceNUMACell(id=0, cpuset=set([0]), + memory=1024)]) + mock_get.return_value = objects.ComputeNode( + uuid=uuids.cn1, hypervisor_type='kvm') + self.task._check_instance_has_no_numa() + + @mock.patch.object(objects.ComputeNode, 'get_by_host_and_nodename') + def test_check_instance_has_no_numa_fails(self, mock_get): + self.flags(enable_numa_live_migration=False, group='workarounds') + mock_get.return_value = objects.ComputeNode( + uuid=uuids.cn1, hypervisor_type='kvm') + self.task.instance.numa_topology = objects.InstanceNUMATopology( + cells=[objects.InstanceNUMACell(id=0, cpuset=set([0]), + memory=1024)]) + self.assertRaises(exception.MigrationPreCheckError, + self.task._check_instance_has_no_numa) + @mock.patch.object(objects.Service, 'get_by_compute_host') @mock.patch.object(servicegroup.API, 'service_is_up') def test_check_instance_host_is_up(self, mock_is_up, mock_get): diff --git a/releasenotes/notes/disable-live-migration-with-numa-bc710a1bcde25957.yaml b/releasenotes/notes/disable-live-migration-with-numa-bc710a1bcde25957.yaml new file mode 100644 index 000000000000..4297c186f7f3 --- /dev/null +++ b/releasenotes/notes/disable-live-migration-with-numa-bc710a1bcde25957.yaml @@ -0,0 +1,25 @@ +--- +upgrade: + - | + Live migration of instances with NUMA topologies is now disabled by default + when using the libvirt driver. This includes live migration of instances + with CPU pinning or hugepages. CPU pinning and huge page information for + such instances is not currently re-calculated, as noted in `bug #1289064`_. + This means that if instances were already present on the destination host, + the migrated instance could be placed on the same dedicated cores as these + instances or use hugepages allocated for another instance. Alternately, if + the host platforms were not homogeneous, the instance could be assigned to + non-existent cores or be inadvertently split across host NUMA nodes. + + The `long term solution`_ to these issues is to recalculate the XML on the + destination node. When this work is completed, the restriction on live + migration with NUMA topologies will be lifted. + + For operators that are aware of the issues and are able to manually work + around them, the ``[workarounds] enable_numa_live_migration`` option can + be used to allow the broken behavior. + + For more information, refer to `bug #1289064`_. + + .. _bug #1289064: https://bugs.launchpad.net/nova/+bug/1289064 + .. _long term solution: https://blueprints.launchpad.net/nova/+spec/numa-aware-live-migration