Port binding based on events during live migration
Currently the port binding call is made at destination compute in post live migration phase. This may cause network outage during post-copy as the virtual CPUs are paused immediately at source and unpaused at destination by transferring a minimum set of pages. The following domain life cycle events are emitted in this order during post-copy: * VIR_DOMAIN_EVENT_STARTED(destination) * VIR_DOMAIN_EVENT_SUSPENDED_POSTCOPY(source)--migration entered post-copy mode * VIR_DOMAIN_EVENT_RESUMED_POSTCOPY(destination)--guest is running on the destination host while some of its memory pages still remain on the source host. * VIR_DOMAIN_EVENT_RESUMED_MIGRATED(destination) * VIR_DOMAIN_EVENT_STOPPED_MIGRATED(source)--migration finished successfully and the destination host holds a complete guest state. In this change, dest host port binding activation is done when the following events are emitted at source for post-copy and pre-copy: * VIR_DOMAIN_EVENT_SUSPENDED_POSTCOPY * VIR_DOMAIN_EVENT_SUSPENDED_MIGRATED This reduces the network outage during live migration as the network switch is done right before the VM resumes at destination. Co-Authored-By: Matt Riedemann <mriedem.os@gmail.com> Change-Id: Ic5cab99944df9e501ba2032eb96911c36304494d Closes-Bug: #1605016
This commit is contained in:
parent
e53f46672e
commit
1f48d3d83b
|
@ -1051,22 +1051,27 @@ class ComputeManager(manager.Manager):
|
|||
{'state': event.get_name()},
|
||||
instance_uuid=event.get_instance_uuid())
|
||||
context = nova.context.get_admin_context(read_deleted='yes')
|
||||
# Join on info_cache since that's needed in migrate_instance_start.
|
||||
instance = objects.Instance.get_by_uuid(context,
|
||||
event.get_instance_uuid(),
|
||||
expected_attrs=[])
|
||||
expected_attrs=['info_cache'])
|
||||
vm_power_state = None
|
||||
if event.get_transition() == virtevent.EVENT_LIFECYCLE_STOPPED:
|
||||
event_transition = event.get_transition()
|
||||
if event_transition == virtevent.EVENT_LIFECYCLE_STOPPED:
|
||||
vm_power_state = power_state.SHUTDOWN
|
||||
elif event.get_transition() == virtevent.EVENT_LIFECYCLE_STARTED:
|
||||
elif event_transition == virtevent.EVENT_LIFECYCLE_STARTED:
|
||||
vm_power_state = power_state.RUNNING
|
||||
elif event.get_transition() == virtevent.EVENT_LIFECYCLE_PAUSED:
|
||||
elif event_transition in (
|
||||
virtevent.EVENT_LIFECYCLE_PAUSED,
|
||||
virtevent.EVENT_LIFECYCLE_POSTCOPY_STARTED,
|
||||
virtevent.EVENT_LIFECYCLE_MIGRATION_COMPLETED):
|
||||
vm_power_state = power_state.PAUSED
|
||||
elif event.get_transition() == virtevent.EVENT_LIFECYCLE_RESUMED:
|
||||
elif event_transition == virtevent.EVENT_LIFECYCLE_RESUMED:
|
||||
vm_power_state = power_state.RUNNING
|
||||
elif event.get_transition() == virtevent.EVENT_LIFECYCLE_SUSPENDED:
|
||||
elif event_transition == virtevent.EVENT_LIFECYCLE_SUSPENDED:
|
||||
vm_power_state = power_state.SUSPENDED
|
||||
else:
|
||||
LOG.warning("Unexpected power state %d", event.get_transition())
|
||||
LOG.warning("Unexpected lifecycle event: %d", event_transition)
|
||||
|
||||
# Note(lpetrut): The event may be delayed, thus not reflecting
|
||||
# the current instance power state. In that case, ignore the event.
|
||||
|
@ -1087,6 +1092,36 @@ class ComputeManager(manager.Manager):
|
|||
instance,
|
||||
vm_power_state)
|
||||
|
||||
# The following checks are for live migration. We want to activate
|
||||
# the port binding for the destination host before the live migration
|
||||
# is resumed on the destination host in order to reduce network
|
||||
# downtime. Otherwise the ports are bound to the destination host
|
||||
# in post_live_migration_at_destination.
|
||||
migrate_finish_statuses = {
|
||||
# This happens on the source node and indicates live migration
|
||||
# entered post-copy mode.
|
||||
virtevent.EVENT_LIFECYCLE_POSTCOPY_STARTED: 'running (post-copy)',
|
||||
# Suspended for offline migration.
|
||||
virtevent.EVENT_LIFECYCLE_MIGRATION_COMPLETED: 'running'
|
||||
}
|
||||
if (instance.task_state == task_states.MIGRATING and
|
||||
event_transition in migrate_finish_statuses):
|
||||
status = migrate_finish_statuses[event_transition]
|
||||
try:
|
||||
migration = objects.Migration.get_by_instance_and_status(
|
||||
context, instance.uuid, status)
|
||||
LOG.debug('Binding ports to destination host: %s',
|
||||
migration.dest_compute, instance=instance)
|
||||
# For neutron, migrate_instance_start will activate the
|
||||
# destination host port bindings, if there are any created by
|
||||
# conductor before live migration started.
|
||||
self.network_api.migrate_instance_start(
|
||||
context, instance, migration)
|
||||
except exception.MigrationNotFoundByStatus:
|
||||
LOG.warning("Unable to find migration record with status "
|
||||
"'%s' for instance. Port binding will happen in "
|
||||
"post live migration.", status, instance=instance)
|
||||
|
||||
def handle_events(self, event):
|
||||
if isinstance(event, virtevent.LifecycleEvent):
|
||||
try:
|
||||
|
|
|
@ -92,24 +92,49 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase):
|
|||
@mock.patch.object(manager.ComputeManager, '_get_power_state')
|
||||
@mock.patch.object(manager.ComputeManager, '_sync_instance_power_state')
|
||||
@mock.patch.object(objects.Instance, 'get_by_uuid')
|
||||
def _test_handle_lifecycle_event(self, mock_get, mock_sync,
|
||||
mock_get_power_state, transition,
|
||||
event_pwr_state, current_pwr_state):
|
||||
@mock.patch.object(objects.Migration, 'get_by_instance_and_status')
|
||||
@mock.patch.object(nova.network.neutronv2.api.API,
|
||||
'migrate_instance_start')
|
||||
def _test_handle_lifecycle_event(self, migrate_instance_start,
|
||||
mock_get_migration, mock_get,
|
||||
mock_sync, mock_get_power_state,
|
||||
transition, event_pwr_state,
|
||||
current_pwr_state):
|
||||
event = mock.Mock()
|
||||
event.get_instance_uuid.return_value = mock.sentinel.uuid
|
||||
mock_get.return_value = fake_instance.fake_instance_obj(self.context,
|
||||
task_state=task_states.MIGRATING)
|
||||
event.get_transition.return_value = transition
|
||||
mock_get_power_state.return_value = current_pwr_state
|
||||
|
||||
self.compute.handle_lifecycle_event(event)
|
||||
mock_get.assert_called_once_with(
|
||||
test.MatchType(context.RequestContext),
|
||||
event.get_instance_uuid.return_value,
|
||||
expected_attrs=['info_cache'])
|
||||
|
||||
mock_get.assert_called_with(mock.ANY, mock.sentinel.uuid,
|
||||
expected_attrs=[])
|
||||
if event_pwr_state == current_pwr_state:
|
||||
mock_sync.assert_called_with(mock.ANY, mock_get.return_value,
|
||||
event_pwr_state)
|
||||
else:
|
||||
self.assertFalse(mock_sync.called)
|
||||
|
||||
migrate_finish_statuses = {
|
||||
virtevent.EVENT_LIFECYCLE_POSTCOPY_STARTED: 'running (post-copy)',
|
||||
virtevent.EVENT_LIFECYCLE_MIGRATION_COMPLETED: 'running'
|
||||
}
|
||||
if transition in migrate_finish_statuses:
|
||||
mock_get_migration.assert_called_with(
|
||||
test.MatchType(context.RequestContext),
|
||||
mock_get.return_value.uuid,
|
||||
migrate_finish_statuses[transition])
|
||||
migrate_instance_start.assert_called_once_with(
|
||||
test.MatchType(context.RequestContext),
|
||||
mock_get.return_value,
|
||||
mock_get_migration.return_value)
|
||||
else:
|
||||
mock_get_migration.assert_not_called()
|
||||
migrate_instance_start.assert_not_called()
|
||||
|
||||
def test_handle_lifecycle_event(self):
|
||||
event_map = {virtevent.EVENT_LIFECYCLE_STOPPED: power_state.SHUTDOWN,
|
||||
virtevent.EVENT_LIFECYCLE_STARTED: power_state.RUNNING,
|
||||
|
@ -117,6 +142,10 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase):
|
|||
virtevent.EVENT_LIFECYCLE_RESUMED: power_state.RUNNING,
|
||||
virtevent.EVENT_LIFECYCLE_SUSPENDED:
|
||||
power_state.SUSPENDED,
|
||||
virtevent.EVENT_LIFECYCLE_POSTCOPY_STARTED:
|
||||
power_state.PAUSED,
|
||||
virtevent.EVENT_LIFECYCLE_MIGRATION_COMPLETED:
|
||||
power_state.PAUSED,
|
||||
}
|
||||
|
||||
for transition, pwr_state in event_map.items():
|
||||
|
@ -130,6 +159,35 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase):
|
|||
event_pwr_state=power_state.SHUTDOWN,
|
||||
current_pwr_state=power_state.RUNNING)
|
||||
|
||||
@mock.patch('nova.objects.Instance.get_by_uuid')
|
||||
@mock.patch('nova.compute.manager.ComputeManager.'
|
||||
'_sync_instance_power_state')
|
||||
@mock.patch('nova.objects.Migration.get_by_instance_and_status',
|
||||
side_effect=exception.MigrationNotFoundByStatus(
|
||||
instance_id=uuids.instance, status='running (post-copy)'))
|
||||
def test_handle_lifecycle_event_postcopy_migration_not_found(
|
||||
self, mock_get_migration, mock_sync, mock_get_instance):
|
||||
"""Tests a EVENT_LIFECYCLE_POSTCOPY_STARTED scenario where the
|
||||
migration record is not found by the expected status.
|
||||
"""
|
||||
inst = fake_instance.fake_instance_obj(
|
||||
self.context, uuid=uuids.instance,
|
||||
task_state=task_states.MIGRATING)
|
||||
mock_get_instance.return_value = inst
|
||||
event = virtevent.LifecycleEvent(
|
||||
uuids.instance, virtevent.EVENT_LIFECYCLE_POSTCOPY_STARTED)
|
||||
with mock.patch.object(self.compute, '_get_power_state',
|
||||
return_value=power_state.PAUSED):
|
||||
with mock.patch.object(self.compute.network_api,
|
||||
'migrate_instance_finish') as mig_finish:
|
||||
self.compute.handle_lifecycle_event(event)
|
||||
# Since we failed to find the migration record, we shouldn't call
|
||||
# migrate_instance_finish.
|
||||
mig_finish.assert_not_called()
|
||||
mock_get_migration.assert_called_once_with(
|
||||
test.MatchType(context.RequestContext), uuids.instance,
|
||||
'running (post-copy)')
|
||||
|
||||
@mock.patch('nova.compute.utils.notify_about_instance_action')
|
||||
def test_delete_instance_info_cache_delete_ordering(self, mock_notify):
|
||||
call_tracker = mock.Mock()
|
||||
|
|
|
@ -192,6 +192,46 @@ class HostTestCase(test.NoDBTestCase):
|
|||
self.assertEqual(got_events[0].transition,
|
||||
event.EVENT_LIFECYCLE_STOPPED)
|
||||
|
||||
def test_event_lifecycle_callback_suspended_old_libvirt(self):
|
||||
"""Tests the suspended lifecycle event with libvirt before post-copy
|
||||
"""
|
||||
hostimpl = mock.MagicMock()
|
||||
conn = mock.MagicMock()
|
||||
fake_dom_xml = """
|
||||
<domain type='kvm'>
|
||||
<uuid>cef19ce0-0ca2-11df-855d-b19fbce37686</uuid>
|
||||
</domain>
|
||||
"""
|
||||
dom = fakelibvirt.Domain(conn, fake_dom_xml, running=True)
|
||||
VIR_DOMAIN_EVENT_SUSPENDED_PAUSED = 0
|
||||
host.Host._event_lifecycle_callback(
|
||||
conn, dom, fakelibvirt.VIR_DOMAIN_EVENT_SUSPENDED,
|
||||
detail=VIR_DOMAIN_EVENT_SUSPENDED_PAUSED, opaque=hostimpl)
|
||||
expected_event = hostimpl._queue_event.call_args[0][0]
|
||||
self.assertEqual(event.EVENT_LIFECYCLE_PAUSED,
|
||||
expected_event.transition)
|
||||
|
||||
def test_event_lifecycle_callback_suspended_postcopy(self):
|
||||
"""Tests the suspended lifecycle event with libvirt with post-copy"""
|
||||
hostimpl = mock.MagicMock()
|
||||
conn = mock.MagicMock()
|
||||
fake_dom_xml = """
|
||||
<domain type='kvm'>
|
||||
<uuid>cef19ce0-0ca2-11df-855d-b19fbce37686</uuid>
|
||||
</domain>
|
||||
"""
|
||||
dom = fakelibvirt.Domain(conn, fake_dom_xml, running=True)
|
||||
VIR_DOMAIN_EVENT_SUSPENDED_POSTCOPY = 7
|
||||
with mock.patch.object(host.libvirt,
|
||||
'VIR_DOMAIN_EVENT_SUSPENDED_POSTCOPY', new=7,
|
||||
create=True):
|
||||
host.Host._event_lifecycle_callback(
|
||||
conn, dom, fakelibvirt.VIR_DOMAIN_EVENT_SUSPENDED,
|
||||
detail=VIR_DOMAIN_EVENT_SUSPENDED_POSTCOPY, opaque=hostimpl)
|
||||
expected_event = hostimpl._queue_event.call_args[0][0]
|
||||
self.assertEqual(event.EVENT_LIFECYCLE_POSTCOPY_STARTED,
|
||||
expected_event.transition)
|
||||
|
||||
def test_event_emit_delayed_call_delayed(self):
|
||||
ev = event.LifecycleEvent(
|
||||
"cef19ce0-0ca2-11df-855d-b19fbce37686",
|
||||
|
|
|
@ -29,6 +29,9 @@ EVENT_LIFECYCLE_STOPPED = 1
|
|||
EVENT_LIFECYCLE_PAUSED = 2
|
||||
EVENT_LIFECYCLE_RESUMED = 3
|
||||
EVENT_LIFECYCLE_SUSPENDED = 4
|
||||
EVENT_LIFECYCLE_POSTCOPY_STARTED = 5
|
||||
EVENT_LIFECYCLE_MIGRATION_COMPLETED = 6
|
||||
|
||||
|
||||
NAMES = {
|
||||
EVENT_LIFECYCLE_STARTED: _('Started'),
|
||||
|
@ -36,6 +39,8 @@ NAMES = {
|
|||
EVENT_LIFECYCLE_PAUSED: _('Paused'),
|
||||
EVENT_LIFECYCLE_RESUMED: _('Resumed'),
|
||||
EVENT_LIFECYCLE_SUSPENDED: _('Suspended'),
|
||||
EVENT_LIFECYCLE_POSTCOPY_STARTED: _('Postcopy started'),
|
||||
EVENT_LIFECYCLE_MIGRATION_COMPLETED: _('Migration completed'),
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -170,7 +170,20 @@ class Host(object):
|
|||
elif event == libvirt.VIR_DOMAIN_EVENT_STARTED:
|
||||
transition = virtevent.EVENT_LIFECYCLE_STARTED
|
||||
elif event == libvirt.VIR_DOMAIN_EVENT_SUSPENDED:
|
||||
transition = virtevent.EVENT_LIFECYCLE_PAUSED
|
||||
# NOTE(siva_krishnan): We have to check if
|
||||
# VIR_DOMAIN_EVENT_SUSPENDED_POSTCOPY and
|
||||
# VIR_DOMAIN_EVENT_SUSPENDED_MIGRATED exist since the current
|
||||
# minimum version of libvirt (1.2.9) don't have those attributes.
|
||||
# This check can be removed once MIN_LIBVIRT_VERSION is bumped to
|
||||
# at least 1.3.3.
|
||||
if (hasattr(libvirt, 'VIR_DOMAIN_EVENT_SUSPENDED_POSTCOPY') and
|
||||
detail == libvirt.VIR_DOMAIN_EVENT_SUSPENDED_POSTCOPY):
|
||||
transition = virtevent.EVENT_LIFECYCLE_POSTCOPY_STARTED
|
||||
elif (hasattr(libvirt, 'VIR_DOMAIN_EVENT_SUSPENDED_MIGRATED') and
|
||||
detail == libvirt.VIR_DOMAIN_EVENT_SUSPENDED_MIGRATED):
|
||||
transition = virtevent.EVENT_LIFECYCLE_MIGRATION_COMPLETED
|
||||
else:
|
||||
transition = virtevent.EVENT_LIFECYCLE_PAUSED
|
||||
elif event == libvirt.VIR_DOMAIN_EVENT_RESUMED:
|
||||
transition = virtevent.EVENT_LIFECYCLE_RESUMED
|
||||
|
||||
|
|
Loading…
Reference in New Issue