Port binding based on events during live migration
Currently port binding call is made at destination compute in post live migration phase. This may cause network outage during post-copy as the virtual CPUs are paused immediately at source and unpaused at destination by transferring a minimum set of pages. The following domain life cycle events are emitted in this order during post-copy: * VIR_DOMAIN_EVENT_STARTED(destination) * VIR_DOMAIN_EVENT_SUSPENDED_POSTCOPY(source)--migration entered post-copy mode * VIR_DOMAIN_EVENT_RESUMED_POSTCOPY(destination)--guest is running on the destination host while some of its memory pages still remain on the source host. * VIR_DOMAIN_EVENT_RESUMED_MIGRATED(destination) * VIR_DOMAIN_EVENT_STOPPED_MIGRATED(source)--migration finished successfully and the destination host holds a complete guest state. In this change, dest host port binding activation is done when the following events are emitted at source for post-copy and pre-copy: * VIR_DOMAIN_EVENT_SUSPENDED_POSTCOPY * VIR_DOMAIN_EVENT_SUSPENDED_MIGRATED This reduces the network outage during live migration as the network switch is done right before the VM resumes at the destination. Co-Authored-By: Matt Riedemann <mriedem.os@gmail.com> Change-Id: Ic5cab99944df9e501ba2032eb96911c36304494d Closes-Bug: #1605016
This commit is contained in:
parent
e53f46672e
commit
1f48d3d83b
|
@ -1051,22 +1051,27 @@ class ComputeManager(manager.Manager):
|
||||||
{'state': event.get_name()},
|
{'state': event.get_name()},
|
||||||
instance_uuid=event.get_instance_uuid())
|
instance_uuid=event.get_instance_uuid())
|
||||||
context = nova.context.get_admin_context(read_deleted='yes')
|
context = nova.context.get_admin_context(read_deleted='yes')
|
||||||
|
# Join on info_cache since that's needed in migrate_instance_start.
|
||||||
instance = objects.Instance.get_by_uuid(context,
|
instance = objects.Instance.get_by_uuid(context,
|
||||||
event.get_instance_uuid(),
|
event.get_instance_uuid(),
|
||||||
expected_attrs=[])
|
expected_attrs=['info_cache'])
|
||||||
vm_power_state = None
|
vm_power_state = None
|
||||||
if event.get_transition() == virtevent.EVENT_LIFECYCLE_STOPPED:
|
event_transition = event.get_transition()
|
||||||
|
if event_transition == virtevent.EVENT_LIFECYCLE_STOPPED:
|
||||||
vm_power_state = power_state.SHUTDOWN
|
vm_power_state = power_state.SHUTDOWN
|
||||||
elif event.get_transition() == virtevent.EVENT_LIFECYCLE_STARTED:
|
elif event_transition == virtevent.EVENT_LIFECYCLE_STARTED:
|
||||||
vm_power_state = power_state.RUNNING
|
vm_power_state = power_state.RUNNING
|
||||||
elif event.get_transition() == virtevent.EVENT_LIFECYCLE_PAUSED:
|
elif event_transition in (
|
||||||
|
virtevent.EVENT_LIFECYCLE_PAUSED,
|
||||||
|
virtevent.EVENT_LIFECYCLE_POSTCOPY_STARTED,
|
||||||
|
virtevent.EVENT_LIFECYCLE_MIGRATION_COMPLETED):
|
||||||
vm_power_state = power_state.PAUSED
|
vm_power_state = power_state.PAUSED
|
||||||
elif event.get_transition() == virtevent.EVENT_LIFECYCLE_RESUMED:
|
elif event_transition == virtevent.EVENT_LIFECYCLE_RESUMED:
|
||||||
vm_power_state = power_state.RUNNING
|
vm_power_state = power_state.RUNNING
|
||||||
elif event.get_transition() == virtevent.EVENT_LIFECYCLE_SUSPENDED:
|
elif event_transition == virtevent.EVENT_LIFECYCLE_SUSPENDED:
|
||||||
vm_power_state = power_state.SUSPENDED
|
vm_power_state = power_state.SUSPENDED
|
||||||
else:
|
else:
|
||||||
LOG.warning("Unexpected power state %d", event.get_transition())
|
LOG.warning("Unexpected lifecycle event: %d", event_transition)
|
||||||
|
|
||||||
# Note(lpetrut): The event may be delayed, thus not reflecting
|
# Note(lpetrut): The event may be delayed, thus not reflecting
|
||||||
# the current instance power state. In that case, ignore the event.
|
# the current instance power state. In that case, ignore the event.
|
||||||
|
@ -1087,6 +1092,36 @@ class ComputeManager(manager.Manager):
|
||||||
instance,
|
instance,
|
||||||
vm_power_state)
|
vm_power_state)
|
||||||
|
|
||||||
|
# The following checks are for live migration. We want to activate
|
||||||
|
# the port binding for the destination host before the live migration
|
||||||
|
# is resumed on the destination host in order to reduce network
|
||||||
|
# downtime. Otherwise the ports are bound to the destination host
|
||||||
|
# in post_live_migration_at_destination.
|
||||||
|
migrate_finish_statuses = {
|
||||||
|
# This happens on the source node and indicates live migration
|
||||||
|
# entered post-copy mode.
|
||||||
|
virtevent.EVENT_LIFECYCLE_POSTCOPY_STARTED: 'running (post-copy)',
|
||||||
|
# Suspended for offline migration.
|
||||||
|
virtevent.EVENT_LIFECYCLE_MIGRATION_COMPLETED: 'running'
|
||||||
|
}
|
||||||
|
if (instance.task_state == task_states.MIGRATING and
|
||||||
|
event_transition in migrate_finish_statuses):
|
||||||
|
status = migrate_finish_statuses[event_transition]
|
||||||
|
try:
|
||||||
|
migration = objects.Migration.get_by_instance_and_status(
|
||||||
|
context, instance.uuid, status)
|
||||||
|
LOG.debug('Binding ports to destination host: %s',
|
||||||
|
migration.dest_compute, instance=instance)
|
||||||
|
# For neutron, migrate_instance_start will activate the
|
||||||
|
# destination host port bindings, if there are any created by
|
||||||
|
# conductor before live migration started.
|
||||||
|
self.network_api.migrate_instance_start(
|
||||||
|
context, instance, migration)
|
||||||
|
except exception.MigrationNotFoundByStatus:
|
||||||
|
LOG.warning("Unable to find migration record with status "
|
||||||
|
"'%s' for instance. Port binding will happen in "
|
||||||
|
"post live migration.", status, instance=instance)
|
||||||
|
|
||||||
def handle_events(self, event):
|
def handle_events(self, event):
|
||||||
if isinstance(event, virtevent.LifecycleEvent):
|
if isinstance(event, virtevent.LifecycleEvent):
|
||||||
try:
|
try:
|
||||||
|
|
|
@ -92,24 +92,49 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase):
|
||||||
@mock.patch.object(manager.ComputeManager, '_get_power_state')
|
@mock.patch.object(manager.ComputeManager, '_get_power_state')
|
||||||
@mock.patch.object(manager.ComputeManager, '_sync_instance_power_state')
|
@mock.patch.object(manager.ComputeManager, '_sync_instance_power_state')
|
||||||
@mock.patch.object(objects.Instance, 'get_by_uuid')
|
@mock.patch.object(objects.Instance, 'get_by_uuid')
|
||||||
def _test_handle_lifecycle_event(self, mock_get, mock_sync,
|
@mock.patch.object(objects.Migration, 'get_by_instance_and_status')
|
||||||
mock_get_power_state, transition,
|
@mock.patch.object(nova.network.neutronv2.api.API,
|
||||||
event_pwr_state, current_pwr_state):
|
'migrate_instance_start')
|
||||||
|
def _test_handle_lifecycle_event(self, migrate_instance_start,
|
||||||
|
mock_get_migration, mock_get,
|
||||||
|
mock_sync, mock_get_power_state,
|
||||||
|
transition, event_pwr_state,
|
||||||
|
current_pwr_state):
|
||||||
event = mock.Mock()
|
event = mock.Mock()
|
||||||
event.get_instance_uuid.return_value = mock.sentinel.uuid
|
mock_get.return_value = fake_instance.fake_instance_obj(self.context,
|
||||||
|
task_state=task_states.MIGRATING)
|
||||||
event.get_transition.return_value = transition
|
event.get_transition.return_value = transition
|
||||||
mock_get_power_state.return_value = current_pwr_state
|
mock_get_power_state.return_value = current_pwr_state
|
||||||
|
|
||||||
self.compute.handle_lifecycle_event(event)
|
self.compute.handle_lifecycle_event(event)
|
||||||
|
mock_get.assert_called_once_with(
|
||||||
|
test.MatchType(context.RequestContext),
|
||||||
|
event.get_instance_uuid.return_value,
|
||||||
|
expected_attrs=['info_cache'])
|
||||||
|
|
||||||
mock_get.assert_called_with(mock.ANY, mock.sentinel.uuid,
|
|
||||||
expected_attrs=[])
|
|
||||||
if event_pwr_state == current_pwr_state:
|
if event_pwr_state == current_pwr_state:
|
||||||
mock_sync.assert_called_with(mock.ANY, mock_get.return_value,
|
mock_sync.assert_called_with(mock.ANY, mock_get.return_value,
|
||||||
event_pwr_state)
|
event_pwr_state)
|
||||||
else:
|
else:
|
||||||
self.assertFalse(mock_sync.called)
|
self.assertFalse(mock_sync.called)
|
||||||
|
|
||||||
|
migrate_finish_statuses = {
|
||||||
|
virtevent.EVENT_LIFECYCLE_POSTCOPY_STARTED: 'running (post-copy)',
|
||||||
|
virtevent.EVENT_LIFECYCLE_MIGRATION_COMPLETED: 'running'
|
||||||
|
}
|
||||||
|
if transition in migrate_finish_statuses:
|
||||||
|
mock_get_migration.assert_called_with(
|
||||||
|
test.MatchType(context.RequestContext),
|
||||||
|
mock_get.return_value.uuid,
|
||||||
|
migrate_finish_statuses[transition])
|
||||||
|
migrate_instance_start.assert_called_once_with(
|
||||||
|
test.MatchType(context.RequestContext),
|
||||||
|
mock_get.return_value,
|
||||||
|
mock_get_migration.return_value)
|
||||||
|
else:
|
||||||
|
mock_get_migration.assert_not_called()
|
||||||
|
migrate_instance_start.assert_not_called()
|
||||||
|
|
||||||
def test_handle_lifecycle_event(self):
|
def test_handle_lifecycle_event(self):
|
||||||
event_map = {virtevent.EVENT_LIFECYCLE_STOPPED: power_state.SHUTDOWN,
|
event_map = {virtevent.EVENT_LIFECYCLE_STOPPED: power_state.SHUTDOWN,
|
||||||
virtevent.EVENT_LIFECYCLE_STARTED: power_state.RUNNING,
|
virtevent.EVENT_LIFECYCLE_STARTED: power_state.RUNNING,
|
||||||
|
@ -117,6 +142,10 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase):
|
||||||
virtevent.EVENT_LIFECYCLE_RESUMED: power_state.RUNNING,
|
virtevent.EVENT_LIFECYCLE_RESUMED: power_state.RUNNING,
|
||||||
virtevent.EVENT_LIFECYCLE_SUSPENDED:
|
virtevent.EVENT_LIFECYCLE_SUSPENDED:
|
||||||
power_state.SUSPENDED,
|
power_state.SUSPENDED,
|
||||||
|
virtevent.EVENT_LIFECYCLE_POSTCOPY_STARTED:
|
||||||
|
power_state.PAUSED,
|
||||||
|
virtevent.EVENT_LIFECYCLE_MIGRATION_COMPLETED:
|
||||||
|
power_state.PAUSED,
|
||||||
}
|
}
|
||||||
|
|
||||||
for transition, pwr_state in event_map.items():
|
for transition, pwr_state in event_map.items():
|
||||||
|
@ -130,6 +159,35 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase):
|
||||||
event_pwr_state=power_state.SHUTDOWN,
|
event_pwr_state=power_state.SHUTDOWN,
|
||||||
current_pwr_state=power_state.RUNNING)
|
current_pwr_state=power_state.RUNNING)
|
||||||
|
|
||||||
|
@mock.patch('nova.objects.Instance.get_by_uuid')
|
||||||
|
@mock.patch('nova.compute.manager.ComputeManager.'
|
||||||
|
'_sync_instance_power_state')
|
||||||
|
@mock.patch('nova.objects.Migration.get_by_instance_and_status',
|
||||||
|
side_effect=exception.MigrationNotFoundByStatus(
|
||||||
|
instance_id=uuids.instance, status='running (post-copy)'))
|
||||||
|
def test_handle_lifecycle_event_postcopy_migration_not_found(
|
||||||
|
self, mock_get_migration, mock_sync, mock_get_instance):
|
||||||
|
"""Tests a EVENT_LIFECYCLE_POSTCOPY_STARTED scenario where the
|
||||||
|
migration record is not found by the expected status.
|
||||||
|
"""
|
||||||
|
inst = fake_instance.fake_instance_obj(
|
||||||
|
self.context, uuid=uuids.instance,
|
||||||
|
task_state=task_states.MIGRATING)
|
||||||
|
mock_get_instance.return_value = inst
|
||||||
|
event = virtevent.LifecycleEvent(
|
||||||
|
uuids.instance, virtevent.EVENT_LIFECYCLE_POSTCOPY_STARTED)
|
||||||
|
with mock.patch.object(self.compute, '_get_power_state',
|
||||||
|
return_value=power_state.PAUSED):
|
||||||
|
with mock.patch.object(self.compute.network_api,
|
||||||
|
'migrate_instance_finish') as mig_finish:
|
||||||
|
self.compute.handle_lifecycle_event(event)
|
||||||
|
# Since we failed to find the migration record, we shouldn't call
|
||||||
|
# migrate_instance_finish.
|
||||||
|
mig_finish.assert_not_called()
|
||||||
|
mock_get_migration.assert_called_once_with(
|
||||||
|
test.MatchType(context.RequestContext), uuids.instance,
|
||||||
|
'running (post-copy)')
|
||||||
|
|
||||||
@mock.patch('nova.compute.utils.notify_about_instance_action')
|
@mock.patch('nova.compute.utils.notify_about_instance_action')
|
||||||
def test_delete_instance_info_cache_delete_ordering(self, mock_notify):
|
def test_delete_instance_info_cache_delete_ordering(self, mock_notify):
|
||||||
call_tracker = mock.Mock()
|
call_tracker = mock.Mock()
|
||||||
|
|
|
@ -192,6 +192,46 @@ class HostTestCase(test.NoDBTestCase):
|
||||||
self.assertEqual(got_events[0].transition,
|
self.assertEqual(got_events[0].transition,
|
||||||
event.EVENT_LIFECYCLE_STOPPED)
|
event.EVENT_LIFECYCLE_STOPPED)
|
||||||
|
|
||||||
|
def test_event_lifecycle_callback_suspended_old_libvirt(self):
|
||||||
|
"""Tests the suspended lifecycle event with libvirt before post-copy
|
||||||
|
"""
|
||||||
|
hostimpl = mock.MagicMock()
|
||||||
|
conn = mock.MagicMock()
|
||||||
|
fake_dom_xml = """
|
||||||
|
<domain type='kvm'>
|
||||||
|
<uuid>cef19ce0-0ca2-11df-855d-b19fbce37686</uuid>
|
||||||
|
</domain>
|
||||||
|
"""
|
||||||
|
dom = fakelibvirt.Domain(conn, fake_dom_xml, running=True)
|
||||||
|
VIR_DOMAIN_EVENT_SUSPENDED_PAUSED = 0
|
||||||
|
host.Host._event_lifecycle_callback(
|
||||||
|
conn, dom, fakelibvirt.VIR_DOMAIN_EVENT_SUSPENDED,
|
||||||
|
detail=VIR_DOMAIN_EVENT_SUSPENDED_PAUSED, opaque=hostimpl)
|
||||||
|
expected_event = hostimpl._queue_event.call_args[0][0]
|
||||||
|
self.assertEqual(event.EVENT_LIFECYCLE_PAUSED,
|
||||||
|
expected_event.transition)
|
||||||
|
|
||||||
|
def test_event_lifecycle_callback_suspended_postcopy(self):
|
||||||
|
"""Tests the suspended lifecycle event with libvirt with post-copy"""
|
||||||
|
hostimpl = mock.MagicMock()
|
||||||
|
conn = mock.MagicMock()
|
||||||
|
fake_dom_xml = """
|
||||||
|
<domain type='kvm'>
|
||||||
|
<uuid>cef19ce0-0ca2-11df-855d-b19fbce37686</uuid>
|
||||||
|
</domain>
|
||||||
|
"""
|
||||||
|
dom = fakelibvirt.Domain(conn, fake_dom_xml, running=True)
|
||||||
|
VIR_DOMAIN_EVENT_SUSPENDED_POSTCOPY = 7
|
||||||
|
with mock.patch.object(host.libvirt,
|
||||||
|
'VIR_DOMAIN_EVENT_SUSPENDED_POSTCOPY', new=7,
|
||||||
|
create=True):
|
||||||
|
host.Host._event_lifecycle_callback(
|
||||||
|
conn, dom, fakelibvirt.VIR_DOMAIN_EVENT_SUSPENDED,
|
||||||
|
detail=VIR_DOMAIN_EVENT_SUSPENDED_POSTCOPY, opaque=hostimpl)
|
||||||
|
expected_event = hostimpl._queue_event.call_args[0][0]
|
||||||
|
self.assertEqual(event.EVENT_LIFECYCLE_POSTCOPY_STARTED,
|
||||||
|
expected_event.transition)
|
||||||
|
|
||||||
def test_event_emit_delayed_call_delayed(self):
|
def test_event_emit_delayed_call_delayed(self):
|
||||||
ev = event.LifecycleEvent(
|
ev = event.LifecycleEvent(
|
||||||
"cef19ce0-0ca2-11df-855d-b19fbce37686",
|
"cef19ce0-0ca2-11df-855d-b19fbce37686",
|
||||||
|
|
|
@ -29,6 +29,9 @@ EVENT_LIFECYCLE_STOPPED = 1
|
||||||
EVENT_LIFECYCLE_PAUSED = 2
|
EVENT_LIFECYCLE_PAUSED = 2
|
||||||
EVENT_LIFECYCLE_RESUMED = 3
|
EVENT_LIFECYCLE_RESUMED = 3
|
||||||
EVENT_LIFECYCLE_SUSPENDED = 4
|
EVENT_LIFECYCLE_SUSPENDED = 4
|
||||||
|
EVENT_LIFECYCLE_POSTCOPY_STARTED = 5
|
||||||
|
EVENT_LIFECYCLE_MIGRATION_COMPLETED = 6
|
||||||
|
|
||||||
|
|
||||||
NAMES = {
|
NAMES = {
|
||||||
EVENT_LIFECYCLE_STARTED: _('Started'),
|
EVENT_LIFECYCLE_STARTED: _('Started'),
|
||||||
|
@ -36,6 +39,8 @@ NAMES = {
|
||||||
EVENT_LIFECYCLE_PAUSED: _('Paused'),
|
EVENT_LIFECYCLE_PAUSED: _('Paused'),
|
||||||
EVENT_LIFECYCLE_RESUMED: _('Resumed'),
|
EVENT_LIFECYCLE_RESUMED: _('Resumed'),
|
||||||
EVENT_LIFECYCLE_SUSPENDED: _('Suspended'),
|
EVENT_LIFECYCLE_SUSPENDED: _('Suspended'),
|
||||||
|
EVENT_LIFECYCLE_POSTCOPY_STARTED: _('Postcopy started'),
|
||||||
|
EVENT_LIFECYCLE_MIGRATION_COMPLETED: _('Migration completed'),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -170,7 +170,20 @@ class Host(object):
|
||||||
elif event == libvirt.VIR_DOMAIN_EVENT_STARTED:
|
elif event == libvirt.VIR_DOMAIN_EVENT_STARTED:
|
||||||
transition = virtevent.EVENT_LIFECYCLE_STARTED
|
transition = virtevent.EVENT_LIFECYCLE_STARTED
|
||||||
elif event == libvirt.VIR_DOMAIN_EVENT_SUSPENDED:
|
elif event == libvirt.VIR_DOMAIN_EVENT_SUSPENDED:
|
||||||
transition = virtevent.EVENT_LIFECYCLE_PAUSED
|
# NOTE(siva_krishnan): We have to check if
|
||||||
|
# VIR_DOMAIN_EVENT_SUSPENDED_POSTCOPY and
|
||||||
|
# VIR_DOMAIN_EVENT_SUSPENDED_MIGRATED exist since the current
|
||||||
|
# minimum version of libvirt (1.2.9) don't have those attributes.
|
||||||
|
# This check can be removed once MIN_LIBVIRT_VERSION is bumped to
|
||||||
|
# at least 1.3.3.
|
||||||
|
if (hasattr(libvirt, 'VIR_DOMAIN_EVENT_SUSPENDED_POSTCOPY') and
|
||||||
|
detail == libvirt.VIR_DOMAIN_EVENT_SUSPENDED_POSTCOPY):
|
||||||
|
transition = virtevent.EVENT_LIFECYCLE_POSTCOPY_STARTED
|
||||||
|
elif (hasattr(libvirt, 'VIR_DOMAIN_EVENT_SUSPENDED_MIGRATED') and
|
||||||
|
detail == libvirt.VIR_DOMAIN_EVENT_SUSPENDED_MIGRATED):
|
||||||
|
transition = virtevent.EVENT_LIFECYCLE_MIGRATION_COMPLETED
|
||||||
|
else:
|
||||||
|
transition = virtevent.EVENT_LIFECYCLE_PAUSED
|
||||||
elif event == libvirt.VIR_DOMAIN_EVENT_RESUMED:
|
elif event == libvirt.VIR_DOMAIN_EVENT_RESUMED:
|
||||||
transition = virtevent.EVENT_LIFECYCLE_RESUMED
|
transition = virtevent.EVENT_LIFECYCLE_RESUMED
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue