Merge "Extend live-migration-force-complete to use postcopy if available"

This commit is contained in:
Jenkins 2016-06-29 17:51:08 +00:00 committed by Gerrit Code Review
commit 502a948635
6 changed files with 377 additions and 42 deletions

View File

@ -8346,6 +8346,7 @@ class LibvirtConnTestCase(test.NoDBTestCase):
EXPECT_FAILURE = 2
EXPECT_ABORT = 3
@mock.patch.object(libvirt_guest.Guest, "migrate_start_postcopy")
@mock.patch.object(time, "time")
@mock.patch.object(time, "sleep",
side_effect=lambda x: eventlet.sleep(0))
@ -8369,9 +8370,12 @@ class LibvirtConnTestCase(test.NoDBTestCase):
mock_conn,
mock_sleep,
mock_time,
mock_postcopy_switch,
current_mig_status=None,
expected_mig_status=None,
scheduled_action=None,
scheduled_action_executed=False):
scheduled_action_executed=False,
block_migration=False):
drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False)
instance = objects.Instance(**self.test_instance)
drvr.active_migrations[instance.uuid] = deque()
@ -8390,7 +8394,8 @@ class LibvirtConnTestCase(test.NoDBTestCase):
elif rec == "domain-stop":
dom.destroy()
elif rec == "force_complete":
drvr.active_migrations[instance.uuid].append("pause")
drvr.active_migrations[instance.uuid].append(
"force-complete")
else:
if len(time_records) > 0:
time_records.pop(0)
@ -8411,7 +8416,11 @@ class LibvirtConnTestCase(test.NoDBTestCase):
dest = mock.sentinel.migrate_dest
migration = objects.Migration(context=self.context, id=1)
migrate_data = objects.LibvirtLiveMigrateData(
migration=migration)
migration=migration, block_migration=block_migration)
if current_mig_status:
migrate_data.migration.status = current_mig_status
migrate_data.migration.save()
fake_post_method = mock.MagicMock()
fake_recover_method = mock.MagicMock()
@ -8426,9 +8435,13 @@ class LibvirtConnTestCase(test.NoDBTestCase):
if scheduled_action_executed:
if scheduled_action == 'pause':
self.assertTrue(mock_pause.called)
if scheduled_action == 'postcopy_switch':
self.assertTrue(mock_postcopy_switch.called)
else:
if scheduled_action == 'pause':
self.assertFalse(mock_pause.called)
if scheduled_action == 'postcopy_switch':
self.assertFalse(mock_postcopy_switch.called)
mock_mig_save.assert_called_with()
if expect_result == self.EXPECT_SUCCESS:
@ -8497,6 +8510,7 @@ class LibvirtConnTestCase(test.NoDBTestCase):
self._test_live_migration_monitoring(domain_info_records, [],
self.EXPECT_SUCCESS,
current_mig_status="running",
scheduled_action="pause",
scheduled_action_executed=True)
@ -8522,6 +8536,7 @@ class LibvirtConnTestCase(test.NoDBTestCase):
self._test_live_migration_monitoring(domain_info_records, [],
self.EXPECT_SUCCESS,
current_mig_status="preparing",
scheduled_action="pause",
scheduled_action_executed=True)
@ -8547,6 +8562,7 @@ class LibvirtConnTestCase(test.NoDBTestCase):
self._test_live_migration_monitoring(domain_info_records, [],
self.EXPECT_SUCCESS,
current_mig_status="completed",
scheduled_action="pause",
scheduled_action_executed=False)
@ -8571,6 +8587,7 @@ class LibvirtConnTestCase(test.NoDBTestCase):
self._test_live_migration_monitoring(domain_info_records, [],
self.EXPECT_FAILURE,
current_mig_status="cancelled",
expected_mig_status='cancelled',
scheduled_action="pause",
scheduled_action_executed=False)
@ -8599,6 +8616,211 @@ class LibvirtConnTestCase(test.NoDBTestCase):
scheduled_action="pause",
scheduled_action_executed=False)
@mock.patch.object(libvirt_driver.LibvirtDriver,
"_is_post_copy_enabled")
def test_live_migration_handle_postcopy_normal(self,
mock_postcopy_enabled):
# A normal sequence where we see all the normal job states, and a
# force-complete request is queued in between VIR_DOMAIN_JOB_UNBOUNDED
# records while the migration status is "running". Post-copy is
# reported as enabled, so the switch to post-copy is expected to be
# called.
mock_postcopy_enabled.return_value = True
domain_info_records = [
libvirt_guest.JobInfo(
type=fakelibvirt.VIR_DOMAIN_JOB_NONE),
libvirt_guest.JobInfo(
type=fakelibvirt.VIR_DOMAIN_JOB_UNBOUNDED),
libvirt_guest.JobInfo(
type=fakelibvirt.VIR_DOMAIN_JOB_UNBOUNDED),
"force_complete",
libvirt_guest.JobInfo(
type=fakelibvirt.VIR_DOMAIN_JOB_UNBOUNDED),
"thread-finish",
"domain-stop",
libvirt_guest.JobInfo(
type=fakelibvirt.VIR_DOMAIN_JOB_COMPLETED),
]
self._test_live_migration_monitoring(domain_info_records, [],
self.EXPECT_SUCCESS,
current_mig_status="running",
scheduled_action="postcopy_switch",
scheduled_action_executed=True)
@mock.patch.object(libvirt_driver.LibvirtDriver,
"_is_post_copy_enabled")
def test_live_migration_handle_postcopy_on_start(self,
mock_postcopy_enabled):
# A normal sequence where we see all the normal job states, and the
# force-complete request is queued before the first record, i.e. in
# case of job type VIR_DOMAIN_JOB_NONE when finish_event is not ready
# yet. The migration status is "preparing" and the switch to
# post-copy is expected to be called.
mock_postcopy_enabled.return_value = True
domain_info_records = [
"force_complete",
libvirt_guest.JobInfo(
type=fakelibvirt.VIR_DOMAIN_JOB_NONE),
libvirt_guest.JobInfo(
type=fakelibvirt.VIR_DOMAIN_JOB_UNBOUNDED),
libvirt_guest.JobInfo(
type=fakelibvirt.VIR_DOMAIN_JOB_UNBOUNDED),
libvirt_guest.JobInfo(
type=fakelibvirt.VIR_DOMAIN_JOB_UNBOUNDED),
"thread-finish",
"domain-stop",
libvirt_guest.JobInfo(
type=fakelibvirt.VIR_DOMAIN_JOB_COMPLETED),
]
self._test_live_migration_monitoring(domain_info_records, [],
self.EXPECT_SUCCESS,
current_mig_status="preparing",
scheduled_action="postcopy_switch",
scheduled_action_executed=True)
@mock.patch.object(libvirt_driver.LibvirtDriver,
"_is_post_copy_enabled")
def test_live_migration_handle_postcopy_on_finish(self,
mock_postcopy_enabled):
# A normal sequence where we see all the normal job states, and the
# force-complete request is queued only after "thread-finish" and
# "domain-stop", i.e. in case of job type VIR_DOMAIN_JOB_NONE when
# finish_event is ready. The migration status is "completed" and the
# switch to post-copy must not be called.
mock_postcopy_enabled.return_value = True
domain_info_records = [
libvirt_guest.JobInfo(
type=fakelibvirt.VIR_DOMAIN_JOB_NONE),
libvirt_guest.JobInfo(
type=fakelibvirt.VIR_DOMAIN_JOB_UNBOUNDED),
libvirt_guest.JobInfo(
type=fakelibvirt.VIR_DOMAIN_JOB_UNBOUNDED),
libvirt_guest.JobInfo(
type=fakelibvirt.VIR_DOMAIN_JOB_UNBOUNDED),
"thread-finish",
"domain-stop",
"force_complete",
libvirt_guest.JobInfo(
type=fakelibvirt.VIR_DOMAIN_JOB_COMPLETED),
]
self._test_live_migration_monitoring(domain_info_records, [],
self.EXPECT_SUCCESS,
current_mig_status="completed",
scheduled_action="postcopy_switch",
scheduled_action_executed=False)
@mock.patch.object(libvirt_driver.LibvirtDriver,
"_is_post_copy_enabled")
def test_live_migration_handle_postcopy_on_cancel(self,
mock_postcopy_enabled):
# A sequence where we see the normal job states but the job ends as
# VIR_DOMAIN_JOB_CANCELLED, with the force-complete request queued
# just before that final record. The migration is expected to fail
# with status "cancelled" and the switch to post-copy must not be
# called.
mock_postcopy_enabled.return_value = True
domain_info_records = [
libvirt_guest.JobInfo(
type=fakelibvirt.VIR_DOMAIN_JOB_NONE),
libvirt_guest.JobInfo(
type=fakelibvirt.VIR_DOMAIN_JOB_UNBOUNDED),
libvirt_guest.JobInfo(
type=fakelibvirt.VIR_DOMAIN_JOB_UNBOUNDED),
libvirt_guest.JobInfo(
type=fakelibvirt.VIR_DOMAIN_JOB_UNBOUNDED),
"thread-finish",
"domain-stop",
"force_complete",
libvirt_guest.JobInfo(
type=fakelibvirt.VIR_DOMAIN_JOB_CANCELLED),
]
self._test_live_migration_monitoring(domain_info_records, [],
self.EXPECT_FAILURE,
current_mig_status="cancelled",
expected_mig_status='cancelled',
scheduled_action="postcopy_switch",
scheduled_action_executed=False)
@mock.patch.object(libvirt_driver.LibvirtDriver,
"_is_post_copy_enabled")
def test_live_migration_handle_pause_on_postcopy(self,
mock_postcopy_enabled):
# A normal sequence where we see all the normal job states, and a
# force-complete request is queued after the migration has already
# switched to post-copy (status "running (post-copy)"). The guest
# must not be paused in that case.
mock_postcopy_enabled.return_value = True
domain_info_records = [
libvirt_guest.JobInfo(
type=fakelibvirt.VIR_DOMAIN_JOB_NONE),
libvirt_guest.JobInfo(
type=fakelibvirt.VIR_DOMAIN_JOB_UNBOUNDED),
libvirt_guest.JobInfo(
type=fakelibvirt.VIR_DOMAIN_JOB_UNBOUNDED),
"force_complete",
libvirt_guest.JobInfo(
type=fakelibvirt.VIR_DOMAIN_JOB_UNBOUNDED),
"thread-finish",
"domain-stop",
libvirt_guest.JobInfo(
type=fakelibvirt.VIR_DOMAIN_JOB_COMPLETED),
]
self._test_live_migration_monitoring(domain_info_records, [],
self.EXPECT_SUCCESS,
current_mig_status="running (post-copy)",
scheduled_action="pause",
scheduled_action_executed=False)
@mock.patch.object(libvirt_driver.LibvirtDriver,
"_is_post_copy_enabled")
def test_live_migration_handle_postcopy_on_postcopy(self,
mock_postcopy_enabled):
# A normal sequence where we see all the normal job states, and a
# force-complete request is queued after the migration has already
# switched to post-copy (status "running (post-copy)"). The switch
# to post-copy must not be triggered a second time.
mock_postcopy_enabled.return_value = True
domain_info_records = [
libvirt_guest.JobInfo(
type=fakelibvirt.VIR_DOMAIN_JOB_NONE),
libvirt_guest.JobInfo(
type=fakelibvirt.VIR_DOMAIN_JOB_UNBOUNDED),
libvirt_guest.JobInfo(
type=fakelibvirt.VIR_DOMAIN_JOB_UNBOUNDED),
"force_complete",
libvirt_guest.JobInfo(
type=fakelibvirt.VIR_DOMAIN_JOB_UNBOUNDED),
"thread-finish",
"domain-stop",
libvirt_guest.JobInfo(
type=fakelibvirt.VIR_DOMAIN_JOB_COMPLETED),
]
self._test_live_migration_monitoring(domain_info_records, [],
self.EXPECT_SUCCESS,
current_mig_status="running (post-copy)",
scheduled_action="postcopy_switch",
scheduled_action_executed=False)
@mock.patch.object(libvirt_driver.LibvirtDriver,
"_is_post_copy_enabled")
def test_live_migration_handle_postcopy_on_failure(self,
mock_postcopy_enabled):
# A sequence where we see the normal job states but the job ends as
# VIR_DOMAIN_JOB_FAILED, with the force-complete request queued just
# before that final record. The migration is expected to fail and
# the switch to post-copy must not be called.
mock_postcopy_enabled.return_value = True
domain_info_records = [
libvirt_guest.JobInfo(
type=fakelibvirt.VIR_DOMAIN_JOB_NONE),
libvirt_guest.JobInfo(
type=fakelibvirt.VIR_DOMAIN_JOB_UNBOUNDED),
libvirt_guest.JobInfo(
type=fakelibvirt.VIR_DOMAIN_JOB_UNBOUNDED),
libvirt_guest.JobInfo(
type=fakelibvirt.VIR_DOMAIN_JOB_UNBOUNDED),
"thread-finish",
"domain-stop",
"force_complete",
libvirt_guest.JobInfo(
type=fakelibvirt.VIR_DOMAIN_JOB_FAILED),
]
self._test_live_migration_monitoring(domain_info_records, [],
self.EXPECT_FAILURE,
scheduled_action="postcopy_switch",
scheduled_action_executed=False)
def test_live_migration_monitor_success_race(self):
# A normalish sequence but we're too slow to see the
# completed job state
@ -14266,7 +14488,7 @@ class LibvirtConnTestCase(test.NoDBTestCase):
drvr.active_migrations[instance.uuid] = deque()
drvr.live_migration_force_complete(instance)
self.assertEqual(
1, drvr.active_migrations[instance.uuid].count("pause"))
1, drvr.active_migrations[instance.uuid].count("force-complete"))
@mock.patch.object(host.Host, "get_connection")
@mock.patch.object(fakelibvirt.virDomain, "abortJob")

View File

@ -429,54 +429,117 @@ class MigrationMonitorTestCase(test.NoDBTestCase):
mock_msave.assert_called_once_with()
mock_isave.assert_called_once_with()
@mock.patch.object(libvirt_guest.Guest, "migrate_start_postcopy")
@mock.patch.object(libvirt_guest.Guest, "pause")
def test_live_migration_run_tasks_pause(self, mock_pause):
tasks = deque()
tasks.append("pause")
active_migrations = {self.instance.uuid: tasks}
on_migration_failure = deque()
migration.run_tasks(self.guest, self.instance,
active_migrations, on_migration_failure)
mock_pause.assert_called_once_with()
self.assertEqual(len(on_migration_failure), 1)
self.assertEqual(on_migration_failure.pop(), "unpause")
@mock.patch.object(libvirt_guest.Guest, "pause")
def test_live_migration_run_tasks_empty_tasks(self, mock_pause):
def test_live_migration_run_tasks_empty_tasks(self, mock_pause,
mock_postcopy):
tasks = deque()
active_migrations = {self.instance.uuid: tasks}
on_migration_failure = deque()
mig = objects.Migration(id=1, status="running")
migration.run_tasks(self.guest, self.instance,
active_migrations, on_migration_failure)
active_migrations, on_migration_failure,
mig, False)
self.assertFalse(mock_pause.called)
self.assertFalse(mock_postcopy.called)
self.assertEqual(len(on_migration_failure), 0)
@mock.patch.object(libvirt_guest.Guest, "migrate_start_postcopy")
@mock.patch.object(libvirt_guest.Guest, "pause")
def test_live_migration_run_tasks_no_tasks(self, mock_pause):
def test_live_migration_run_tasks_no_tasks(self, mock_pause,
mock_postcopy):
active_migrations = {}
on_migration_failure = deque()
mig = objects.Migration(id=1, status="running")
migration.run_tasks(self.guest, self.instance,
active_migrations, on_migration_failure)
active_migrations, on_migration_failure,
mig, False)
self.assertFalse(mock_pause.called)
self.assertFalse(mock_postcopy.called)
self.assertEqual(len(on_migration_failure), 0)
@mock.patch.object(libvirt_guest.Guest, "migrate_start_postcopy")
@mock.patch.object(libvirt_guest.Guest, "pause")
def test_live_migration_run_tasks_no_pause(self, mock_pause):
def test_live_migration_run_tasks_no_force_complete(self, mock_pause,
mock_postcopy):
tasks = deque()
# Test to ensure unknown tasks are ignored
tasks.append("wibble")
active_migrations = {self.instance.uuid: tasks}
on_migration_failure = deque()
migration.run_tasks(self.guest, self.instance,
active_migrations, on_migration_failure)
mig = objects.Migration(id=1, status="running")
migration.run_tasks(self.guest, self.instance,
active_migrations, on_migration_failure,
mig, False)
self.assertFalse(mock_pause.called)
self.assertFalse(mock_postcopy.called)
self.assertEqual(len(on_migration_failure), 0)
@mock.patch.object(libvirt_guest.Guest, "migrate_start_postcopy")
@mock.patch.object(libvirt_guest.Guest, "pause")
def test_live_migration_run_tasks_force_complete(self, mock_pause,
                                                 mock_postcopy):
    # With post-copy disabled, "force-complete" must fall back to pausing
    # the guest and must queue an "unpause" recovery task.
    pending = deque()
    pending.append("force-complete")
    queued_by_instance = {self.instance.uuid: pending}
    recovery_queue = deque()
    running_migration = objects.Migration(id=1, status="running")

    migration.run_tasks(self.guest, self.instance,
                        queued_by_instance, recovery_queue,
                        running_migration, False)

    mock_pause.assert_called_once_with()
    self.assertFalse(mock_postcopy.called)
    self.assertEqual(len(recovery_queue), 1)
    self.assertEqual(recovery_queue.pop(), "unpause")
@mock.patch.object(libvirt_guest.Guest, "migrate_start_postcopy")
@mock.patch.object(libvirt_guest.Guest, "pause")
def test_live_migration_run_tasks_force_complete_postcopy_running(
        self, mock_pause, mock_postcopy):
    # The migration is already in post-copy mode, so "force-complete"
    # must neither pause the guest nor trigger another post-copy switch.
    pending = deque()
    pending.append("force-complete")
    queued_by_instance = {self.instance.uuid: pending}
    recovery_queue = deque()
    postcopy_migration = objects.Migration(id=1,
                                           status="running (post-copy)")

    migration.run_tasks(self.guest, self.instance,
                        queued_by_instance, recovery_queue,
                        postcopy_migration, True)

    self.assertFalse(mock_pause.called)
    self.assertFalse(mock_postcopy.called)
    self.assertEqual(len(recovery_queue), 0)
@mock.patch.object(objects.Migration, "save")
@mock.patch.object(libvirt_guest.Guest, "migrate_start_postcopy")
@mock.patch.object(libvirt_guest.Guest, "pause")
def test_live_migration_run_tasks_force_complete_postcopy(
        self, mock_pause, mock_postcopy, mock_msave):
    # With post-copy enabled and the migration still in pre-copy,
    # "force-complete" must switch to post-copy instead of pausing, and
    # must not queue any recovery task.
    pending = deque()
    pending.append("force-complete")
    queued_by_instance = {self.instance.uuid: pending}
    recovery_queue = deque()
    running_migration = objects.Migration(id=1, status="running")

    migration.run_tasks(self.guest, self.instance,
                        queued_by_instance, recovery_queue,
                        running_migration, True)

    mock_postcopy.assert_called_once_with()
    self.assertFalse(mock_pause.called)
    self.assertEqual(len(recovery_queue), 0)

View File

@ -6097,6 +6097,11 @@ class LibvirtDriver(driver.ComputeDriver):
return ram_gb + disk_gb
def _get_migration_flags(self, is_block_migration):
if is_block_migration:
return self._block_migration_flags
return self._live_migration_flags
def _live_migration_monitor(self, context, instance, guest,
dest, post_method,
recover_method, block_migration,
@ -6111,10 +6116,14 @@ class LibvirtDriver(driver.ComputeDriver):
migration = migrate_data.migration
curdowntime = None
migration_flags = self._get_migration_flags(
migrate_data.block_migration)
n = 0
start = time.time()
progress_time = start
progress_watermark = None
is_post_copy_enabled = self._is_post_copy_enabled(migration_flags)
while True:
info = guest.get_job_info()
@ -6141,7 +6150,9 @@ class LibvirtDriver(driver.ComputeDriver):
# the operation, change max bandwidth
libvirt_migrate.run_tasks(guest, instance,
self.active_migrations,
on_migration_failure)
on_migration_failure,
migration,
is_post_copy_enabled)
now = time.time()
elapsed = now - start
@ -6318,12 +6329,15 @@ class LibvirtDriver(driver.ComputeDriver):
LOG.debug("Live migration monitoring is all done",
instance=instance)
def _is_post_copy_enabled(self, migration_flags):
if self._is_post_copy_available():
if (migration_flags & libvirt.VIR_MIGRATE_POSTCOPY) != 0:
return True
return False
def live_migration_force_complete(self, instance):
    """Queue a request to force the instance's live migration to finish.

    Only the generic 'force-complete' task is queued. How it is carried
    out (switching to post-copy or pausing the guest) is decided when the
    queued task is run, not here.

    :param instance: the instance being migrated; its ``uuid`` is the key
                     into ``self.active_migrations``
    :raises: exception.NoActiveMigrationForInstance if no task queue is
             registered for the instance
    """
    try:
        self.active_migrations[instance.uuid].append('force-complete')
    except KeyError:
        raise exception.NoActiveMigrationForInstance(
            instance_id=instance.uuid)

View File

@ -539,6 +539,10 @@ class Guest(object):
"""
self._domain.migrateSetMaxDowntime(mstime)
def migrate_start_postcopy(self):
    """Ask the underlying domain to switch its migration to post-copy."""
    domain = self._domain
    domain.migrateStartPostCopy()
def get_job_info(self):
"""Get job info for the domain

View File

@ -283,34 +283,62 @@ def save_stats(instance, migration, info, remaining):
instance.save()
def run_tasks(guest, instance, active_migrations, on_migration_failure):
def trigger_postcopy_switch(guest, instance, migration):
    """Try to switch a running live migration to post-copy mode.

    :param guest: object exposing ``migrate_start_postcopy()``
    :param instance: instance being migrated, used for log context
    :param migration: migration record; on success its status is set to
                      'running (post-copy)' and it is saved

    A libvirt error during the switch is logged as a warning and the
    migration record is left untouched.
    """
    try:
        guest.migrate_start_postcopy()
    except libvirt.libvirtError as err:
        LOG.warning(_LW("Failed to switch to post-copy live "
                        "migration: %s"),
                    err, instance=instance)
        return

    # NOTE(ltomas): Change the migration status to indicate that
    # it is in post-copy active mode, i.e., the VM at
    # destination is the active one
    LOG.info(_LI("Switching to post-copy migration mode"),
             instance=instance)
    migration.status = 'running (post-copy)'
    migration.save()
def run_tasks(guest, instance, active_migrations, on_migration_failure,
              migration, is_post_copy_enabled):
    """Run any pending migration tasks

    :param guest: a nova.virt.libvirt.guest.Guest
    :param instance: a nova.objects.Instance
    :param active_migrations: dict of active migrations
    :param on_migration_failure: queue of recovery tasks
    :param migration: a nova.objects.Migration
    :param is_post_copy_enabled: True if post-copy can be used

    Run any pending migration tasks queued against the
    provided instance object. The active migrations dict
    should use instance UUIDs for keys and a queue of
    tasks as the values.

    Currently the valid tasks that can be requested
    are "pause" and "force-complete":

    * "pause" pauses the guest and queues an "unpause" recovery task.
    * "force-complete" does nothing if the migration is already in
      post-copy mode, switches to post-copy if that is enabled, and
      otherwise pauses the guest exactly like "pause".

    Other tasks are ignored with a warning.
    """

    tasks = active_migrations.get(instance.uuid, deque())
    while tasks:
        task = tasks.popleft()
        if task == 'pause':
            _pause_guest_for_migration(guest, instance,
                                       on_migration_failure)
        elif task == 'force-complete':
            if migration.status == 'running (post-copy)':
                # The message has a %s placeholder, so it needs an
                # argument; otherwise a literal "%s" ends up in the log.
                LOG.warning(_LW("Live-migration %s already switched "
                                "to post-copy mode."),
                            migration.id, instance=instance)
            elif is_post_copy_enabled:
                trigger_postcopy_switch(guest, instance, migration)
            else:
                _pause_guest_for_migration(guest, instance,
                                           on_migration_failure)
        else:
            LOG.warning(_LW("Unknown migration task '%(task)s'"),
                        {"task": task}, instance=instance)


def _pause_guest_for_migration(guest, instance, on_migration_failure):
    """Pause the guest; on success queue "unpause" as a recovery task."""
    try:
        guest.pause()
        on_migration_failure.append("unpause")
    except Exception as e:
        # Best effort: a failed pause must not abort task processing.
        LOG.warning(_LW("Failed to pause instance during "
                        "live-migration %s"),
                    e, instance=instance)

View File

@ -3,4 +3,8 @@ features:
- New configuration option live_migration_permit_post_copy
has been added to start live migrations in a way that allows
nova to switch an on-going live migration to post-copy mode.
Requires libvirt>=1.3.3 and QEMU>=2.5.0. If post-copy is
permitted and the version requirements are met, it also changes
the behaviour of 'live_migration_force_complete', so that it
switches an on-going live migration to post-copy mode instead
of pausing the instance during live migration.