Merge "libvirt: handle code=38 + sigkill (ebusy) in destroy()" into stable/kilo
This commit is contained in:
commit
b8c4f1bce3
|
@ -8366,6 +8366,54 @@ class LibvirtConnTestCase(test.NoDBTestCase):
|
|||
self.assertRaises(fakelibvirt.libvirtError, conn._destroy,
|
||||
instance)
|
||||
|
||||
def test_private_destroy_ebusy_timeout(self):
    # When every domain.destroy() call fails with the SIGKILL/EBUSY system
    # error, _destroy is expected to call destroy() three times in total
    # and then let the libvirtError propagate to the caller.
    busy_error = fakelibvirt.make_libvirtError(
        fakelibvirt.libvirtError,
        ("Failed to terminate process 26425 with SIGKILL: "
         "Device or resource busy"),
        error_code=fakelibvirt.VIR_ERR_SYSTEM_ERROR,
        int1=errno.EBUSY)

    # Stand-in domain whose destroy() raises the EBUSY error on every call.
    fake_domain = mock.MagicMock()
    fake_domain.ID.return_value = 1
    fake_domain.destroy.side_effect = busy_error

    inst = objects.Instance(**self.test_instance)
    virt_driver = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False)

    # Make the host lookup hand back the always-failing domain.
    with mock.patch.object(virt_driver._host, 'get_domain',
                           return_value=fake_domain):
        self.assertRaises(fakelibvirt.libvirtError,
                          virt_driver._destroy, inst)

    self.assertEqual(3, fake_domain.destroy.call_count)
|
||||
|
||||
def test_private_destroy_ebusy_multiple_attempt_ok(self):
    # The first domain.destroy() call fails with the SIGKILL/EBUSY system
    # error and the second one succeeds; _destroy is expected to stop
    # retrying at that point and return without raising.
    busy_error = fakelibvirt.make_libvirtError(
        fakelibvirt.libvirtError,
        ("Failed to terminate process 26425 with SIGKILL: "
         "Device or resource busy"),
        error_code=fakelibvirt.VIR_ERR_SYSTEM_ERROR,
        int1=errno.EBUSY)

    # Stand-in domain: destroy() raises once, then returns None.
    fake_domain = mock.MagicMock()
    fake_domain.ID.return_value = 1
    fake_domain.destroy.side_effect = [busy_error, None]

    # get_info() is patched to report the guest as shut down.
    shutdown_info = hardware.InstanceInfo(power_state.SHUTDOWN, id=1)
    inst = objects.Instance(**self.test_instance)
    virt_driver = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False)

    with mock.patch.object(virt_driver._host, 'get_domain',
                           return_value=fake_domain):
        with mock.patch.object(virt_driver, 'get_info',
                               return_value=shutdown_info):
            virt_driver._destroy(inst)

    # One failed attempt plus one successful attempt.
    self.assertEqual(2, fake_domain.destroy.call_count)
|
||||
|
||||
def test_undefine_domain_with_not_found_instance(self):
|
||||
def fake_get_domain(self, instance):
|
||||
raise exception.InstanceNotFound(instance_id=instance.name)
|
||||
|
|
|
@ -617,7 +617,7 @@ class LibvirtDriver(driver.ComputeDriver):
|
|||
rootfs_dev = instance.system_metadata.get('rootfs_device_name')
|
||||
disk.teardown_container(container_dir, rootfs_dev)
|
||||
|
||||
def _destroy(self, instance):
|
||||
def _destroy(self, instance, attempt=1):
|
||||
try:
|
||||
virt_dom = self._host.get_domain(instance)
|
||||
except exception.InstanceNotFound:
|
||||
|
@ -660,6 +660,34 @@ class LibvirtDriver(driver.ComputeDriver):
|
|||
instance=instance)
|
||||
reason = _("operation time out")
|
||||
raise exception.InstancePowerOffFailure(reason=reason)
|
||||
elif errcode == libvirt.VIR_ERR_SYSTEM_ERROR:
|
||||
if e.get_int1() == errno.EBUSY:
|
||||
# NOTE(danpb): When libvirt kills a process it sends it
|
||||
# SIGTERM first and waits 10 seconds. If it hasn't gone
|
||||
# it sends SIGKILL and waits another 5 seconds. If it
|
||||
# still hasn't gone then you get this EBUSY error.
|
||||
# Usually when a QEMU process fails to go away upon
|
||||
# SIGKILL it is because it is stuck in an
|
||||
# uninterruptible kernel sleep waiting on I/O from
|
||||
# some non-responsive server.
|
||||
# Given the CPU load of the gate tests though, it is
|
||||
# conceivable that the 15 second timeout is too short,
|
||||
# particularly if the VM running tempest has a high
|
||||
# steal time from the cloud host. i.e. 15 wallclock
|
||||
# seconds may have passed, but the VM might have only
|
||||
# had a few seconds of scheduled run time.
|
||||
LOG.warn(_LW('Error from libvirt during destroy. '
|
||||
'Code=%(errcode)s Error=%(e)s; '
|
||||
'attempt %(attempt)d of 3'),
|
||||
{'errcode': errcode, 'e': e,
|
||||
'attempt': attempt},
|
||||
instance=instance)
|
||||
with excutils.save_and_reraise_exception() as ctxt:
|
||||
# Try up to 3 times before giving up.
|
||||
if attempt < 3:
|
||||
ctxt.reraise = False
|
||||
self._destroy(instance, attempt + 1)
|
||||
return
|
||||
|
||||
if not is_okay:
|
||||
with excutils.save_and_reraise_exception():
|
||||
|
|
Loading…
Reference in New Issue