libvirt: wait for interface detach from the guest
The test_reassign_port_between_servers test in Tempest creates a port in neutron and two servers. It attaches the port to the first server and then quickly detaches it and waits for the port.device_id to be unbound from the server. Then it repeats that for the second server. The interface detach from the guest is asynchronous and happens before nova unbinds the port, so there is a race where the port's device_id is unset but the interface is still on the first guest when we try to attach to the second guest, which fails. This is a latent bug, but apparently has been tickled by the move to our neutron CI jobs to use ubuntu xenial nodes. The fix is to add a detach and retry loop on the interface detach on the guest so that we can wait until the interface is gone from the guest before nova unbinds the port in neutron, which is what the Tempest test is waiting for. Then the device should be available for attaching to the second guest. This is similar to what we do with detaching volumes. Closes-Bug: #1607714 Change-Id: Ic04aad8923ea2edf1d16e32c208cd41fdf898834
This commit is contained in:
parent
1ecd71b08d
commit
a3b3e8d831
|
@ -16611,7 +16611,8 @@ class LibvirtDriverTestCase(test.NoDBTestCase):
|
|||
power_state.SHUTDOWN,
|
||||
expected_flags=(fakelibvirt.VIR_DOMAIN_AFFECT_CONFIG))
|
||||
|
||||
def _test_detach_interface(self, power_state, expected_flags):
|
||||
def _test_detach_interface(self, power_state, expected_flags,
|
||||
device_not_found=False):
|
||||
# setup some mocks
|
||||
instance = self._create_instance()
|
||||
network_info = _fake_network_info(self, 1)
|
||||
|
@ -16640,13 +16641,27 @@ class LibvirtDriverTestCase(test.NoDBTestCase):
|
|||
<target dev='tap12345678'/>
|
||||
</interface>""")
|
||||
|
||||
if device_not_found:
|
||||
# This will trigger detach_device_with_retry to raise
|
||||
# DeviceNotFound
|
||||
get_interface_calls = [expected_cfg, None]
|
||||
else:
|
||||
get_interface_calls = [expected_cfg, expected_cfg, None]
|
||||
|
||||
with test.nested(
|
||||
mock.patch.object(host.Host, 'get_guest', return_value=guest),
|
||||
mock.patch.object(self.drvr.vif_driver, 'get_config',
|
||||
return_value=expected_cfg),
|
||||
mock.patch.object(domain, 'detachDeviceFlags')
|
||||
# This is called multiple times in a retry loop so we use a
|
||||
# side_effect to simulate the calls to stop the loop.
|
||||
mock.patch.object(guest, 'get_interface_by_cfg',
|
||||
side_effect=get_interface_calls),
|
||||
mock.patch.object(domain, 'detachDeviceFlags'),
|
||||
mock.patch('nova.virt.libvirt.driver.LOG.warning')
|
||||
) as (
|
||||
mock_get_guest, mock_get_config, mock_detach_device_flags
|
||||
mock_get_guest, mock_get_config,
|
||||
mock_get_interface, mock_detach_device_flags,
|
||||
mock_warning
|
||||
):
|
||||
# run the detach method
|
||||
self.drvr.detach_interface(self.context, instance, network_info[0])
|
||||
|
@ -16657,8 +16672,16 @@ class LibvirtDriverTestCase(test.NoDBTestCase):
|
|||
instance, network_info[0], test.MatchType(objects.ImageMeta),
|
||||
test.MatchType(objects.Flavor), CONF.libvirt.virt_type,
|
||||
self.drvr._host)
|
||||
mock_detach_device_flags.assert_called_once_with(
|
||||
expected_cfg.to_xml(), flags=expected_flags)
|
||||
mock_get_interface.assert_has_calls(
|
||||
[mock.call(expected_cfg) for x in range(len(get_interface_calls))])
|
||||
|
||||
if device_not_found:
|
||||
mock_detach_device_flags.assert_not_called()
|
||||
self.assertTrue(mock_warning.called)
|
||||
else:
|
||||
mock_detach_device_flags.assert_called_once_with(
|
||||
expected_cfg.to_xml(), flags=expected_flags)
|
||||
mock_warning.assert_not_called()
|
||||
|
||||
def test_detach_interface_with_running_instance(self):
|
||||
self._test_detach_interface(
|
||||
|
@ -16666,6 +16689,15 @@ class LibvirtDriverTestCase(test.NoDBTestCase):
|
|||
expected_flags=(fakelibvirt.VIR_DOMAIN_AFFECT_CONFIG |
|
||||
fakelibvirt.VIR_DOMAIN_AFFECT_LIVE))
|
||||
|
||||
def test_detach_interface_with_running_instance_device_not_found(self):
|
||||
"""Tests that the interface is detached before we try to detach it.
|
||||
"""
|
||||
self._test_detach_interface(
|
||||
power_state.RUNNING,
|
||||
expected_flags=(fakelibvirt.VIR_DOMAIN_AFFECT_CONFIG |
|
||||
fakelibvirt.VIR_DOMAIN_AFFECT_LIVE),
|
||||
device_not_found=True)
|
||||
|
||||
def test_detach_interface_with_pause_instance(self):
|
||||
self._test_detach_interface(
|
||||
power_state.PAUSED,
|
||||
|
@ -16750,7 +16782,9 @@ class LibvirtDriverTestCase(test.NoDBTestCase):
|
|||
fakelibvirt.VIR_DOMAIN_AFFECT_LIVE)
|
||||
domain.detachDeviceFlags(expected.to_xml(), flags=expected_flags)
|
||||
self.mox.ReplayAll()
|
||||
self.drvr.detach_interface(self.context, instance, network_info[0])
|
||||
with mock.patch.object(libvirt_guest.Guest, 'get_interface_by_cfg',
|
||||
side_effect=[expected, expected, None]):
|
||||
self.drvr.detach_interface(self.context, instance, network_info[0])
|
||||
self.mox.VerifyAll()
|
||||
|
||||
@mock.patch('nova.virt.libvirt.utils.write_to_file')
|
||||
|
|
|
@ -18,6 +18,7 @@ import sys
|
|||
|
||||
import mock
|
||||
from oslo_utils import encodeutils
|
||||
import six
|
||||
|
||||
from nova import context
|
||||
from nova import exception
|
||||
|
@ -272,9 +273,19 @@ class GuestTestCase(test.NoDBTestCase):
|
|||
|
||||
def test_detach_device_with_retry_device_not_found(self):
|
||||
get_config = mock.Mock(return_value=None)
|
||||
self.assertRaises(
|
||||
ex = self.assertRaises(
|
||||
exception.DeviceNotFound, self.guest.detach_device_with_retry,
|
||||
get_config, "/dev/vdb", persistent=True, live=True)
|
||||
self.assertIn("/dev/vdb", six.text_type(ex))
|
||||
|
||||
def test_detach_device_with_retry_device_not_found_alt_name(self):
|
||||
"""Tests to make sure we use the alternative name in errors."""
|
||||
get_config = mock.Mock(return_value=None)
|
||||
ex = self.assertRaises(
|
||||
exception.DeviceNotFound, self.guest.detach_device_with_retry,
|
||||
get_config, mock.sentinel.device, persistent=True, live=True,
|
||||
alternative_device_name='foo')
|
||||
self.assertIn('foo', six.text_type(ex))
|
||||
|
||||
@mock.patch.object(libvirt_guest.Guest, "detach_device")
|
||||
def test_detach_device_with_retry_operation_failed(self, mock_detach):
|
||||
|
|
|
@ -1407,7 +1407,17 @@ class LibvirtDriver(driver.ComputeDriver):
|
|||
|
||||
state = guest.get_power_state(self._host)
|
||||
live = state in (power_state.RUNNING, power_state.PAUSED)
|
||||
guest.detach_device(interface, persistent=True, live=live)
|
||||
# Now we are going to loop until the interface is detached or we
|
||||
# timeout.
|
||||
wait_for_detach = guest.detach_device_with_retry(
|
||||
guest.get_interface_by_cfg, cfg, persistent=True, live=live,
|
||||
alternative_device_name=vif.get('address'))
|
||||
wait_for_detach()
|
||||
except exception.DeviceNotFound:
|
||||
# The interface is gone so just log it as a warning.
|
||||
LOG.warning(_LW('Detaching interface %(mac)s failed because '
|
||||
'the device is no longer found on the guest.'),
|
||||
{'mac': vif.get('address')}, instance=instance)
|
||||
except libvirt.libvirtError as ex:
|
||||
error_code = ex.get_error_code()
|
||||
if error_code == libvirt.VIR_ERR_NO_DOMAIN:
|
||||
|
@ -1434,7 +1444,7 @@ class LibvirtDriver(driver.ComputeDriver):
|
|||
instance_uuid=instance.uuid)
|
||||
|
||||
# The interface is gone so just log it as a warning.
|
||||
LOG.warning(_LW('Detaching interface %(mac)s failed because '
|
||||
LOG.warning(_LW('Detaching interface %(mac)s failed because '
|
||||
'the device is no longer found on the guest.'),
|
||||
{'mac': mac}, instance=instance)
|
||||
|
||||
|
|
|
@ -364,7 +364,8 @@ class Guest(object):
|
|||
def detach_device_with_retry(self, get_device_conf_func, device,
|
||||
persistent, live, max_retry_count=7,
|
||||
inc_sleep_time=2,
|
||||
max_sleep_time=30):
|
||||
max_sleep_time=30,
|
||||
alternative_device_name=None):
|
||||
"""Detaches a device from the guest. After the initial detach request,
|
||||
a function is returned which can be used to ensure the device is
|
||||
successfully removed from the guest domain (retrying the removal as
|
||||
|
@ -385,7 +386,11 @@ class Guest(object):
|
|||
time will not be incremented using param
|
||||
inc_sleep_time. On reaching this threshold,
|
||||
max_sleep_time will be used as the sleep time.
|
||||
:param alternative_device_name: This is an alternative identifier for
|
||||
the device if device is not an ID, used solely for error messages.
|
||||
"""
|
||||
alternative_device_name = alternative_device_name or device
|
||||
|
||||
def _try_detach_device(conf, persistent=False, live=False):
|
||||
# Raise DeviceNotFound if the device isn't found during detach
|
||||
try:
|
||||
|
@ -398,17 +403,19 @@ class Guest(object):
|
|||
if 'not found' in errmsg:
|
||||
# This will be raised if the live domain
|
||||
# detach fails because the device is not found
|
||||
raise exception.DeviceNotFound(device=device)
|
||||
raise exception.DeviceNotFound(
|
||||
device=alternative_device_name)
|
||||
elif errcode == libvirt.VIR_ERR_INVALID_ARG:
|
||||
errmsg = ex.get_error_message()
|
||||
if 'no target device' in errmsg:
|
||||
# This will be raised if the persistent domain
|
||||
# detach fails because the device is not found
|
||||
raise exception.DeviceNotFound(device=device)
|
||||
raise exception.DeviceNotFound(
|
||||
device=alternative_device_name)
|
||||
|
||||
conf = get_device_conf_func(device)
|
||||
if conf is None:
|
||||
raise exception.DeviceNotFound(device=device)
|
||||
raise exception.DeviceNotFound(device=alternative_device_name)
|
||||
|
||||
_try_detach_device(conf, persistent, live)
|
||||
|
||||
|
@ -424,8 +431,8 @@ class Guest(object):
|
|||
_try_detach_device(config, persistent=False, live=live)
|
||||
|
||||
reason = _("Unable to detach from guest transient domain.")
|
||||
raise exception.DeviceDetachFailed(device=device,
|
||||
reason=reason)
|
||||
raise exception.DeviceDetachFailed(
|
||||
device=alternative_device_name, reason=reason)
|
||||
|
||||
return _do_wait_and_retry_detach
|
||||
|
||||
|
|
Loading…
Reference in New Issue