Wait for cluster node to be available

After a reboot, the Failover Cluster WMI provider may be unavailable.
This change will wait for it when initializing clusterutils, having
a configurable timeout defaulting to infinite.

Closes-Bug: #1841779

Change-Id: Id7f2d65cb0307bc952f1e6a2724295ea524835f3
(cherry picked from commit b6ce9b33dd)
This commit is contained in:
Lucian Petrut 2019-08-28 11:34:54 +03:00
parent 3d224b7704
commit c5450ed0b5
11 changed files with 150 additions and 56 deletions

View File

@ -102,7 +102,8 @@ def get_wrapped_function(function):
def retry_decorator(max_retry_count=5, timeout=None, inc_sleep_time=1,
max_sleep_time=1, exceptions=(), error_codes=(),
pass_retry_context=False):
pass_retry_context=False,
extract_err_code_func=None):
"""Retries invoking the decorated method in case of expected exceptions.
:param max_retry_count: The maximum number of retries performed. If 0, no
@ -126,6 +127,8 @@ def retry_decorator(max_retry_count=5, timeout=None, inc_sleep_time=1,
include a dict containing the 'prevent_retry'
field. If this field is set, no further retries
will be performed.
:param extract_err_code_func: Optional helper function that extracts the
error code from the exception.
"""
if isinstance(error_codes, six.integer_types):
@ -146,7 +149,11 @@ def retry_decorator(max_retry_count=5, timeout=None, inc_sleep_time=1,
return f(*args, **kwargs)
except exceptions as exc:
with excutils.save_and_reraise_exception() as ctxt:
err_code = getattr(exc, 'error_code', None)
if extract_err_code_func:
err_code = extract_err_code_func(exc)
else:
err_code = getattr(exc, 'error_code', None)
expected_err_code = (err_code in error_codes or not
error_codes)
@ -191,6 +198,37 @@ def retry_decorator(max_retry_count=5, timeout=None, inc_sleep_time=1,
return wrapper
def wmi_retry_decorator(exceptions=exceptions.x_wmi, **kwargs):
"""Retry decorator that can be used for specific WMI error codes.
This function will extract the error code from the hresult. Use
wmi_retry_decorator_hresult if you want the original hresult to
be checked.
"""
def err_code_func(exc):
com_error = getattr(exc, 'com_error', None)
if com_error:
return get_com_error_code(com_error)
return retry_decorator(extract_err_code_func=err_code_func,
exceptions=exceptions,
**kwargs)
def wmi_retry_decorator_hresult(exceptions=exceptions.x_wmi, **kwargs):
"""Retry decorator that can be used for specific WMI HRESULTs"""
def err_code_func(exc):
com_error = getattr(exc, 'com_error', None)
if com_error:
return get_com_error_hresult(com_error)
return retry_decorator(extract_err_code_func=err_code_func,
exceptions=exceptions,
**kwargs)
def get_ips(addr):
addr_info = socket.getaddrinfo(addr, None, 0, 0, 0)
# Returns IPv4 and IPv6 addresses, ordered by protocol family
@ -219,6 +257,11 @@ def avoid_blocking_call_decorator(f):
return wrapper
def hresult_to_err_code(hresult):
# The last 2 bytes of the hresult store the error code.
return hresult & 0xFFFF
def get_com_error_hresult(com_error):
try:
return ctypes.c_uint(com_error.excepinfo[5]).value
@ -226,6 +269,12 @@ def get_com_error_hresult(com_error):
LOG.debug("Unable to retrieve COM error hresult: %s", com_error)
def get_com_error_code(com_error):
hres = get_com_error_hresult(com_error)
if hres is not None:
return hresult_to_err_code(hres)
def _is_not_found_exc(exc):
hresult = get_com_error_hresult(exc.com_error)
return hresult == _WBEM_E_NOT_FOUND

View File

@ -36,6 +36,10 @@ os_win_opts = [
help='The default amount of seconds to wait when stopping '
'pending WMI jobs. Setting this value to 0 will '
'disable the timeout.'),
cfg.IntOpt('connect_cluster_timeout',
default=0,
help='The amount of time to wait for the Failover Cluster '
'service to be available.')
]
CONF = cfg.CONF

View File

@ -34,6 +34,7 @@ class FakeWMIExc(exceptions.x_wmi):
super(FakeWMIExc, self).__init__()
excepinfo = [None] * 5 + [hresult]
self.com_error = mock.Mock(excepinfo=excepinfo)
self.com_error.hresult = hresult
class BaseTestCase(base.BaseTestCase):

View File

@ -62,10 +62,11 @@ class UtilsTestCase(test_base.BaseTestCase):
self.assertEqual(('', ''), result)
def _get_fake_func_with_retry_decorator(self, side_effect,
decorator=_utils.retry_decorator,
*args, **kwargs):
func_side_effect = mock.Mock(side_effect=side_effect)
@_utils.retry_decorator(*args, **kwargs)
@decorator(*args, **kwargs)
def fake_func(*_args, **_kwargs):
return func_side_effect(*_args, **_kwargs)
@ -228,6 +229,38 @@ class UtilsTestCase(test_base.BaseTestCase):
else:
self.assertFalse(mock_execute.called)
@mock.patch.object(_utils, 'time')
@ddt.data(True, False)
def test_wmi_retry_decorator(self, expect_hres, mock_time):
expected_hres = 0x8007beef
expected_err_code = expected_hres if expect_hres else 0xbeef
other_hres = 0x80070001
max_retry_count = 5
# The second exception will contain an unexpected error code,
# in which case we expect the function to propagate the error.
expected_try_count = 2
side_effect = [test_base.FakeWMIExc(hresult=expected_hres),
test_base.FakeWMIExc(hresult=other_hres)]
decorator = (_utils.wmi_retry_decorator_hresult if expect_hres
else _utils.wmi_retry_decorator)
(fake_func,
fake_func_side_effect) = self._get_fake_func_with_retry_decorator(
error_codes=expected_err_code,
max_retry_count=max_retry_count,
decorator=decorator,
side_effect=side_effect)
self.assertRaises(test_base.FakeWMIExc,
fake_func,
mock.sentinel.arg,
kwarg=mock.sentinel.kwarg)
fake_func_side_effect.assert_has_calls(
[mock.call(mock.sentinel.arg, kwarg=mock.sentinel.kwarg)] *
expected_try_count)
def test_get_com_error_hresult(self):
fake_hres = -5
expected_hres = (1 << 32) + fake_hres
@ -242,17 +275,33 @@ class UtilsTestCase(test_base.BaseTestCase):
ret_val = _utils.get_com_error_hresult(None)
self.assertIsNone(ret_val)
@ddt.data(_utils._WBEM_E_NOT_FOUND, mock.sentinel.wbem_error)
def test_hresult_to_err_code(self):
# This could differ based on the error source.
# Only the last 2 bytes of the hresult the error code.
fake_file_exists_hres = -0x7ff8ffb0
file_exists_err_code = 0x50
ret_val = _utils.hresult_to_err_code(fake_file_exists_hres)
self.assertEqual(file_exists_err_code, ret_val)
@mock.patch.object(_utils, 'get_com_error_hresult')
def test_is_not_found_exc(self, hresult, mock_get_com_error_hresult):
mock_get_com_error_hresult.return_value = hresult
exc = mock.MagicMock()
@mock.patch.object(_utils, 'hresult_to_err_code')
def test_get_com_error_code(self, mock_hres_to_err_code, mock_get_hresult):
ret_val = _utils.get_com_error_code(mock.sentinel.com_err)
self.assertEqual(mock_hres_to_err_code.return_value, ret_val)
mock_get_hresult.assert_called_once_with(mock.sentinel.com_err)
mock_hres_to_err_code.assert_called_once_with(
mock_get_hresult.return_value)
@ddt.data(_utils._WBEM_E_NOT_FOUND, mock.sentinel.wbem_error)
def test_is_not_found_exc(self, hresult):
exc = test_base.FakeWMIExc(hresult=hresult)
result = _utils._is_not_found_exc(exc)
expected = hresult == _utils._WBEM_E_NOT_FOUND
self.assertEqual(expected, result)
mock_get_com_error_hresult.assert_called_once_with(exc.com_error)
@mock.patch.object(_utils, 'get_com_error_hresult')
def test_not_found_decorator(self, mock_get_com_error_hresult):

View File

@ -66,13 +66,14 @@ class ClusterUtilsTestCase(test_base.OsWinBaseTestCase):
self._clusterutils._get_wmi_conn = mock.MagicMock()
self._clusterutils._get_wmi_conn.return_value = mock_conn
self._clusterutils._init_hyperv_conn("fake_host")
self._clusterutils._init_hyperv_conn("fake_host", timeout=1)
def test_init_hyperv_conn_exception(self):
self._clusterutils._get_wmi_conn = mock.MagicMock()
self._clusterutils._get_wmi_conn.side_effect = AttributeError
self.assertRaises(exceptions.HyperVClusterException,
self._clusterutils._init_hyperv_conn, "fake_host")
self._clusterutils._init_hyperv_conn, "fake_host",
timeout=1)
@mock.patch.object(clusterutils.ClusterUtils,
'_get_cluster_nodes')

View File

@ -215,11 +215,13 @@ class ISCSITargetUtilsTestCase(test_base.OsWinBaseTestCase):
def test_get_wt_idmethod_not_found(self):
self._test_get_wt_idmethod(idmeth_found=False)
def _test_create_iscsi_target_exception(self, target_exists=False,
@mock.patch('os_win._utils.get_com_error_code')
def _test_create_iscsi_target_exception(self, mock_get_com_err_code,
target_exists=False,
fail_if_exists=False):
mock_wt_host_cls = self._tgutils._conn_wmi.WT_Host
mock_wt_host_cls.NewHost.side_effect = test_base.FakeWMIExc
self._tgutils._win32utils.get_com_err_code.return_value = (
mock_get_com_err_code.return_value = (
self._tgutils._ERR_FILE_EXISTS if target_exists else 1)
if target_exists and not fail_if_exists:

View File

@ -179,25 +179,6 @@ class Win32UtilsTestCase(test_base.BaseTestCase):
last_err)
win32utils.kernel32.SetLastError.assert_called_once_with(0)
def test_hresult_to_err_code(self):
# This could differ based on the error source.
# Only the last 2 bytes of the hresult the error code.
fake_file_exists_hres = -0x7ff8ffb0
file_exists_err_code = 0x50
ret_val = self._win32_utils.hresult_to_err_code(fake_file_exists_hres)
self.assertEqual(file_exists_err_code, ret_val)
@mock.patch.object(win32utils._utils, 'get_com_error_hresult')
@mock.patch.object(win32utils.Win32Utils, 'hresult_to_err_code')
def test_get_com_err_code(self, mock_hres_to_err_code, mock_get_hresult):
ret_val = self._win32_utils.get_com_err_code(mock.sentinel.com_err)
self.assertEqual(mock_hres_to_err_code.return_value, ret_val)
mock_get_hresult.assert_called_once_with(mock.sentinel.com_err)
mock_hres_to_err_code.assert_called_once_with(
mock_get_hresult.return_value)
@ddt.data(0, 1)
@mock.patch.object(win32utils.LOG, 'exception')
def test_local_free(self, ret_val, mock_log_exc):

View File

@ -31,6 +31,7 @@ from six.moves import queue
from os_win._i18n import _
from os_win import _utils
import os_win.conf
from os_win import constants
from os_win import exceptions
from os_win.utils import baseutils
@ -39,6 +40,7 @@ from os_win.utils.winapi import constants as w_const
from os_win.utils.winapi.libs import clusapi as clusapi_def
from os_win.utils.winapi import wintypes
CONF = os_win.conf.CONF
LOG = logging.getLogger(__name__)
@ -62,27 +64,39 @@ class ClusterUtils(baseutils.BaseUtils):
_WMI_EVENT_TIMEOUT_MS = 100
_WMI_EVENT_CHECK_INTERVAL = 2
def __init__(self, host='.'):
def __init__(self, host='.', timeout=CONF.os_win.connect_cluster_timeout):
self._instance_name_regex = re.compile('Virtual Machine (.*)')
self._clusapi_utils = _clusapi_utils.ClusApiUtils()
self._cmgr = _clusapi_utils.ClusterContextManager()
if sys.platform == 'win32':
self._init_hyperv_conn(host)
self._init_hyperv_conn(host, timeout)
def _init_hyperv_conn(self, host):
try:
self._conn_cluster = self._get_wmi_conn(
self._MS_CLUSTER_NAMESPACE % host)
self._cluster = self._conn_cluster.MSCluster_Cluster()[0]
def _init_hyperv_conn(self, host, timeout):
# extract this node name from cluster's path
path = self._cluster.path_()
self._this_node = re.search(r'\\\\(.*)\\root', path,
re.IGNORECASE).group(1)
except AttributeError:
raise exceptions.HyperVClusterException(
_("Could not initialize cluster wmi connection."))
# The Failover Cluster WMI provider may be unavailable after a reboot.
# Let's wait for it.
@_utils.wmi_retry_decorator(
error_codes=(w_const.ERROR_SHARING_PAUSED,
w_const.EPT_S_NOT_REGISTERED),
max_sleep_time=5,
max_retry_count=None,
timeout=timeout)
def init():
try:
self._conn_cluster = self._get_wmi_conn(
self._MS_CLUSTER_NAMESPACE % host)
self._cluster = self._conn_cluster.MSCluster_Cluster()[0]
# extract this node name from cluster's path
path = self._cluster.path_()
self._this_node = re.search(r'\\\\(.*)\\root', path,
re.IGNORECASE).group(1)
except AttributeError:
raise exceptions.HyperVClusterException(
_("Could not initialize cluster wmi connection."))
init()
def _get_failover_watcher(self):
raw_query = ("SELECT * FROM __InstanceModificationEvent "

View File

@ -17,6 +17,7 @@ from oslo_log import log as logging
import six
from os_win._i18n import _
from os_win import _utils
from os_win import constants
from os_win import exceptions
from os_win.utils import baseutils
@ -117,7 +118,7 @@ class ISCSITargetUtils(baseutils.BaseUtils):
try:
self._conn_wmi.WT_Host.NewHost(HostName=target_name)
except exceptions.x_wmi as wmi_exc:
err_code = self._win32utils.get_com_err_code(wmi_exc.com_error)
err_code = _utils.get_com_error_code(wmi_exc.com_error)
target_exists = err_code == self._ERR_FILE_EXISTS
if not target_exists or fail_if_exists:

View File

@ -110,16 +110,6 @@ class Win32Utils(object):
kernel32.SetLastError(0)
return error_code
@staticmethod
def hresult_to_err_code(hresult):
# The last 2 bytes of the hresult store the error code.
return hresult & 0xFFFF
def get_com_err_code(self, com_error):
hres = _utils.get_com_error_hresult(com_error)
if hres is not None:
return self.hresult_to_err_code(hres)
def local_free(self, handle):
try:
self._run_and_check_output(kernel32.LocalFree, handle)

View File

@ -20,6 +20,7 @@ from os_win.utils.winapi import wintypes
# winerror.h
ERROR_INVALID_HANDLE = 6
ERROR_NOT_READY = 21
ERROR_SHARING_PAUSED = 70
ERROR_INSUFFICIENT_BUFFER = 122
ERROR_DIR_IS_NOT_EMPTY = 145
ERROR_PIPE_BUSY = 231
@ -28,6 +29,7 @@ ERROR_MORE_DATA = 234
ERROR_WAIT_TIMEOUT = 258
ERROR_IO_PENDING = 997
ERROR_NOT_FOUND = 1168
EPT_S_NOT_REGISTERED = 1753
# Cluster errors
ERROR_DEPENDENCY_NOT_FOUND = 5002