Wait for cluster node to be available
After a reboot, the Failover Cluster WMI provider may be unavailable.
This change will wait for it when initializing clusterutils, having
a configurable timeout defaulting to infinite.
Closes-Bug: #1841779
Change-Id: Id7f2d65cb0307bc952f1e6a2724295ea524835f3
(cherry picked from commit b6ce9b33dd
)
This commit is contained in:
parent
3d224b7704
commit
c5450ed0b5
|
@ -102,7 +102,8 @@ def get_wrapped_function(function):
|
|||
|
||||
def retry_decorator(max_retry_count=5, timeout=None, inc_sleep_time=1,
|
||||
max_sleep_time=1, exceptions=(), error_codes=(),
|
||||
pass_retry_context=False):
|
||||
pass_retry_context=False,
|
||||
extract_err_code_func=None):
|
||||
"""Retries invoking the decorated method in case of expected exceptions.
|
||||
|
||||
:param max_retry_count: The maximum number of retries performed. If 0, no
|
||||
|
@ -126,6 +127,8 @@ def retry_decorator(max_retry_count=5, timeout=None, inc_sleep_time=1,
|
|||
include a dict containing the 'prevent_retry'
|
||||
field. If this field is set, no further retries
|
||||
will be performed.
|
||||
:param extract_err_code_func: Optional helper function that extracts the
|
||||
error code from the exception.
|
||||
"""
|
||||
|
||||
if isinstance(error_codes, six.integer_types):
|
||||
|
@ -146,7 +149,11 @@ def retry_decorator(max_retry_count=5, timeout=None, inc_sleep_time=1,
|
|||
return f(*args, **kwargs)
|
||||
except exceptions as exc:
|
||||
with excutils.save_and_reraise_exception() as ctxt:
|
||||
err_code = getattr(exc, 'error_code', None)
|
||||
if extract_err_code_func:
|
||||
err_code = extract_err_code_func(exc)
|
||||
else:
|
||||
err_code = getattr(exc, 'error_code', None)
|
||||
|
||||
expected_err_code = (err_code in error_codes or not
|
||||
error_codes)
|
||||
|
||||
|
@ -191,6 +198,37 @@ def retry_decorator(max_retry_count=5, timeout=None, inc_sleep_time=1,
|
|||
return wrapper
|
||||
|
||||
|
||||
def wmi_retry_decorator(exceptions=exceptions.x_wmi, **kwargs):
|
||||
"""Retry decorator that can be used for specific WMI error codes.
|
||||
|
||||
This function will extract the error code from the hresult. Use
|
||||
wmi_retry_decorator_hresult if you want the original hresult to
|
||||
be checked.
|
||||
"""
|
||||
|
||||
def err_code_func(exc):
|
||||
com_error = getattr(exc, 'com_error', None)
|
||||
if com_error:
|
||||
return get_com_error_code(com_error)
|
||||
|
||||
return retry_decorator(extract_err_code_func=err_code_func,
|
||||
exceptions=exceptions,
|
||||
**kwargs)
|
||||
|
||||
|
||||
def wmi_retry_decorator_hresult(exceptions=exceptions.x_wmi, **kwargs):
|
||||
"""Retry decorator that can be used for specific WMI HRESULTs"""
|
||||
|
||||
def err_code_func(exc):
|
||||
com_error = getattr(exc, 'com_error', None)
|
||||
if com_error:
|
||||
return get_com_error_hresult(com_error)
|
||||
|
||||
return retry_decorator(extract_err_code_func=err_code_func,
|
||||
exceptions=exceptions,
|
||||
**kwargs)
|
||||
|
||||
|
||||
def get_ips(addr):
|
||||
addr_info = socket.getaddrinfo(addr, None, 0, 0, 0)
|
||||
# Returns IPv4 and IPv6 addresses, ordered by protocol family
|
||||
|
@ -219,6 +257,11 @@ def avoid_blocking_call_decorator(f):
|
|||
return wrapper
|
||||
|
||||
|
||||
def hresult_to_err_code(hresult):
|
||||
# The last 2 bytes of the hresult store the error code.
|
||||
return hresult & 0xFFFF
|
||||
|
||||
|
||||
def get_com_error_hresult(com_error):
|
||||
try:
|
||||
return ctypes.c_uint(com_error.excepinfo[5]).value
|
||||
|
@ -226,6 +269,12 @@ def get_com_error_hresult(com_error):
|
|||
LOG.debug("Unable to retrieve COM error hresult: %s", com_error)
|
||||
|
||||
|
||||
def get_com_error_code(com_error):
|
||||
hres = get_com_error_hresult(com_error)
|
||||
if hres is not None:
|
||||
return hresult_to_err_code(hres)
|
||||
|
||||
|
||||
def _is_not_found_exc(exc):
|
||||
hresult = get_com_error_hresult(exc.com_error)
|
||||
return hresult == _WBEM_E_NOT_FOUND
|
||||
|
|
|
@ -36,6 +36,10 @@ os_win_opts = [
|
|||
help='The default amount of seconds to wait when stopping '
|
||||
'pending WMI jobs. Setting this value to 0 will '
|
||||
'disable the timeout.'),
|
||||
cfg.IntOpt('connect_cluster_timeout',
|
||||
default=0,
|
||||
help='The amount of time to wait for the Failover Cluster '
|
||||
'service to be available.')
|
||||
]
|
||||
|
||||
CONF = cfg.CONF
|
||||
|
|
|
@ -34,6 +34,7 @@ class FakeWMIExc(exceptions.x_wmi):
|
|||
super(FakeWMIExc, self).__init__()
|
||||
excepinfo = [None] * 5 + [hresult]
|
||||
self.com_error = mock.Mock(excepinfo=excepinfo)
|
||||
self.com_error.hresult = hresult
|
||||
|
||||
|
||||
class BaseTestCase(base.BaseTestCase):
|
||||
|
|
|
@ -62,10 +62,11 @@ class UtilsTestCase(test_base.BaseTestCase):
|
|||
self.assertEqual(('', ''), result)
|
||||
|
||||
def _get_fake_func_with_retry_decorator(self, side_effect,
|
||||
decorator=_utils.retry_decorator,
|
||||
*args, **kwargs):
|
||||
func_side_effect = mock.Mock(side_effect=side_effect)
|
||||
|
||||
@_utils.retry_decorator(*args, **kwargs)
|
||||
@decorator(*args, **kwargs)
|
||||
def fake_func(*_args, **_kwargs):
|
||||
return func_side_effect(*_args, **_kwargs)
|
||||
|
||||
|
@ -228,6 +229,38 @@ class UtilsTestCase(test_base.BaseTestCase):
|
|||
else:
|
||||
self.assertFalse(mock_execute.called)
|
||||
|
||||
@mock.patch.object(_utils, 'time')
|
||||
@ddt.data(True, False)
|
||||
def test_wmi_retry_decorator(self, expect_hres, mock_time):
|
||||
expected_hres = 0x8007beef
|
||||
expected_err_code = expected_hres if expect_hres else 0xbeef
|
||||
other_hres = 0x80070001
|
||||
max_retry_count = 5
|
||||
# The second exception will contain an unexpected error code,
|
||||
# in which case we expect the function to propagate the error.
|
||||
expected_try_count = 2
|
||||
|
||||
side_effect = [test_base.FakeWMIExc(hresult=expected_hres),
|
||||
test_base.FakeWMIExc(hresult=other_hres)]
|
||||
|
||||
decorator = (_utils.wmi_retry_decorator_hresult if expect_hres
|
||||
else _utils.wmi_retry_decorator)
|
||||
(fake_func,
|
||||
fake_func_side_effect) = self._get_fake_func_with_retry_decorator(
|
||||
error_codes=expected_err_code,
|
||||
max_retry_count=max_retry_count,
|
||||
decorator=decorator,
|
||||
side_effect=side_effect)
|
||||
|
||||
self.assertRaises(test_base.FakeWMIExc,
|
||||
fake_func,
|
||||
mock.sentinel.arg,
|
||||
kwarg=mock.sentinel.kwarg)
|
||||
|
||||
fake_func_side_effect.assert_has_calls(
|
||||
[mock.call(mock.sentinel.arg, kwarg=mock.sentinel.kwarg)] *
|
||||
expected_try_count)
|
||||
|
||||
def test_get_com_error_hresult(self):
|
||||
fake_hres = -5
|
||||
expected_hres = (1 << 32) + fake_hres
|
||||
|
@ -242,17 +275,33 @@ class UtilsTestCase(test_base.BaseTestCase):
|
|||
ret_val = _utils.get_com_error_hresult(None)
|
||||
self.assertIsNone(ret_val)
|
||||
|
||||
@ddt.data(_utils._WBEM_E_NOT_FOUND, mock.sentinel.wbem_error)
|
||||
def test_hresult_to_err_code(self):
|
||||
# This could differ based on the error source.
|
||||
# Only the last 2 bytes of the hresult the error code.
|
||||
fake_file_exists_hres = -0x7ff8ffb0
|
||||
file_exists_err_code = 0x50
|
||||
|
||||
ret_val = _utils.hresult_to_err_code(fake_file_exists_hres)
|
||||
self.assertEqual(file_exists_err_code, ret_val)
|
||||
|
||||
@mock.patch.object(_utils, 'get_com_error_hresult')
|
||||
def test_is_not_found_exc(self, hresult, mock_get_com_error_hresult):
|
||||
mock_get_com_error_hresult.return_value = hresult
|
||||
exc = mock.MagicMock()
|
||||
@mock.patch.object(_utils, 'hresult_to_err_code')
|
||||
def test_get_com_error_code(self, mock_hres_to_err_code, mock_get_hresult):
|
||||
ret_val = _utils.get_com_error_code(mock.sentinel.com_err)
|
||||
|
||||
self.assertEqual(mock_hres_to_err_code.return_value, ret_val)
|
||||
mock_get_hresult.assert_called_once_with(mock.sentinel.com_err)
|
||||
mock_hres_to_err_code.assert_called_once_with(
|
||||
mock_get_hresult.return_value)
|
||||
|
||||
@ddt.data(_utils._WBEM_E_NOT_FOUND, mock.sentinel.wbem_error)
|
||||
def test_is_not_found_exc(self, hresult):
|
||||
exc = test_base.FakeWMIExc(hresult=hresult)
|
||||
|
||||
result = _utils._is_not_found_exc(exc)
|
||||
|
||||
expected = hresult == _utils._WBEM_E_NOT_FOUND
|
||||
self.assertEqual(expected, result)
|
||||
mock_get_com_error_hresult.assert_called_once_with(exc.com_error)
|
||||
|
||||
@mock.patch.object(_utils, 'get_com_error_hresult')
|
||||
def test_not_found_decorator(self, mock_get_com_error_hresult):
|
||||
|
|
|
@ -66,13 +66,14 @@ class ClusterUtilsTestCase(test_base.OsWinBaseTestCase):
|
|||
self._clusterutils._get_wmi_conn = mock.MagicMock()
|
||||
self._clusterutils._get_wmi_conn.return_value = mock_conn
|
||||
|
||||
self._clusterutils._init_hyperv_conn("fake_host")
|
||||
self._clusterutils._init_hyperv_conn("fake_host", timeout=1)
|
||||
|
||||
def test_init_hyperv_conn_exception(self):
|
||||
self._clusterutils._get_wmi_conn = mock.MagicMock()
|
||||
self._clusterutils._get_wmi_conn.side_effect = AttributeError
|
||||
self.assertRaises(exceptions.HyperVClusterException,
|
||||
self._clusterutils._init_hyperv_conn, "fake_host")
|
||||
self._clusterutils._init_hyperv_conn, "fake_host",
|
||||
timeout=1)
|
||||
|
||||
@mock.patch.object(clusterutils.ClusterUtils,
|
||||
'_get_cluster_nodes')
|
||||
|
|
|
@ -215,11 +215,13 @@ class ISCSITargetUtilsTestCase(test_base.OsWinBaseTestCase):
|
|||
def test_get_wt_idmethod_not_found(self):
|
||||
self._test_get_wt_idmethod(idmeth_found=False)
|
||||
|
||||
def _test_create_iscsi_target_exception(self, target_exists=False,
|
||||
@mock.patch('os_win._utils.get_com_error_code')
|
||||
def _test_create_iscsi_target_exception(self, mock_get_com_err_code,
|
||||
target_exists=False,
|
||||
fail_if_exists=False):
|
||||
mock_wt_host_cls = self._tgutils._conn_wmi.WT_Host
|
||||
mock_wt_host_cls.NewHost.side_effect = test_base.FakeWMIExc
|
||||
self._tgutils._win32utils.get_com_err_code.return_value = (
|
||||
mock_get_com_err_code.return_value = (
|
||||
self._tgutils._ERR_FILE_EXISTS if target_exists else 1)
|
||||
|
||||
if target_exists and not fail_if_exists:
|
||||
|
|
|
@ -179,25 +179,6 @@ class Win32UtilsTestCase(test_base.BaseTestCase):
|
|||
last_err)
|
||||
win32utils.kernel32.SetLastError.assert_called_once_with(0)
|
||||
|
||||
def test_hresult_to_err_code(self):
|
||||
# This could differ based on the error source.
|
||||
# Only the last 2 bytes of the hresult the error code.
|
||||
fake_file_exists_hres = -0x7ff8ffb0
|
||||
file_exists_err_code = 0x50
|
||||
|
||||
ret_val = self._win32_utils.hresult_to_err_code(fake_file_exists_hres)
|
||||
self.assertEqual(file_exists_err_code, ret_val)
|
||||
|
||||
@mock.patch.object(win32utils._utils, 'get_com_error_hresult')
|
||||
@mock.patch.object(win32utils.Win32Utils, 'hresult_to_err_code')
|
||||
def test_get_com_err_code(self, mock_hres_to_err_code, mock_get_hresult):
|
||||
ret_val = self._win32_utils.get_com_err_code(mock.sentinel.com_err)
|
||||
|
||||
self.assertEqual(mock_hres_to_err_code.return_value, ret_val)
|
||||
mock_get_hresult.assert_called_once_with(mock.sentinel.com_err)
|
||||
mock_hres_to_err_code.assert_called_once_with(
|
||||
mock_get_hresult.return_value)
|
||||
|
||||
@ddt.data(0, 1)
|
||||
@mock.patch.object(win32utils.LOG, 'exception')
|
||||
def test_local_free(self, ret_val, mock_log_exc):
|
||||
|
|
|
@ -31,6 +31,7 @@ from six.moves import queue
|
|||
|
||||
from os_win._i18n import _
|
||||
from os_win import _utils
|
||||
import os_win.conf
|
||||
from os_win import constants
|
||||
from os_win import exceptions
|
||||
from os_win.utils import baseutils
|
||||
|
@ -39,6 +40,7 @@ from os_win.utils.winapi import constants as w_const
|
|||
from os_win.utils.winapi.libs import clusapi as clusapi_def
|
||||
from os_win.utils.winapi import wintypes
|
||||
|
||||
CONF = os_win.conf.CONF
|
||||
LOG = logging.getLogger(__name__)
|
||||
|
||||
|
||||
|
@ -62,27 +64,39 @@ class ClusterUtils(baseutils.BaseUtils):
|
|||
_WMI_EVENT_TIMEOUT_MS = 100
|
||||
_WMI_EVENT_CHECK_INTERVAL = 2
|
||||
|
||||
def __init__(self, host='.'):
|
||||
def __init__(self, host='.', timeout=CONF.os_win.connect_cluster_timeout):
|
||||
self._instance_name_regex = re.compile('Virtual Machine (.*)')
|
||||
self._clusapi_utils = _clusapi_utils.ClusApiUtils()
|
||||
self._cmgr = _clusapi_utils.ClusterContextManager()
|
||||
|
||||
if sys.platform == 'win32':
|
||||
self._init_hyperv_conn(host)
|
||||
self._init_hyperv_conn(host, timeout)
|
||||
|
||||
def _init_hyperv_conn(self, host):
|
||||
try:
|
||||
self._conn_cluster = self._get_wmi_conn(
|
||||
self._MS_CLUSTER_NAMESPACE % host)
|
||||
self._cluster = self._conn_cluster.MSCluster_Cluster()[0]
|
||||
def _init_hyperv_conn(self, host, timeout):
|
||||
|
||||
# extract this node name from cluster's path
|
||||
path = self._cluster.path_()
|
||||
self._this_node = re.search(r'\\\\(.*)\\root', path,
|
||||
re.IGNORECASE).group(1)
|
||||
except AttributeError:
|
||||
raise exceptions.HyperVClusterException(
|
||||
_("Could not initialize cluster wmi connection."))
|
||||
# The Failover Cluster WMI provider may be unavailable after a reboot.
|
||||
# Let's wait for it.
|
||||
@_utils.wmi_retry_decorator(
|
||||
error_codes=(w_const.ERROR_SHARING_PAUSED,
|
||||
w_const.EPT_S_NOT_REGISTERED),
|
||||
max_sleep_time=5,
|
||||
max_retry_count=None,
|
||||
timeout=timeout)
|
||||
def init():
|
||||
try:
|
||||
self._conn_cluster = self._get_wmi_conn(
|
||||
self._MS_CLUSTER_NAMESPACE % host)
|
||||
self._cluster = self._conn_cluster.MSCluster_Cluster()[0]
|
||||
|
||||
# extract this node name from cluster's path
|
||||
path = self._cluster.path_()
|
||||
self._this_node = re.search(r'\\\\(.*)\\root', path,
|
||||
re.IGNORECASE).group(1)
|
||||
except AttributeError:
|
||||
raise exceptions.HyperVClusterException(
|
||||
_("Could not initialize cluster wmi connection."))
|
||||
|
||||
init()
|
||||
|
||||
def _get_failover_watcher(self):
|
||||
raw_query = ("SELECT * FROM __InstanceModificationEvent "
|
||||
|
|
|
@ -17,6 +17,7 @@ from oslo_log import log as logging
|
|||
import six
|
||||
|
||||
from os_win._i18n import _
|
||||
from os_win import _utils
|
||||
from os_win import constants
|
||||
from os_win import exceptions
|
||||
from os_win.utils import baseutils
|
||||
|
@ -117,7 +118,7 @@ class ISCSITargetUtils(baseutils.BaseUtils):
|
|||
try:
|
||||
self._conn_wmi.WT_Host.NewHost(HostName=target_name)
|
||||
except exceptions.x_wmi as wmi_exc:
|
||||
err_code = self._win32utils.get_com_err_code(wmi_exc.com_error)
|
||||
err_code = _utils.get_com_error_code(wmi_exc.com_error)
|
||||
target_exists = err_code == self._ERR_FILE_EXISTS
|
||||
|
||||
if not target_exists or fail_if_exists:
|
||||
|
|
|
@ -110,16 +110,6 @@ class Win32Utils(object):
|
|||
kernel32.SetLastError(0)
|
||||
return error_code
|
||||
|
||||
@staticmethod
|
||||
def hresult_to_err_code(hresult):
|
||||
# The last 2 bytes of the hresult store the error code.
|
||||
return hresult & 0xFFFF
|
||||
|
||||
def get_com_err_code(self, com_error):
|
||||
hres = _utils.get_com_error_hresult(com_error)
|
||||
if hres is not None:
|
||||
return self.hresult_to_err_code(hres)
|
||||
|
||||
def local_free(self, handle):
|
||||
try:
|
||||
self._run_and_check_output(kernel32.LocalFree, handle)
|
||||
|
|
|
@ -20,6 +20,7 @@ from os_win.utils.winapi import wintypes
|
|||
# winerror.h
|
||||
ERROR_INVALID_HANDLE = 6
|
||||
ERROR_NOT_READY = 21
|
||||
ERROR_SHARING_PAUSED = 70
|
||||
ERROR_INSUFFICIENT_BUFFER = 122
|
||||
ERROR_DIR_IS_NOT_EMPTY = 145
|
||||
ERROR_PIPE_BUSY = 231
|
||||
|
@ -28,6 +29,7 @@ ERROR_MORE_DATA = 234
|
|||
ERROR_WAIT_TIMEOUT = 258
|
||||
ERROR_IO_PENDING = 997
|
||||
ERROR_NOT_FOUND = 1168
|
||||
EPT_S_NOT_REGISTERED = 1753
|
||||
|
||||
# Cluster errors
|
||||
ERROR_DEPENDENCY_NOT_FOUND = 5002
|
||||
|
|
Loading…
Reference in New Issue