Merge "Node poll URL improvements"
This commit is contained in:
commit
73b31d7c34
|
@ -312,11 +312,27 @@ class HealthManager(service.Service):
|
|||
|
||||
url_template = params['poll_url']
|
||||
verify_ssl = params['poll_url_ssl_verify']
|
||||
conn_error_as_unhealthy = params['poll_url_conn_error_as_unhealthy']
|
||||
expected_resp_str = params['poll_url_healthy_response']
|
||||
max_unhealthy_retry = params['poll_url_retry_limit']
|
||||
retry_interval = params['poll_url_retry_interval']
|
||||
node_update_timeout = params['node_update_timeout']
|
||||
|
||||
def stop_node_recovery():
    """Tell the caller whether recovery of the node should be abandoned.

    Uses ``node``, ``node_update_timeout``, ``retry_interval`` and
    ``available_attemps`` from the enclosing scope.

    :returns: True when the node's last update (``updated_at``, falling
              back to ``init_at``) is not older than
              ``node_update_timeout``; in that case recovery is skipped.
              Otherwise the remaining retries are logged, the helper
              sleeps for ``retry_interval`` and False is returned.
    """
    last_update = node.updated_at or node.init_at
    recently_updated = not timeutils.is_older_than(
        last_update, node_update_timeout)

    if recently_updated:
        LOG.info("Node %s was updated at %s which is less than "
                 "%d secs ago. Skip node recovery.",
                 node.id, last_update, node_update_timeout)
        return True

    # Node still looks down: report it and wait before the next attempt.
    LOG.info("Node %s is reported as down (%d retries left)",
             node.id, available_attemps)
    time.sleep(retry_interval)
    return False
|
||||
|
||||
url = self._expand_url_template(url_template, node)
|
||||
LOG.info("Polling node status from URL: %s", url)
|
||||
|
||||
|
@ -327,9 +343,14 @@ class HealthManager(service.Service):
|
|||
try:
|
||||
result = utils.url_fetch(url, verify=verify_ssl)
|
||||
except utils.URLFetchError as ex:
|
||||
LOG.error("Error when requesting node health status from"
|
||||
" %s: %s", url, ex)
|
||||
return None
|
||||
if conn_error_as_unhealthy:
|
||||
if stop_node_recovery():
|
||||
return None
|
||||
continue
|
||||
else:
|
||||
LOG.error("Error when requesting node health status from"
|
||||
" %s: %s", url, ex)
|
||||
return None
|
||||
|
||||
LOG.debug("Node status returned from URL(%s): %s", url,
|
||||
result)
|
||||
|
@ -342,18 +363,9 @@ class HealthManager(service.Service):
|
|||
"ACTIVE state", node.id)
|
||||
return None
|
||||
|
||||
node_last_updated = node.updated_at or node.init_at
|
||||
if not timeutils.is_older_than(
|
||||
node_last_updated, node_update_timeout):
|
||||
LOG.info("Node %s was updated at %s which is less than "
|
||||
"%d secs ago. Skip node recovery.",
|
||||
node.id, node_last_updated, node_update_timeout)
|
||||
if stop_node_recovery():
|
||||
return None
|
||||
|
||||
LOG.info("Node %s is reported as down (%d retries left)",
|
||||
node.id, available_attemps)
|
||||
time.sleep(retry_interval)
|
||||
|
||||
# recover node after exhausting retries
|
||||
LOG.info("Requesting node recovery: %s", node.id)
|
||||
req = objects.NodeRecoverRequest(identity=node.id,
|
||||
|
|
|
@ -62,12 +62,13 @@ class HealthPolicy(base.Policy):
|
|||
|
||||
_DETECTION_OPTIONS = (
|
||||
DETECTION_INTERVAL, POLL_URL, POLL_URL_SSL_VERIFY,
|
||||
POLL_URL_HEALTHY_RESPONSE, POLL_URL_RETRY_LIMIT,
|
||||
POLL_URL_RETRY_INTERVAL, NODE_UPDATE_TIMEOUT,
|
||||
POLL_URL_CONN_ERROR_AS_UNHEALTHY, POLL_URL_HEALTHY_RESPONSE,
|
||||
POLL_URL_RETRY_LIMIT, POLL_URL_RETRY_INTERVAL, NODE_UPDATE_TIMEOUT,
|
||||
) = (
|
||||
'interval', 'poll_url', 'poll_url_ssl_verify',
|
||||
'poll_url_healthy_response', 'poll_url_retry_limit',
|
||||
'poll_url_retry_interval', 'node_update_timeout',
|
||||
'poll_url_conn_error_as_unhealthy', 'poll_url_healthy_response',
|
||||
'poll_url_retry_limit', 'poll_url_retry_interval',
|
||||
'node_update_timeout',
|
||||
)
|
||||
|
||||
_RECOVERY_KEYS = (
|
||||
|
@ -122,6 +123,12 @@ class HealthPolicy(base.Policy):
|
|||
"'NODE_STATUS_POLL_URL'."),
|
||||
default=True,
|
||||
),
|
||||
POLL_URL_CONN_ERROR_AS_UNHEALTHY: schema.Boolean(
|
||||
_("Whether to treat URL connection errors as an "
|
||||
"indication of an unhealthy node. Only required "
|
||||
"when type is 'NODE_STATUS_POLL_URL'."),
|
||||
default=True,
|
||||
),
|
||||
POLL_URL_HEALTHY_RESPONSE: schema.String(
|
||||
_("String pattern in the poll URL response body "
|
||||
"that indicates a healthy node. "
|
||||
|
@ -215,6 +222,8 @@ class HealthPolicy(base.Policy):
|
|||
self.interval = options.get(self.DETECTION_INTERVAL, 60)
|
||||
self.poll_url = options.get(self.POLL_URL, '')
|
||||
self.poll_url_ssl_verify = options.get(self.POLL_URL_SSL_VERIFY, True)
|
||||
self.poll_url_conn_error_as_unhealthy = options.get(
|
||||
self.POLL_URL_CONN_ERROR_AS_UNHEALTHY, True)
|
||||
self.poll_url_healthy_response = options.get(
|
||||
self.POLL_URL_HEALTHY_RESPONSE, '')
|
||||
self.poll_url_retry_limit = options.get(self.POLL_URL_RETRY_LIMIT, '')
|
||||
|
@ -280,6 +289,8 @@ class HealthPolicy(base.Policy):
|
|||
'recover_action': self.recover_actions,
|
||||
'poll_url': self.poll_url,
|
||||
'poll_url_ssl_verify': self.poll_url_ssl_verify,
|
||||
'poll_url_conn_error_as_unhealthy':
|
||||
self.poll_url_conn_error_as_unhealthy,
|
||||
'poll_url_healthy_response': self.poll_url_healthy_response,
|
||||
'poll_url_retry_limit': self.poll_url_retry_limit,
|
||||
'poll_url_retry_interval': self.poll_url_retry_interval,
|
||||
|
@ -297,6 +308,8 @@ class HealthPolicy(base.Policy):
|
|||
'interval': self.interval,
|
||||
'poll_url': self.poll_url,
|
||||
'poll_url_ssl_verify': self.poll_url_ssl_verify,
|
||||
'poll_url_conn_error_as_unhealthy':
|
||||
self.poll_url_conn_error_as_unhealthy,
|
||||
'poll_url_healthy_response': self.poll_url_healthy_response,
|
||||
'poll_url_retry_limit': self.poll_url_retry_limit,
|
||||
'poll_url_retry_interval': self.poll_url_retry_interval,
|
||||
|
|
|
@ -719,6 +719,7 @@ class TestHealthManager(base.SenlinTestCase):
|
|||
params = {
|
||||
'poll_url': 'FAKE_POLL_URL',
|
||||
'poll_url_ssl_verify': True,
|
||||
'poll_url_conn_error_as_unhealthy': True,
|
||||
'poll_url_healthy_response': 'FAKE_HEALTHY_PATTERN',
|
||||
'poll_url_retry_limit': 2,
|
||||
'poll_url_retry_interval': 1,
|
||||
|
@ -753,6 +754,7 @@ class TestHealthManager(base.SenlinTestCase):
|
|||
params = {
|
||||
'poll_url': 'FAKE_POLL_URL',
|
||||
'poll_url_ssl_verify': True,
|
||||
'poll_url_conn_error_as_unhealthy': True,
|
||||
'poll_url_healthy_response': 'FAKE_HEALTHY_PATTERN',
|
||||
'poll_url_retry_limit': 2,
|
||||
'poll_url_retry_interval': 1,
|
||||
|
@ -789,6 +791,7 @@ class TestHealthManager(base.SenlinTestCase):
|
|||
params = {
|
||||
'poll_url': 'FAKE_POLL_URL',
|
||||
'poll_url_ssl_verify': True,
|
||||
'poll_url_conn_error_as_unhealthy': True,
|
||||
'poll_url_healthy_response': 'FAKE_HEALTHY_PATTERN',
|
||||
'poll_url_retry_limit': 2,
|
||||
'poll_url_retry_interval': 1,
|
||||
|
@ -826,6 +829,7 @@ class TestHealthManager(base.SenlinTestCase):
|
|||
params = {
|
||||
'poll_url': 'FAKE_POLL_URL',
|
||||
'poll_url_ssl_verify': True,
|
||||
'poll_url_conn_error_as_unhealthy': True,
|
||||
'poll_url_healthy_response': 'FAKE_HEALTHY_PATTERN',
|
||||
'poll_url_retry_limit': 2,
|
||||
'poll_url_retry_interval': 1,
|
||||
|
@ -864,6 +868,7 @@ class TestHealthManager(base.SenlinTestCase):
|
|||
params = {
|
||||
'poll_url': 'FAKE_POLL_URL',
|
||||
'poll_url_ssl_verify': False,
|
||||
'poll_url_conn_error_as_unhealthy': True,
|
||||
'poll_url_healthy_response': 'FAKE_HEALTHY_PATTERN',
|
||||
'poll_url_retry_limit': 2,
|
||||
'poll_url_retry_interval': 1,
|
||||
|
@ -886,6 +891,89 @@ class TestHealthManager(base.SenlinTestCase):
|
|||
)
|
||||
mock_sleep.assert_has_calls([mock.call(1), mock.call(1)])
|
||||
|
||||
@mock.patch.object(time, "sleep")
@mock.patch.object(tu, "is_older_than")
@mock.patch.object(hm.HealthManager, "_expand_url_template")
@mock.patch.object(utils, 'url_fetch')
@mock.patch.object(rpc_client.EngineClient, 'call')
def test__check_url_and_recover_node_conn_error(
        self, mock_rpc, mock_url_fetch, mock_expand_url, mock_time,
        mock_sleep):
    """URL fetch errors treated as unhealthy end in a recover request."""
    # Every poll attempt fails with a fetch error.
    mock_url_fetch.side_effect = utils.URLFetchError("Error")
    mock_expand_url.return_value = 'FAKE_EXPANDED_URL'
    mock_time.return_value = True
    expected_result = {'action': 'CHECK_ID'}
    mock_rpc.return_value = expected_result

    fake_ctx = mock.Mock()
    fake_node = mock.Mock(status=consts.NS_ACTIVE, id='FAKE_ID')
    poll_params = dict([
        ('poll_url', 'FAKE_POLL_URL'),
        ('poll_url_ssl_verify', False),
        ('poll_url_conn_error_as_unhealthy', True),
        ('poll_url_healthy_response', 'FAKE_HEALTHY_PATTERN'),
        ('poll_url_retry_limit', 2),
        ('poll_url_retry_interval', 1),
        ('node_update_timeout', 5),
    ])
    recovery = {'operation': 'REBUILD'}

    # do it
    outcome = self.hm._check_url_and_recover_node(
        fake_ctx, fake_node, recovery, poll_params)

    self.assertEqual(mock_rpc.return_value, outcome)
    mock_rpc.assert_called_once_with(fake_ctx, 'node_recover', mock.ANY)
    # One fetch and one sleep are expected per configured retry.
    expected_fetches = [mock.call('FAKE_EXPANDED_URL', verify=False)] * 2
    mock_url_fetch.assert_has_calls(expected_fetches)
    expected_sleeps = [mock.call(1)] * 2
    mock_sleep.assert_has_calls(expected_sleeps)
|
||||
|
||||
@mock.patch.object(time, "sleep")
@mock.patch.object(tu, "is_older_than")
@mock.patch.object(hm.HealthManager, "_expand_url_template")
@mock.patch.object(utils, 'url_fetch')
@mock.patch.object(rpc_client.EngineClient, 'call')
def test__check_url_and_recover_node_conn_error_noop(
        self, mock_rpc, mock_url_fetch, mock_expand_url, mock_time,
        mock_sleep):
    """A fetch error is a no-op when not treated as unhealthy."""
    mock_url_fetch.side_effect = utils.URLFetchError("Error")
    mock_expand_url.return_value = 'FAKE_EXPANDED_URL'
    mock_time.return_value = True

    fake_ctx = mock.Mock()
    fake_node = mock.Mock(status=consts.NS_ACTIVE, id='FAKE_ID')
    poll_params = dict([
        ('poll_url', 'FAKE_POLL_URL'),
        ('poll_url_ssl_verify', False),
        # Connection errors must not be counted against the node here.
        ('poll_url_conn_error_as_unhealthy', False),
        ('poll_url_healthy_response', 'FAKE_HEALTHY_PATTERN'),
        ('poll_url_retry_limit', 2),
        ('poll_url_retry_interval', 1),
        ('node_update_timeout', 5),
    ])
    recovery = {'operation': 'REBUILD'}

    # do it
    outcome = self.hm._check_url_and_recover_node(
        fake_ctx, fake_node, recovery, poll_params)

    self.assertIsNone(outcome)
    mock_rpc.assert_not_called()
    expected_fetches = [mock.call('FAKE_EXPANDED_URL', verify=False)]
    mock_url_fetch.assert_has_calls(expected_fetches)
    mock_sleep.assert_not_called()
|
||||
|
||||
@mock.patch.object(hm, "_chase_up")
|
||||
@mock.patch.object(hm.HealthManager, "_check_url_and_recover_node")
|
||||
@mock.patch.object(obj_node.Node, 'get_all_by_cluster')
|
||||
|
|
|
@ -120,6 +120,7 @@ class TestHealthPolicy(base.SenlinTestCase):
|
|||
'interval': self.hp.interval,
|
||||
'poll_url': '',
|
||||
'poll_url_ssl_verify': True,
|
||||
'poll_url_conn_error_as_unhealthy': True,
|
||||
'poll_url_healthy_response': '',
|
||||
'poll_url_retry_limit': 3,
|
||||
'poll_url_retry_interval': 3,
|
||||
|
@ -141,6 +142,7 @@ class TestHealthPolicy(base.SenlinTestCase):
|
|||
'recover_action': self.hp.recover_actions,
|
||||
'poll_url': '',
|
||||
'poll_url_ssl_verify': True,
|
||||
'poll_url_conn_error_as_unhealthy': True,
|
||||
'poll_url_healthy_response': '',
|
||||
'poll_url_retry_limit': 3,
|
||||
'poll_url_retry_interval': 3,
|
||||
|
|
Loading…
Reference in New Issue