Apply runtime manifest deadlock waiting for management IP
The fix for "partition deleted immediately after creation" added a mutex between config_apply_runtime_manifest() and agent_audit(). However: 1. config_apply_runtime_manifest() loops (for up to 300s) waiting for self._mgmt_ip to be set; 2. agent_audit() is what sets self._mgmt_ip, but it cannot run because config_apply_runtime_manifest() is still running and holds the mutex. Move the retry logic on self._mgmt_ip outside of config_apply_runtime_manifest() so that agent_audit() can run. Change-Id: I3b1e2ebdaa684fa16e21662fb703dffffa70abe3 Closes-Bug: #1790159
This commit is contained in:
parent
b25961082f
commit
d2dcb9882c
|
@ -36,6 +36,7 @@ Commands (from conductors) are received via RPC calls.
|
|||
import errno
|
||||
import fcntl
|
||||
import os
|
||||
import retrying
|
||||
import shutil
|
||||
import subprocess
|
||||
import sys
|
||||
|
@ -1354,6 +1355,11 @@ class AgentManager(service.PeriodicService):
|
|||
self._update_config_applied(iconfig_uuid)
|
||||
self._report_config_applied(context)
|
||||
|
||||
def _retry_on_missing_mgmt_ip(ex):
    """Decide whether ``retrying`` should re-run a failed attempt.

    This predicate is passed as ``retry_on_exception`` to the
    ``retrying.retry`` decorator on ``config_apply_runtime_manifest``.
    It is referenced while the class body is being executed, so it is
    invoked as a plain function with the raised exception as its only
    argument; it therefore must not declare ``self``.

    :param ex: the exception raised by the decorated call.
    :returns: True if ``ex`` is a ``LocalManagementIpNotFound`` (retry),
              False otherwise (the exception propagates).
    """
    # The argument must not be named ``exception``: that would shadow the
    # ``exception`` namespace that provides LocalManagementIpNotFound and
    # turn the lookup below into an attribute access on the instance.
    return isinstance(ex, exception.LocalManagementIpNotFound)
|
||||
|
||||
@retrying.retry(wait_fixed=15 * 1000, stop_max_delay=300 * 1000,
|
||||
retry_on_exception=_retry_on_missing_mgmt_ip)
|
||||
@utils.synchronized(LOCK_AGENT_ACTION, external=False)
|
||||
def config_apply_runtime_manifest(self, context, config_uuid, config_dict):
|
||||
"""Asynchronously, have the agent apply the runtime manifest with the
|
||||
|
@ -1374,44 +1380,37 @@ class AgentManager(service.PeriodicService):
|
|||
:returns: none ... uses asynchronous cast().
|
||||
"""
|
||||
|
||||
try:
|
||||
# runtime manifests can not be applied without the initial
|
||||
# configuration applied
|
||||
force = config_dict.get('force', False)
|
||||
if (not force and
|
||||
not os.path.isfile(tsc.INITIAL_CONFIG_COMPLETE_FLAG)):
|
||||
# runtime manifests can not be applied without the initial
|
||||
# configuration applied
|
||||
force = config_dict.get('force', False)
|
||||
if (not force and
|
||||
not os.path.isfile(tsc.INITIAL_CONFIG_COMPLETE_FLAG)):
|
||||
return
|
||||
|
||||
personalities = config_dict.get('personalities')
|
||||
host_uuids = config_dict.get('host_uuids')
|
||||
|
||||
if host_uuids:
|
||||
# ignore requests that are not intended for this host
|
||||
if self._ihost_uuid not in host_uuids:
|
||||
return
|
||||
|
||||
personalities = config_dict.get('personalities')
|
||||
host_uuids = config_dict.get('host_uuids')
|
||||
|
||||
if host_uuids:
|
||||
# ignore requests that are not intended for this host
|
||||
if self._ihost_uuid not in host_uuids:
|
||||
return
|
||||
else:
|
||||
# ignore requests that are not intended for host personality
|
||||
for subfunction in self.subfunctions_list_get():
|
||||
if subfunction in personalities:
|
||||
break
|
||||
else:
|
||||
# ignore requests that are not intended for host personality
|
||||
for subfunction in self.subfunctions_list_get():
|
||||
if subfunction in personalities:
|
||||
break
|
||||
else:
|
||||
return
|
||||
|
||||
LOG.info("config_apply_runtime_manifest: %s %s %s" % (
|
||||
config_uuid, config_dict, self._ihost_personality))
|
||||
|
||||
time_slept = 0
|
||||
while not self._mgmt_ip and time_slept < 300:
|
||||
time.sleep(15)
|
||||
time_slept += 15
|
||||
|
||||
if not self._mgmt_ip:
|
||||
LOG.warn("config_apply_runtime_manifest: "
|
||||
" timed out waiting for local management ip"
|
||||
" %s %s %s" %
|
||||
(config_uuid, config_dict, self._ihost_personality))
|
||||
return
|
||||
|
||||
if not self._mgmt_ip:
|
||||
raise exception.LocalManagementIpNotFound(
|
||||
config_uuid=config_uuid, config_dict=config_dict,
|
||||
host_personality=self._ihost_personality)
|
||||
|
||||
LOG.info("config_apply_runtime_manifest: %s %s %s" % (
|
||||
config_uuid, config_dict, self._ihost_personality))
|
||||
try:
|
||||
|
||||
if not os.path.exists(tsc.PUPPET_PATH):
|
||||
# we must be controller-standby or storage, mount /var/run/platform
|
||||
LOG.info("controller-standby or storage, mount /var/run/platform")
|
||||
|
|
|
@ -1265,3 +1265,9 @@ class IncompleteCephMonNetworkConfig(CephFailure):
|
|||
|
||||
class InvalidHelmNamespace(Invalid):
    """Error for an invalid helm overrides namespace for a chart.

    The message template is filled from the ``namespace`` and ``chart``
    keyword values.
    """
    message = _("Invalid helm overrides namespace (%(namespace)s) for chart %(chart)s.")
|
||||
|
||||
|
||||
class LocalManagementIpNotFound(NotFound):
    """Error raised when the local management IP is not set.

    ``config_apply_runtime_manifest`` raises this when ``self._mgmt_ip``
    is empty. The message template is filled from the ``config_uuid``,
    ``config_dict`` and ``host_personality`` keyword values.
    """
    # Every ``%(name)`` mapping key needs a conversion type; without the
    # trailing ``s`` the %-interpolation of this template fails with
    # ValueError instead of producing the message.
    message = _("Local management IP not found: "
                "config_uuid=%(config_uuid)s, config_dict=%(config_dict)s, "
                "host_personality=%(host_personality)s")
|
||||
|
|
Loading…
Reference in New Issue