diff --git a/sysinv/sysinv/sysinv/sysinv/conductor/ceph.py b/sysinv/sysinv/sysinv/sysinv/conductor/ceph.py
index be3621169e..f6b61c090e 100644
--- a/sysinv/sysinv/sysinv/sysinv/conductor/ceph.py
+++ b/sysinv/sysinv/sysinv/sysinv/conductor/ceph.py
@@ -108,9 +108,9 @@ class CephOperator(object):
 
         return rc
 
-    def _get_fsid(self):
+    def _get_fsid(self, timeout=10):
         try:
-            response, fsid = self._ceph_api.fsid(body='text', timeout=10)
+            response, fsid = self._ceph_api.fsid(body='text', timeout=timeout)
         except Exception as e:
             LOG.warn("ceph_api.fsid failed: " + str(e))
             return None
@@ -965,6 +965,40 @@ class CephOperator(object):
         else:
             return body["output"]
 
+    def have_ceph_monitor_access(self, timeout=5):
+        """ Verify that ceph monitor access will not timeout.
+
+        :param timeout: Time in seconds to wait for the REST API request to
+            respond.
+        """
+        available_mons = 0
+        monitors = self._db_api.ceph_mon_get_list()
+        for m in monitors:
+            try:
+                ihost = self._db_api.ihost_get_by_hostname(m.hostname)
+            except exception.NodeNotFound:
+                LOG.error("Monitor host %s not found" % m.hostname)
+                continue
+
+            if (ihost['administrative'] == constants.ADMIN_UNLOCKED and
+                    ihost['operational'] == constants.OPERATIONAL_ENABLED):
+                available_mons += 1
+
+        # Avoid calling the ceph rest_api until we have a minimum configuration
+        check_access = False
+        if utils.is_aio_system(self._db_api) and available_mons > 0:
+            # one monitor: need it available
+            check_access = True
+        elif available_mons > 1:
+            # three monitors: need two available
+            check_access = True
+
+        LOG.debug("Checking ceph monitors. Available: %s. Check cluster: "
+                  "access %s" % (available_mons, check_access))
+        if check_access:
+            return True if self._get_fsid(timeout) else False
+        return False
+
     def get_ceph_cluster_info_availability(self):
         # TODO(CephPoolsDecouple): rework
         # Check if the ceph cluster is ready to return statistics
diff --git a/sysinv/sysinv/sysinv/sysinv/conductor/manager.py b/sysinv/sysinv/sysinv/sysinv/conductor/manager.py
index 764a7d7885..c206f0dc26 100644
--- a/sysinv/sysinv/sysinv/sysinv/conductor/manager.py
+++ b/sysinv/sysinv/sysinv/sysinv/conductor/manager.py
@@ -5079,6 +5079,19 @@ class ConductorManager(service.PeriodicService):
                           "Preventing managed application actions.".format(e))
                 return True
 
+        def _met_app_apply_prerequisites(app_name):
+            prereqs_met = False
+            if app_name == constants.HELM_APP_PLATFORM:
+                # make sure for the ceph related apps that we have ceph access
+                # and the crushmap is applied to correctly set up related k8s
+                # resources.
+                crushmap_flag_file = os.path.join(constants.SYSINV_CONFIG_PATH,
+                                                  constants.CEPH_CRUSH_MAP_APPLIED)
+                if (os.path.isfile(crushmap_flag_file) and
+                        self._ceph.have_ceph_monitor_access()):
+                    prereqs_met = True
+            return prereqs_met
+
         LOG.debug("Periodic Task: _k8s_application_audit: Starting")
         # Make sure that the active controller is unlocked/enabled. Only
         # install an application if the controller has been provisioned.
@@ -5149,6 +5162,11 @@ class ConductorManager(service.PeriodicService):
                 # Action: Raise alarm?
                 pass
             elif status == constants.APP_UPLOAD_SUCCESS:
+                if not _met_app_apply_prerequisites(app_name):
+                    LOG.info("Platform managed application %s: Prerequisites "
+                             "not met." % app_name)
+                    continue
+
                 if _patching_operation_is_occurring():
                     continue
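
For review purposes, the gating rule the two files combine to enforce can be summarized in a short standalone sketch. This is illustrative only, not sysinv code: the function can_apply_platform_app and its parameters (is_aio, monitor_states, query_fsid) are hypothetical stand-ins for the sysinv DB and Ceph REST API calls used in the diff, but the crushmap flag check and the monitor-count thresholds mirror have_ceph_monitor_access() and _met_app_apply_prerequisites() above.

def can_apply_platform_app(crushmap_flag_present, is_aio, monitor_states,
                           query_fsid):
    """Illustrative sketch of the prerequisite gating added in this change.

    crushmap_flag_present -- True if the crushmap-applied flag file exists
    is_aio                -- True on an all-in-one system (single monitor)
    monitor_states        -- list of (administrative, operational) tuples
    query_fsid            -- callable returning the cluster fsid, or None on
                             timeout (stands in for _get_fsid(timeout))
    """
    if not crushmap_flag_present:
        return False

    # Count monitors whose host is unlocked and enabled.
    available = sum(1 for admin, oper in monitor_states
                    if admin == 'unlocked' and oper == 'enabled')

    # An AIO system needs its single monitor available; a standard
    # three-monitor deployment needs at least two to reach quorum.
    check_access = (available > 0) if is_aio else (available > 1)

    # Only touch the Ceph REST API once a minimum configuration exists, and
    # treat a timed-out fsid query (None) as "no access".
    return bool(check_access and query_fsid())


# Example: standard system, two of three monitors enabled, cluster reachable.
print(can_apply_platform_app(
    crushmap_flag_present=True,
    is_aio=False,
    monitor_states=[('unlocked', 'enabled'),
                    ('unlocked', 'enabled'),
                    ('locked', 'disabled')],
    query_fsid=lambda: 'fake-fsid'))   # -> True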