Merge "Cloudpulse ceph_check stuck in "Running" state"

This commit is contained in:
Zuul 2020-12-11 13:12:17 +00:00 committed by Gerrit Code Review
commit ca615d557d
1 changed files with 39 additions and 68 deletions

View File

@ -142,34 +142,6 @@ class operator_scenario(base.Scenario):
self.os_node_info_obj = openstack_node_info_reader(
cfg.CONF.operator_test.operator_setup_file)
def is_metric_pool(self, is_containerized, ceph_json):
    """Tell whether a ceph HEALTH_WARN is explained by the metrics pool.

    Returns True only when all of the following hold:

    * ``ceph_json['health']['status']`` contains ``HEALTH_WARN``;
    * the health checks contain a ``MANY_OBJECTS_PER_PG`` entry whose
      severity contains ``HEALTH_WARN`` and whose summary message reports
      exactly one pool with many more objects per pg than average;
    * ``ceph df`` (run through ansible, optionally inside the cephmon
      container) succeeds and has a line mentioning ``metrics`` whose
      last whitespace-separated field is an integer greater than 1000.

    :param is_containerized: when truthy, ``ceph df`` is wrapped in
        ``docker exec`` against the container returned by
        ``get_container_name("cephmon")``.
    :param ceph_json: parsed ``ceph -f json status`` output (a dict with
        at least ``health.status``).
    :returns: True if the warning is attributed to the metrics pool,
        False in every other case (including command failure or
        malformed/missing health check details).
    """
    health = ceph_json['health']
    if 'HEALTH_WARN' not in health['status']:
        return False

    # Missing 'checks', 'severity' or 'summary'/'message' entries are
    # treated as "not the metrics-pool warning" rather than raising
    # (the old `err.get('severity', None)` made the `in` test blow up
    # with TypeError when the key was absent).
    err = health.get('checks', {}).get('MANY_OBJECTS_PER_PG')
    if not err:
        return False
    if 'HEALTH_WARN' not in err.get('severity', ''):
        return False
    message = err.get('summary', {}).get('message', '')
    if '1 pools have many more objects per pg than average' not in message:
        return False

    cmd = r"ceph df"
    if is_containerized:
        ceph_container = get_container_name("cephmon")
        cmd = "'docker exec %s %s'" % (ceph_container, cmd)
    cmd = "ansible -o all -u root -i 127.0.0.1, -a " + cmd + ' -u root'
    ret = execute(cmd)
    if ret['status']:
        # Non-zero/truthy status: the command failed, nothing to inspect.
        return False

    # The split is on a literal backslash + 'n', not a real newline;
    # presumably because `ansible -o` emits one-line output with escaped
    # newlines -- TODO confirm against the ansible version in use.
    for line in ret['output'].split('\\n'):
        if 'metrics' not in line:
            continue
        try:
            num_obj = int(line.split()[-1])
        except ValueError:
            # Unparseable trailing field: give up instead of guessing.
            return False
        if num_obj > 1000:
            return True
    return False
@base.scenario(admin_only=False, operator=True)
def rabbitmq_check(self):
self.load()
@ -326,50 +298,49 @@ class operator_scenario(base.Scenario):
if "block_storage" in
node.role.split()]
if storage_nodes_from_ansible_config:
cmd = (r"ceph -f json status")
is_containerized = cfg.CONF.operator_test.containerized
if is_containerized:
ceph_container = get_container_name("cephmon")
cmd = ("'docker exec %s %s'" % (ceph_container, cmd))
anscmd = "ansible -o all -i 127.0.0.1, -a "
cmd = anscmd + cmd + ' -u root'
if not storage_nodes_from_ansible_config:
error = "Ceph cluster test skipped as no dedicated storage found"
return (300, error)
res = execute(cmd)
if not res['status']:
ceph_status = res['output']
cmd = (r"ceph -f json status")
is_containerized = cfg.CONF.operator_test.containerized
if is_containerized:
ceph_container = get_container_name("cephmon")
cmd = ("'docker exec %s %s'" % (ceph_container, cmd))
anscmd = "ansible -o all -i 127.0.0.1, -a "
cmd = anscmd + cmd + ' -u root'
ceph_status = ceph_status.replace('\n', '')
ceph_data = ceph_status.split('|')
ceph_str = ceph_data[3].replace(' (stdout) ', '') \
.replace('\\n', '')
ceph_json = simplejson.loads(ceph_str)
res = execute(cmd)
if res['status']:
error = "Couldn't get ceph status"
return (300, error)
# Handle ceph status in luminous, result should be picked from
# 'status' instead of 'overall_status'
if len(ceph_json['health']['summary']) and \
'summary' in list(ceph_json['health']['summary'][0].keys()) \
and 'mon health preluminous compat warning' in \
ceph_json['health']['summary'][0]['summary']:
overall_status = ceph_json['health']['status']
if ('HEALTH_WARN' in overall_status and
self.is_metric_pool(is_containerized, ceph_json)):
overall_status = 'HEALTH_OK'
else:
overall_status = ceph_json['health']['overall_status']
num_of_osd = ceph_json['osdmap']['osdmap']['num_osds']
num_up_osds = ceph_json['osdmap']['osdmap']['num_up_osds']
if overall_status == 'HEALTH_OK':
return (200, "Overall Status = %s, "
"Cluster status = %s/%s" %
(overall_status, num_up_osds, num_of_osd))
else:
return (404, "Overall Status = %s, "
"Cluster status = %s/%s" %
(overall_status, num_up_osds, num_of_osd))
ceph_status = res['output']
ceph_status = ceph_status.replace('\n', '')
ceph_data = ceph_status.split('|')
ceph_str = ceph_data[3].replace(' (stdout) ', '').replace('\\n', '')
ceph_json = simplejson.loads(ceph_str)
# Example of format of status in ceph 14.2.8:
# "health": {
# "checks": {
# "MON_DOWN": {
# "severity": "HEALTH_WARN",
# "summary": {
# "message": "1/3 mons down, quorum ceph-3,ceph-1"
# }
# }
# },
# "status": "HEALTH_WARN"
# },
#
overall_status = ceph_json['health'].get('status')
osdmap = ceph_json['osdmap']['osdmap']
msg = ("Overall Status = %s, Cluster status = %s/%s" %
(overall_status, osdmap['num_osds'], osdmap['num_up_osds']))
if overall_status == 'HEALTH_OK':
return (200, msg)
else:
return (300, ("Ceph cluster test skipped "
"as no dedicated storage found"))
return (404, msg)
@base.scenario(admin_only=False, operator=True)
def node_check(self):