210 lines
8.9 KiB
Python
210 lines
8.9 KiB
Python
# (C) Copyright 2015-2016 Hewlett Packard Enterprise Development LP
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
|
# not use this file except in compliance with the License. You may obtain
|
|
# a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
|
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
|
# License for the specific language governing permissions and limitations
|
|
# under the License.
|
|
|
|
import itertools
|
|
import logging
|
|
import os
|
|
|
|
from monasca_setup import agent_config
|
|
from monasca_setup.detection import Plugin
|
|
from monasca_setup.detection.utils import find_process_cmdline
|
|
from monasca_setup.detection.utils import watch_process
|
|
|
|
# Module-level logger for this detection plugin.
log = logging.getLogger(__name__)
|
|
|
|
|
|
class Ceph(Plugin):
    """Detect Ceph daemons and set up configuration to monitor them.

    The Ceph services consist of three main daemons:
        - Object Storage Daemon (OSD)
            There is an OSD service process per disk added in the cluster
            and has a corresponding daemon identifier at /var/lib/ceph/osd/
        - Monitor Servers (MON)
            There is a Monitor service process per monitoring host in the
            cluster and has a corresponding daemon identifier at
            /var/lib/ceph/mon/
        - Metadata Servers (MDS)
            There is an MDS service process per monitoring host in the
            cluster and has a corresponding daemon identifier at
            /var/lib/ceph/mds/

    The ceph object store is exposed via RADOS Gateway
        - RADOS Gateway (radosgw)
            'radosgw' is an HTTP REST gateway for the RADOS object store,
            a part of the Ceph distributed storage system.

    Unlike OpenStack services, monitoring a single instance of a service
    will be of no use for Ceph. Every daemon needs to be monitored. This can
    be achieved by scanning through the daemon identifiers.

    """

    def __init__(self, template_dir, overwrite=True, args=None):
        """Set up the per-daemon constants, then initialise the base plugin.

        :param template_dir: forwarded unchanged to ``Plugin.__init__``
        :param overwrite: forwarded unchanged to ``Plugin.__init__``
        :param args: forwarded unchanged to ``Plugin.__init__``
        """
        self.service_name = 'ceph'
        self.process_names = ['ceph-osd', 'ceph-mon', 'ceph-mds', 'radosgw']
        self.ceph_config_dir = '/etc/ceph/'

        # Keyed by the short service type ('osd', 'mon', 'mds', 'radosgw').
        self.service_constants = dict()
        for process in self.process_names:
            process_type = process.replace('ceph-', '')
            # 'radosgw' is the only process name without the 'ceph-' prefix;
            # add it so every component is reported as 'ceph-<something>'.
            if process.startswith('ceph-'):
                display_name = process
            else:
                display_name = 'ceph-%s' % process
            self.service_constants[process_type] = {
                'service_dir': '/var/lib/ceph/%s/' % process_type,
                'executable': '/usr/bin/%s' % process,
                'display_name': display_name
            }

        # The attributes above are deliberately assigned before calling the
        # base initialiser (presumably it triggers _detect(), which reads
        # self.process_names -- confirm against Plugin.__init__).
        super(Ceph, self).__init__(template_dir, overwrite, args)

    def _detect(self):
        """Run detection.

        Records every known Ceph process name for which
        ``find_process_cmdline`` returns a match in ``self.found_processes``
        and marks the plugin as available when at least one was found.
        """
        self.found_processes = [
            process for process in self.process_names
            if find_process_cmdline(process) is not None]

        if self.found_processes:
            self.available = True

    @staticmethod
    def _build_search_string(executable, options=None):
        """Return the command lines to search for, one per option ordering.

        The executable always comes first; the options follow in every
        possible order, so a running process is matched regardless of the
        order in which its arguments were given.

        :param executable: path of the daemon executable
        :param options: optional iterable of argument strings
        :returns: list of space-joined command-line strings; just
                  ``[executable]`` when there are no options
        """
        # Permute only the options and prepend the executable, instead of
        # permuting everything and discarding orderings that do not start
        # with the executable. Same strings, same order, far less work.
        return [" ".join((executable,) + ordering)
                for ordering in itertools.permutations(options or ())]

    def _service_config(self, clusters, service_type):
        """Build the expected process descriptions for one service type.

        :param clusters: list of dicts, each with a 'cluster_name' key
        :param service_type: a key of ``self.service_constants``
                             ('mon', 'osd', 'mds' or 'radosgw')
        :returns: list of dicts with the keys 'search_string', 'name' and
                  'type', one per daemon identifier found in the service
                  directory for the given clusters
        """
        constants = self.service_constants[service_type]
        display_name = constants['display_name']
        service_dir = constants['service_dir']
        executable = constants['executable']
        expected_processes = list()

        # The directory content does not depend on the cluster, so read it
        # once instead of once per cluster.
        if os.path.exists(service_dir):
            dir_entries = os.listdir(service_dir)
        else:
            dir_entries = []

        for cluster in clusters:
            cluster_name = cluster['cluster_name']
            # Daemon identifier is of format <cluster_name>-<id>.
            # Matching on the full '<cluster_name>-' prefix (rather than on
            # the text before the first '-') skips entries that have no '-'
            # at all, which used to raise IndexError below, and also works
            # for cluster names that themselves contain a '-'.
            # NOTE(review): a cluster name that is a prefix of another
            # (e.g. 'ceph' and 'ceph-backup') still claims the other
            # cluster's entries, as before.
            prefix = cluster_name + '-'

            for instance in dir_entries:
                if not instance.startswith(prefix):
                    continue

                # 'id' for ceph-mon is alphanumeric and is usually the
                # hostname where the service is running
                # E.g., ceph-monitor1.dom, ceph-monitor2.dom etc.
                #
                # 'id' for ceph-osd is a unique numeric index for
                # that OSD in the cluster
                # E.g., ceph-1, ceph-2 etc.
                #
                # 'id' for ceph-mds is alphanumeric and is usually the
                # hostname where the service is running.
                # E.g., ceph-mds1.dom, ceph-mds2.dom etc.
                #
                # 'id' for radosgw is preceded by client.rgw. plus an
                # alphanumeric that is usually the hostname where the
                # service is running.
                # E.g., client.rgw.ceph-radosgw1.dom
                if service_type == 'radosgw':
                    # Everything after the first '.' of the entry name.
                    daemon_id = instance.split('.', 1)[-1]
                    process_args = ['--cluster %s' % cluster_name,
                                    '--name client.rgw.%s' % daemon_id, '-f']
                else:
                    daemon_id = instance[len(prefix):]
                    process_args = ['--cluster %s' % cluster_name,
                                    '--id %s' % daemon_id, '-f']

                expected_processes.append({
                    'search_string': self._build_search_string(
                        executable, process_args),
                    'name': '%s-%s.%s' % (cluster_name, service_type,
                                          daemon_id),
                    'type': display_name,
                })

        return expected_processes

    def build_config(self):
        """Build the config as a Plugins object and return.

        Adds one process check per expected Ceph daemon and a 'ceph' plugin
        section with one instance per cluster found in the Ceph config
        directory. An empty config is returned when that directory exists
        but holds no ``*.conf`` file.
        """
        config = agent_config.Plugins()
        conf_suffix = '.conf'

        # There may be multiple clusters, and we construct a list of dicts
        # containing cluster_name and config_file for each cluster
        clusters = list()

        # Get the cluster_name from <cluster_name>.conf in /etc/ceph/ directory
        if os.path.exists(self.ceph_config_dir):
            # One listing serves both the config-file scan and the keyring
            # lookups below.
            dir_entries = os.listdir(self.ceph_config_dir)
            config_files = [f for f in dir_entries
                            if f.endswith(conf_suffix)]
            if not config_files:
                return config
            for config_file in config_files:
                cluster_name = config_file[:-len(conf_suffix)]
                keyring = cluster_name + '.client.admin.keyring'
                clusters.append({
                    'cluster_name': cluster_name,
                    'config_file': os.path.join(self.ceph_config_dir,
                                                config_file),
                    # True when the client admin keyring is installed.
                    'admin_key': keyring in dir_entries,
                })

        expected_processes = list()
        for service_type in ('mon', 'osd', 'mds', 'radosgw'):
            expected_processes.extend(
                self._service_config(clusters, service_type))

        for process in expected_processes:
            # Watch the service processes
            log.info("\tMonitoring the {0} {1} process.".format(
                process['name'], self.service_name))
            config.merge(watch_process(search_strings=process['search_string'],
                                       service=self.service_name,
                                       component=process['type'],
                                       process_name=process['name'],
                                       exact_match=False))

        # Configure ceph plugin
        instances = []
        for cluster in clusters:
            cluster_config = {'cluster_name': cluster['cluster_name']}

            # If there is no client admin key installed for this cluster
            # then we cannot invoke Ceph commands for cluster monitoring.
            # In that case we only monitor the locally active processes.
            if not cluster['admin_key']:
                cluster_config['collect_usage_metrics'] = False
                cluster_config['collect_stats_metrics'] = False
                cluster_config['collect_mon_metrics'] = False
                cluster_config['collect_osd_metrics'] = False
                cluster_config['collect_pool_metrics'] = False

            log.info("\tMonitoring ceph cluster: '{0}'.".format(
                cluster['cluster_name']))
            instances.append(cluster_config)

        config['ceph'] = {'init_config': None, 'instances': instances}
        return config
|