# Copyright (c) 2013 Mirantis Inc.
|
|
# Copyright (c) 2015-2016 Red Hat
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
|
# implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
import multiprocessing
|
|
import threading
|
|
import time
|
|
|
|
import cotyledon
|
|
from futurist import periodics
|
|
import msgpack
|
|
from oslo_config import cfg
|
|
from oslo_log import log
|
|
from oslo_utils import timeutils
|
|
import six
|
|
import tooz
|
|
from tooz import coordination
|
|
|
|
from gnocchi import archive_policy
|
|
from gnocchi import indexer
|
|
from gnocchi import service
|
|
from gnocchi import statsd as statsd_service
|
|
from gnocchi import storage
|
|
from gnocchi import utils
|
|
|
|
|
|
LOG = log.getLogger(__name__)
|
|
|
|
|
|
def upgrade():
    """Entry point for the gnocchi-upgrade command.

    Upgrades the indexer schema and the storage layout, then seeds the
    default archive policies and the "default" archive policy rule — but
    only on a fresh installation (no policies or rules exist yet), so an
    operator's customizations are never overwritten.  Each step can be
    skipped individually via command-line options.
    """
    conf = cfg.ConfigOpts()
    conf.register_cli_opts([
        cfg.BoolOpt("skip-index", default=False,
                    help="Skip index upgrade."),
        cfg.BoolOpt("skip-storage", default=False,
                    help="Skip storage upgrade."),
        cfg.BoolOpt("skip-archive-policies-creation", default=False,
                    help="Skip default archive policies creation."),
        cfg.BoolOpt("create-legacy-resource-types", default=False,
                    help="Creation of Ceilometer legacy resource types.")
    ])
    conf = service.prepare_service(conf=conf)
    index = indexer.get_driver(conf)
    index.connect()
    if not conf.skip_index:
        # Lazy %-style args: let the logging layer do the formatting only
        # when the record is actually emitted.
        LOG.info("Upgrading indexer %s", index)
        index.upgrade(
            create_legacy_resource_types=conf.create_legacy_resource_types)
    if not conf.skip_storage:
        s = storage.get_driver(conf)
        LOG.info("Upgrading storage %s", s)
        s.upgrade(index)

    # Seed defaults only when the deployment has neither policies nor
    # rules: anything already present means the operator manages them.
    if (not conf.skip_archive_policies_creation
            and not index.list_archive_policies()
            and not index.list_archive_policy_rules()):
        # The policy name is embedded in each policy object, so only the
        # values of the mapping are needed here.
        for ap in six.itervalues(archive_policy.DEFAULT_ARCHIVE_POLICIES):
            index.create_archive_policy(ap)
        index.create_archive_policy_rule("default", "*", "low")
|
|
|
|
|
|
def statsd():
    """Entry point for the gnocchi-statsd daemon."""
    statsd_service.start()
|
|
|
|
|
|
class MetricProcessBase(cotyledon.Service):
    """Common loop machinery for all metricd worker services.

    Subclasses implement ``_run_job``; this base class runs it repeatedly,
    starting each iteration roughly ``interval_delay`` seconds after the
    previous one began, until the service is terminated.
    """

    def __init__(self, worker_id, conf, interval_delay=0):
        super(MetricProcessBase, self).__init__(worker_id)
        self.conf = conf
        # Each worker sleeps worker_id seconds before its first job so
        # sibling workers do not all hit the backends at once.
        self.startup_delay = worker_id
        self.interval_delay = interval_delay
        self._shutdown = threading.Event()
        self._shutdown_done = threading.Event()

    def _configure(self):
        # Drivers are created lazily in run() so they live in the worker
        # process rather than in the parent service manager.
        self.store = storage.get_driver(self.conf)
        self.index = indexer.get_driver(self.conf)
        self.index.connect()

    def run(self):
        self._configure()
        # Delay startup so workers are jittered.
        time.sleep(self.startup_delay)

        while not self._shutdown.is_set():
            with timeutils.StopWatch() as watch:
                self._run_job()
            # Subtract the job duration from the delay so the schedule
            # does not drift; a long job means no extra wait at all.
            remaining = self.interval_delay - watch.elapsed()
            self._shutdown.wait(remaining if remaining > 0 else 0)
        self._shutdown_done.set()

    def terminate(self):
        self._shutdown.set()
        self.close_services()
        LOG.info("Waiting ongoing metric processing to finish")
        # Block until run() observes the shutdown flag and exits its loop.
        self._shutdown_done.wait()

    @staticmethod
    def close_services():
        # Hook for subclasses that hold external resources; no-op here.
        pass

    @staticmethod
    def _run_job():
        raise NotImplementedError
|
|
|
|
|
|
class MetricReporting(MetricProcessBase):
    """Worker that periodically logs the measure-processing backlog."""

    name = "reporting"

    def __init__(self, worker_id, conf):
        super(MetricReporting, self).__init__(
            worker_id, conf, conf.storage.metric_reporting_delay)

    def _run_job(self):
        try:
            report = self.store.measures_report(details=False)
            summary = report['summary']
            LOG.info("%d measurements bundles across %d "
                     "metrics wait to be processed.",
                     summary['measures'], summary['metrics'])
        except Exception:
            # Reporting is best-effort: log and keep the loop alive.
            LOG.error("Unexpected error during pending measures reporting",
                      exc_info=True)
|
|
|
|
|
|
class MetricScheduler(MetricProcessBase):
    """Worker that schedules metrics with pending measures for processing.

    Scheduler instances coordinate through tooz so that each one works on
    a distinct block of the metric listing; scheduled metric ids are
    pushed in small batches onto a shared queue consumed by
    MetricProcessor workers.
    """

    name = "scheduler"
    # Tolerated fraction of re-scheduled (previously seen) metrics per
    # cycle before warning that processing lags behind scheduling.
    MAX_OVERLAP = 0.3
    # tooz group every scheduler instance joins.
    GROUP_ID = "gnocchi-scheduler"
    # Seconds between runs of the coordinator watcher callbacks.
    SYNC_RATE = 30
    # Metrics scheduled per worker per cycle (scales block_size).
    TASKS_PER_WORKER = 16
    # Number of metric ids bundled into a single queue item.
    BLOCK_SIZE = 4

    def __init__(self, worker_id, conf, queue):
        super(MetricScheduler, self).__init__(
            worker_id, conf, conf.storage.metric_processing_delay)
        self._coord, self._my_id = utils.get_coordinator_and_start(
            conf.storage.coordination_url)
        self.queue = queue
        # Metric ids scheduled during the previous cycle; used to avoid
        # pushing the same metric twice while it may still be queued.
        self.previously_scheduled_metrics = set()
        self.workers = conf.metricd.workers
        self.block_index = 0
        self.block_size_default = self.workers * self.TASKS_PER_WORKER
        self.block_size = self.block_size_default
        self.periodic = None

    def set_block(self, event):
        """Recompute this scheduler's block from current group membership.

        Registered as a join/leave watcher; *event* is unused.
        """
        get_members_req = self._coord.get_members(self.GROUP_ID)
        try:
            # Sorting gives every member the same ordering; our position
            # in it selects the block this instance is responsible for.
            members = sorted(get_members_req.get())
            self.block_index = members.index(self._my_id)
            reqs = list(self._coord.get_member_capabilities(self.GROUP_ID, m)
                        for m in members)
            for req in reqs:
                cap = msgpack.loads(req.get(), encoding='utf-8')
                # NOTE(review): each iteration overwrites max_workers, so
                # only the LAST member's capability is compared with our
                # own worker count — presumably this was meant to take the
                # maximum over all members; confirm before relying on it.
                max_workers = max(cap['workers'], self.workers)
            self.block_size = max_workers * self.TASKS_PER_WORKER
            LOG.info('New set of agents detected. Now working on block: %s, '
                     'with up to %s metrics', self.block_index,
                     self.block_size)
        except Exception:
            # Any coordination failure degrades to the first block with
            # the local default size so scheduling keeps working.
            LOG.warning('Error getting block to work on, defaulting to first')
            self.block_index = 0
            self.block_size = self.block_size_default

    @utils.retry
    def _configure(self):
        super(MetricScheduler, self)._configure()
        try:
            # Advertise our worker count as this member's capability.
            cap = msgpack.dumps({'workers': self.workers})
            join_req = self._coord.join_group(self.GROUP_ID, cap)
            join_req.get()
            LOG.info('Joined coordination group: %s', self.GROUP_ID)
            self.set_block(None)

            @periodics.periodic(spacing=self.SYNC_RATE, run_immediately=True)
            def run_watchers():
                self._coord.run_watchers()

            # Daemon thread firing the group watchers every SYNC_RATE
            # seconds so membership changes are noticed.
            self.periodic = periodics.PeriodicWorker.create([])
            self.periodic.add(run_watchers)
            t = threading.Thread(target=self.periodic.start)
            t.daemon = True
            t.start()

            self._coord.watch_join_group(self.GROUP_ID, self.set_block)
            self._coord.watch_leave_group(self.GROUP_ID, self.set_block)
        except coordination.GroupNotCreated as e:
            # First scheduler ever: create the group (tolerating a
            # concurrent creation) and let @utils.retry re-run the join.
            create_group_req = self._coord.create_group(self.GROUP_ID)
            try:
                create_group_req.get()
            except coordination.GroupAlreadyExist:
                pass
            raise utils.Retry(e)
        except tooz.NotImplemented:
            LOG.warning('Configured coordination driver does not support '
                        'required functionality. Coordination is disabled.')
        except Exception as e:
            LOG.error('Failed to configure coordination. Coordination is '
                      'disabled: %s', e)

    def _run_job(self):
        try:
            metrics = set(self.store.list_metric_with_measures_to_process(
                self.block_size, self.block_index))
            # Only de-duplicate when the queue is non-empty: an empty
            # queue means everything previously scheduled was consumed.
            if metrics and not self.queue.empty():
                # NOTE(gordc): drop metrics we previously process to avoid
                # handling twice
                number_of_scheduled_metrics = len(metrics)
                metrics = metrics - self.previously_scheduled_metrics
                if (float(number_of_scheduled_metrics - len(metrics)) /
                        self.block_size > self.MAX_OVERLAP):
                    LOG.warning('Metric processing lagging scheduling rate. '
                                'It is recommended to increase the number of '
                                'workers or to lengthen processing interval.')
            metrics = list(metrics)
            # Push metric ids in BLOCK_SIZE chunks for the processors.
            for i in six.moves.range(0, len(metrics), self.BLOCK_SIZE):
                self.queue.put(metrics[i:i + self.BLOCK_SIZE])
            self.previously_scheduled_metrics = set(metrics)
            LOG.debug("%d metrics scheduled for processing.", len(metrics))
        except Exception:
            LOG.error("Unexpected error scheduling metrics for processing",
                      exc_info=True)

    def close_services(self):
        # Stop the watcher thread and leave the group cleanly so other
        # members rebalance their blocks.
        if self.periodic:
            self.periodic.stop()
            self.periodic.wait()
        self._coord.leave_group(self.GROUP_ID)
        self._coord.stop()
|
|
|
|
|
|
class MetricJanitor(MetricProcessBase):
    """Worker that purges metrics which were marked for deletion."""

    name = "janitor"

    def __init__(self, worker_id, conf):
        super(MetricJanitor, self).__init__(
            worker_id, conf, conf.storage.metric_cleanup_delay)

    def _run_job(self):
        try:
            self.store.expunge_metrics(self.index)
            LOG.debug("Metrics marked for deletion removed from backend")
        except Exception:
            # Cleanup is best-effort: log and retry on the next cycle.
            LOG.error("Unexpected error during metric cleanup", exc_info=True)
|
|
|
|
|
|
class MetricProcessor(MetricProcessBase):
    """Worker that aggregates measures for batches of scheduled metrics.

    Pulls batches of metric ids off the shared queue filled by the
    scheduler and runs the storage background tasks on them.
    """

    name = "processing"

    def __init__(self, worker_id, conf, queue):
        # interval_delay of 0: drain the queue as fast as possible.
        super(MetricProcessor, self).__init__(worker_id, conf, 0)
        self.queue = queue

    def _run_job(self):
        try:
            try:
                batch = self.queue.get(block=True, timeout=10)
            except six.moves.queue.Empty:
                # NOTE(sileht): Allow the process to exit gracefully every
                # 10 seconds
                return
            self.store.process_background_tasks(self.index, batch)
        except Exception:
            LOG.error("Unexpected error during measures processing",
                      exc_info=True)
|
|
|
|
|
|
class MetricdServiceManager(cotyledon.ServiceManager):
    """Process manager wiring together every metricd worker service."""

    def __init__(self, conf):
        super(MetricdServiceManager, self).__init__()
        self.conf = conf
        # Process-safe queue carrying scheduled metric batches from the
        # single scheduler to the pool of processors.
        self.queue = multiprocessing.Manager().Queue()

        self.add(MetricScheduler, args=(self.conf, self.queue))
        self.add(MetricProcessor, args=(self.conf, self.queue),
                 workers=conf.metricd.workers)
        self.add(MetricReporting, args=(self.conf,))
        self.add(MetricJanitor, args=(self.conf,))

    def run(self):
        super(MetricdServiceManager, self).run()
        # All workers are gone once run() returns; release the queue.
        self.queue.close()
|
|
|
|
|
|
def metricd():
    """Entry point for the gnocchi-metricd daemon."""
    MetricdServiceManager(service.prepare_service()).run()
|