Merge "implement hashring partitioning"
This commit is contained in:
commit
6aa5491203
|
@ -19,10 +19,13 @@ import time
|
||||||
|
|
||||||
import cotyledon
|
import cotyledon
|
||||||
from cotyledon import oslo_config_glue
|
from cotyledon import oslo_config_glue
|
||||||
|
from futurist import periodics
|
||||||
from oslo_config import cfg
|
from oslo_config import cfg
|
||||||
from oslo_log import log
|
from oslo_log import log
|
||||||
from oslo_utils import timeutils
|
from oslo_utils import timeutils
|
||||||
import six
|
import six
|
||||||
|
import tenacity
|
||||||
|
import tooz
|
||||||
|
|
||||||
from gnocchi import archive_policy
|
from gnocchi import archive_policy
|
||||||
from gnocchi import genconfig
|
from gnocchi import genconfig
|
||||||
|
@ -139,12 +142,56 @@ class MetricReporting(MetricProcessBase):
|
||||||
|
|
||||||
class MetricProcessor(MetricProcessBase):
|
class MetricProcessor(MetricProcessBase):
|
||||||
name = "processing"
|
name = "processing"
|
||||||
|
GROUP_ID = "gnocchi-processing"
|
||||||
|
|
||||||
def __init__(self, worker_id, conf):
|
def __init__(self, worker_id, conf):
|
||||||
super(MetricProcessor, self).__init__(
|
super(MetricProcessor, self).__init__(
|
||||||
worker_id, conf, conf.metricd.metric_processing_delay)
|
worker_id, conf, conf.metricd.metric_processing_delay)
|
||||||
self._coord, self._my_id = utils.get_coordinator_and_start(
|
self._coord, self._my_id = utils.get_coordinator_and_start(
|
||||||
conf.storage.coordination_url)
|
conf.storage.coordination_url)
|
||||||
|
self._tasks = []
|
||||||
|
self.group_state = None
|
||||||
|
|
||||||
|
@utils.retry
|
||||||
|
def _configure(self):
|
||||||
|
super(MetricProcessor, self)._configure()
|
||||||
|
# create fallback in case paritioning fails or assigned no tasks
|
||||||
|
self.fallback_tasks = list(
|
||||||
|
six.moves.range(self.store.incoming.NUM_SACKS))
|
||||||
|
try:
|
||||||
|
self.partitioner = self._coord.join_partitioned_group(
|
||||||
|
self.GROUP_ID, partitions=200)
|
||||||
|
LOG.info('Joined coordination group: %s', self.GROUP_ID)
|
||||||
|
|
||||||
|
@periodics.periodic(spacing=self.conf.metricd.worker_sync_rate,
|
||||||
|
run_immediately=True)
|
||||||
|
def run_watchers():
|
||||||
|
self._coord.run_watchers()
|
||||||
|
|
||||||
|
self.periodic = periodics.PeriodicWorker.create([])
|
||||||
|
self.periodic.add(run_watchers)
|
||||||
|
t = threading.Thread(target=self.periodic.start)
|
||||||
|
t.daemon = True
|
||||||
|
t.start()
|
||||||
|
except NotImplementedError:
|
||||||
|
LOG.warning('Coordinator does not support partitioning. Worker '
|
||||||
|
'will battle against other workers for jobs.')
|
||||||
|
except tooz.ToozError as e:
|
||||||
|
LOG.error('Unexpected error configuring coordinator for '
|
||||||
|
'partitioning. Retrying: %s', e)
|
||||||
|
raise tenacity.TryAgain(e)
|
||||||
|
|
||||||
|
def _get_tasks(self):
|
||||||
|
try:
|
||||||
|
if (not self._tasks or
|
||||||
|
self.group_state != self.partitioner.ring.nodes):
|
||||||
|
self.group_state = self.partitioner.ring.nodes.copy()
|
||||||
|
# TODO(gordc): make replicas configurable
|
||||||
|
self._tasks = [
|
||||||
|
i for i in six.moves.range(self.store.incoming.NUM_SACKS)
|
||||||
|
if self.partitioner.belongs_to_self(i, replicas=3)]
|
||||||
|
finally:
|
||||||
|
return self._tasks or self.fallback_tasks
|
||||||
|
|
||||||
def _sack_lock(self, sack):
|
def _sack_lock(self, sack):
|
||||||
lock_name = b'gnocchi-sack-%s-lock' % str(sack).encode('ascii')
|
lock_name = b'gnocchi-sack-%s-lock' % str(sack).encode('ascii')
|
||||||
|
@ -154,7 +201,7 @@ class MetricProcessor(MetricProcessBase):
|
||||||
m_count = 0
|
m_count = 0
|
||||||
s_count = 0
|
s_count = 0
|
||||||
in_store = self.store.incoming
|
in_store = self.store.incoming
|
||||||
for s in six.moves.range(in_store.NUM_SACKS):
|
for s in self._get_tasks():
|
||||||
# TODO(gordc): support delay release lock so we don't
|
# TODO(gordc): support delay release lock so we don't
|
||||||
# process a sack right after another process
|
# process a sack right after another process
|
||||||
lock = self._sack_lock(s)
|
lock = self._sack_lock(s)
|
||||||
|
|
|
@ -85,6 +85,13 @@ def list_opts():
|
||||||
required=True,
|
required=True,
|
||||||
help="How many seconds to wait between "
|
help="How many seconds to wait between "
|
||||||
"cleaning of expired data"),
|
"cleaning of expired data"),
|
||||||
|
cfg.IntOpt('worker_sync_rate',
|
||||||
|
default=30,
|
||||||
|
help="Frequency to detect when metricd workers join or "
|
||||||
|
"leave system (in seconds). A shorter rate, may "
|
||||||
|
"improve rebalancing but create more coordination "
|
||||||
|
"load"),
|
||||||
|
|
||||||
)),
|
)),
|
||||||
("api", (
|
("api", (
|
||||||
cfg.StrOpt('paste_config',
|
cfg.StrOpt('paste_config',
|
||||||
|
|
Loading…
Reference in New Issue