Update zoned executor stats
This gives us 3 sets of executor stats: zoned, unzoned, and the deprecated top-level keys (which now mirror the unzoned stats). The zoned and unzoned stats are moved to a hierarchy that avoids name collisions. The only way to detect whether an executor in a zone is online is to look for a registered function whose name includes its zone. Since the only function currently registered with a zone name is "executor:execute", and that is unregistered when an executor is online but not accepting, we need to add a new dummy function in order to do this accounting. This also updates the docs and adds a release note about the (minor) change in stats meaning. Change-Id: Ie28963426024f2d54275426794549f31ace9d998
This commit is contained in:
parent
59c93ed108
commit
7599b6bdc0
|
@ -337,7 +337,14 @@ These metrics are emitted by the Zuul :ref:`scheduler`:
|
|||
|
||||
.. stat:: zuul.executors
|
||||
|
||||
Holds metrics related to Zuul executors.
|
||||
Holds metrics related to unzoned executors.
|
||||
|
||||
This is a copy of :stat:`zuul.executors.unzoned`. It does not
|
||||
include information about zoned executors.
|
||||
|
||||
.. warning:: The metrics at this location are deprecated and will
|
||||
be removed in a future version. Please begin using
|
||||
:stat:`zuul.executors.unzoned` instead.
|
||||
|
||||
.. stat:: online
|
||||
:type: gauge
|
||||
|
@ -361,6 +368,61 @@ These metrics are emitted by the Zuul :ref:`scheduler`:
|
|||
executor to run on. This should ideally be at zero; persistent
|
||||
higher values indicate more executor resources would be useful.
|
||||
|
||||
.. stat:: unzoned
|
||||
|
||||
Holds metrics related to unzoned executors.
|
||||
|
||||
.. stat:: online
|
||||
:type: gauge
|
||||
|
||||
The number of unzoned Zuul executor processes online.
|
||||
|
||||
.. stat:: accepting
|
||||
:type: gauge
|
||||
|
||||
The number of unzoned Zuul executor processes accepting new
|
||||
jobs.
|
||||
|
||||
.. stat:: jobs_running
|
||||
:type: gauge
|
||||
|
||||
The number of unzoned executor jobs running.
|
||||
|
||||
.. stat:: jobs_queued
|
||||
:type: gauge
|
||||
|
||||
The number of jobs that have been allocated nodes but are queued waiting for an
|
||||
unzoned executor to run on. This should ideally be at zero;
|
||||
persistent higher values indicate more executor resources
|
||||
would be useful.
|
||||
|
||||
.. stat:: zone
|
||||
|
||||
Holds metrics related to zoned executors.
|
||||
|
||||
.. stat:: <zone>.online
|
||||
:type: gauge
|
||||
|
||||
The number of Zuul executor processes online in this zone.
|
||||
|
||||
.. stat:: <zone>.accepting
|
||||
:type: gauge
|
||||
|
||||
The number of Zuul executor processes accepting new jobs in
|
||||
this zone.
|
||||
|
||||
.. stat:: <zone>.jobs_running
|
||||
:type: gauge
|
||||
|
||||
The number of executor jobs running in this zone.
|
||||
|
||||
.. stat:: <zone>.jobs_queued
|
||||
:type: gauge
|
||||
|
||||
The number of jobs that have been allocated nodes but are queued waiting for an
|
||||
executor in this zone to run on. This should ideally be at
|
||||
zero; persistent higher values indicate more executor
|
||||
resources would be useful.
|
||||
|
||||
.. stat:: zuul.scheduler
|
||||
|
||||
|
|
|
@ -0,0 +1,12 @@
|
|||
---
|
||||
upgrade:
|
||||
- |
|
||||
Two sets of statsd metrics are now reported for executors: zoned
|
||||
and unzoned. The existing statsd keys are now deprecated; new
|
||||
statsd keys are available for both zoned and unzoned executors.
|
||||
See :stat:`zuul.executors` for details.
|
||||
fixes:
|
||||
- |
|
||||
If zoned executors were used with prior releases of Zuul, the
|
||||
reported executor statistics would only represent a single,
|
||||
unspecified zone. This has now been corrected.
|
|
@ -114,11 +114,21 @@ class TestSchedulerZone(ZuulTestCase):
|
|||
|
||||
# Validate that the reported executor stats are correct. There must
|
||||
# be two executors online and two accepting (one unzoned and one zoned)
|
||||
self.assertReportedStat('zuul.executors.online', value='2', kind='g')
|
||||
# TODO(corvus): remove deprecated top-level stats in 5.0
|
||||
self.assertReportedStat(
|
||||
'zuul.executors.online', value='1', kind='g')
|
||||
self.assertReportedStat(
|
||||
'zuul.executors.accepting', value='1', kind='g')
|
||||
self.assertReportedStat(
|
||||
'zuul.executors.test-provider_vpn.accepting', value='1', kind='g')
|
||||
'zuul.executors.unzoned.online', value='1', kind='g')
|
||||
self.assertReportedStat(
|
||||
'zuul.executors.unzoned.accepting', value='1', kind='g')
|
||||
self.assertReportedStat(
|
||||
'zuul.executors.zone.test-provider_vpn.online',
|
||||
value='1', kind='g')
|
||||
self.assertReportedStat(
|
||||
'zuul.executors.zone.test-provider_vpn.accepting',
|
||||
value='1', kind='g')
|
||||
|
||||
self.gearman_server.hold_jobs_in_queue = True
|
||||
A = self.fake_gerrit.addFakeChange('org/project', 'master', 'A')
|
||||
|
|
|
@ -2699,10 +2699,17 @@ class ExecutorServer(BaseMergeServer):
|
|||
if self.zone:
|
||||
function_name += ':%s' % self.zone
|
||||
|
||||
# This function only exists so we can count how many executors
|
||||
# are online.
|
||||
online_name = 'executor:online'
|
||||
if self.zone:
|
||||
online_name += ':%s' % self.zone
|
||||
|
||||
self.executor_jobs = {
|
||||
"executor:resume:%s" % self.hostname: self.resumeJob,
|
||||
"executor:stop:%s" % self.hostname: self.stopJob,
|
||||
function_name: self.executeJob,
|
||||
online_name: self.noop,
|
||||
}
|
||||
|
||||
self.executor_gearworker = ZuulGearWorker(
|
||||
|
@ -2720,6 +2727,10 @@ class ExecutorServer(BaseMergeServer):
|
|||
def _repoLock(self, connection_name, project_name):
|
||||
return self.repo_locks.getRepoLock(connection_name, project_name)
|
||||
|
||||
def noop(self, job):
|
||||
"""A noop gearman job so we can register for statistics."""
|
||||
job.sendWorkComplete()
|
||||
|
||||
def start(self):
|
||||
# Start merger worker only if we process merge jobs
|
||||
if self.process_merge_jobs:
|
||||
|
|
|
@ -436,39 +436,55 @@ class Scheduler(threading.Thread):
|
|||
return
|
||||
functions = getGearmanFunctions(self.rpc.gearworker.gearman)
|
||||
functions.update(getGearmanFunctions(self.rpc_slow.gearworker.gearman))
|
||||
executors_online = 0
|
||||
mergers_online = 0
|
||||
merge_queue = 0
|
||||
merge_running = 0
|
||||
for (name, (queued, running, registered)) in functions.items():
|
||||
if name.startswith('executor:execute'):
|
||||
executors_accepting = registered
|
||||
execute_queue = queued - running
|
||||
execute_running = running
|
||||
tokens = name.split(':', 2)
|
||||
if len(tokens) == 2:
|
||||
# unzoned case
|
||||
self.statsd.gauge('zuul.executors.unzoned.accepting',
|
||||
registered)
|
||||
self.statsd.gauge('zuul.executors.unzoned.jobs_running',
|
||||
running)
|
||||
self.statsd.gauge('zuul.executors.unzoned.jobs_queued',
|
||||
execute_queue)
|
||||
# TODO(corvus): Remove for 5.0:
|
||||
self.statsd.gauge('zuul.executors.accepting',
|
||||
executors_accepting)
|
||||
registered)
|
||||
self.statsd.gauge('zuul.executors.jobs_running',
|
||||
execute_running)
|
||||
running)
|
||||
self.statsd.gauge('zuul.executors.jobs_queued',
|
||||
execute_queue)
|
||||
else:
|
||||
# zoned case
|
||||
zone = tokens[2]
|
||||
self.statsd.gauge('zuul.executors.%s.accepting' %
|
||||
self.statsd.gauge('zuul.executors.zone.%s.accepting' %
|
||||
normalize_statsd_name(zone),
|
||||
executors_accepting)
|
||||
self.statsd.gauge('zuul.executors.%s.jobs_running' %
|
||||
registered)
|
||||
self.statsd.gauge('zuul.executors.zone.%s.jobs_running' %
|
||||
normalize_statsd_name(zone),
|
||||
execute_running)
|
||||
self.statsd.gauge('zuul.executors.%s.jobs_queued' %
|
||||
running)
|
||||
self.statsd.gauge('zuul.executors.zone.%s.jobs_queued' %
|
||||
normalize_statsd_name(zone),
|
||||
execute_queue)
|
||||
|
||||
if name.startswith('executor:stop'):
|
||||
executors_online += registered
|
||||
if name.startswith('executor:online'):
|
||||
tokens = name.split(':', 2)
|
||||
if len(tokens) == 2:
|
||||
# unzoned case
|
||||
self.statsd.gauge('zuul.executors.unzoned.online',
|
||||
registered)
|
||||
# TODO(corvus): Remove for 5.0:
|
||||
self.statsd.gauge('zuul.executors.online', registered)
|
||||
else:
|
||||
# zoned case
|
||||
zone = tokens[2]
|
||||
self.statsd.gauge('zuul.executors.zone.%s.online' %
|
||||
normalize_statsd_name(zone),
|
||||
registered)
|
||||
if name == 'merger:merge':
|
||||
mergers_online = registered
|
||||
if name.startswith('merger:'):
|
||||
|
@ -477,8 +493,6 @@ class Scheduler(threading.Thread):
|
|||
self.statsd.gauge('zuul.mergers.online', mergers_online)
|
||||
self.statsd.gauge('zuul.mergers.jobs_running', merge_running)
|
||||
self.statsd.gauge('zuul.mergers.jobs_queued', merge_queue)
|
||||
self.statsd.gauge('zuul.executors.online', executors_online)
|
||||
|
||||
self.statsd.gauge('zuul.scheduler.eventqueues.trigger',
|
||||
self.trigger_event_queue.qsize())
|
||||
self.statsd.gauge('zuul.scheduler.eventqueues.result',
|
||||
|
|
Loading…
Reference in New Issue