Monitor and report executor inode usage

This adds inodes to the hdd executor sensor and reports usage
to statsd as well.

Change-Id: Ifd9a63cfc7682f6679322e39809be69abca6827e
This commit is contained in:
James E. Blair 2024-02-16 17:55:21 -08:00
parent 922a6b53ed
commit 8dd4011aa0
5 changed files with 70 additions and 10 deletions

View File

@ -740,6 +740,16 @@ The following sections of ``zuul.conf`` are used by the executor:
disk space divided by the total real storage capacity multiplied by
100.
.. attr:: min_avail_inodes
:default: 5.0
This is the minimum percentage of HDD inodes available for the
:attr:`executor.state_dir` directory. The executor will stop accepting
more than 1 job at a time until more inodes are available. The
available inode percentage is calculated from the total available
inodes divided by the total real inode capacity multiplied by
100.
.. attr:: min_avail_mem
:default: 5.0

View File

@ -490,6 +490,11 @@ These metrics are emitted by the Zuul :ref:`scheduler`:
The used disk on this executor, as a percentage multiplied by 100.
.. stat:: pct_used_inodes
:type: gauge
The used inodes on this executor, as a percentage multiplied by 100.
.. stat:: pct_used_ram
:type: gauge

View File

@ -0,0 +1,8 @@
---
features:
- |
The executor now monitors filesystem inode usage as well as
storage usage. The threshold for accepting jobs can be configured
independently with :attr:`executor.min_avail_inodes`. Inode
usage is also reported separately with the
:stat:`zuul.executor.<executor>.pct_used_inodes` metric.

View File

@ -714,8 +714,10 @@ class TestGovernor(ZuulTestCase):
hdd = Dummy()
hdd.f_frsize = 4096
hdd.f_blocks = 120920708
hdd.f_bfree = 95716701
statvfs_mock.return_value = hdd # 20.84% used
hdd.f_bfree = 95716701 # 20.84% used
hdd.f_files = 61022208
hdd.f_ffree = 32147841 # 47.31% used
statvfs_mock.return_value = hdd
loadavg_mock.return_value = (0.0, 0.0, 0.0)
self.executor_server.manageLoad()
@ -724,9 +726,12 @@ class TestGovernor(ZuulTestCase):
self.assertReportedStat(
'zuul.executor.test-executor-hostname_example_com.pct_used_hdd',
value='2084', kind='g')
self.assertReportedStat(
'zuul.executor.test-executor-hostname_example_com.pct_used_inodes',
value='4731', kind='g')
hdd.f_bfree = 5716701
statvfs_mock.return_value = hdd # 95.27% used
hdd.f_bfree = 5716701 # 95.27% used
statvfs_mock.return_value = hdd
self.executor_server.manageLoad()
self.assertFalse(self.executor_server.accepting_work)
@ -734,6 +739,23 @@ class TestGovernor(ZuulTestCase):
self.assertReportedStat(
'zuul.executor.test-executor-hostname_example_com.pct_used_hdd',
value='9527', kind='g')
self.assertReportedStat(
'zuul.executor.test-executor-hostname_example_com.pct_used_inodes',
value='4731', kind='g')
hdd.f_bfree = 95716701 # 20.84% used
hdd.f_ffree = 1336387 # 97.80% used
statvfs_mock.return_value = hdd
self.executor_server.manageLoad()
self.assertFalse(self.executor_server.accepting_work)
self.assertReportedStat(
'zuul.executor.test-executor-hostname_example_com.pct_used_hdd',
value='2084', kind='g')
self.assertReportedStat(
'zuul.executor.test-executor-hostname_example_com.pct_used_inodes',
value='9780', kind='g')
@mock.patch('os.getloadavg')
def test_pause_governor(self, loadavg_mock):

View File

@ -20,12 +20,17 @@ from zuul.executor.sensors import SensorInterface
from zuul.lib.config import get_default
def get_avail_hdd_pct(path):
def get_avail_hdd_inode_pct(path):
s = os.statvfs(path)
used = float(s.f_blocks - s.f_bfree)
percent = (used / s.f_blocks) * 100
blocks_used = float(s.f_blocks - s.f_bfree)
blocks_percent = (blocks_used / s.f_blocks) * 100
blocks_percent_avail = 100.0 - blocks_percent
return (100.0 - percent)
files_used = float(s.f_files - s.f_ffree)
files_percent = (files_used / s.f_files) * 100
files_percent_avail = 100.0 - files_percent
return (blocks_percent_avail, files_percent_avail)
class HDDSensor(SensorInterface):
@ -35,20 +40,30 @@ class HDDSensor(SensorInterface):
super().__init__(statsd, base_key)
self.min_avail_hdd = float(
get_default(config, 'executor', 'min_avail_hdd', '5.0'))
self.min_avail_inodes = float(
get_default(config, 'executor', 'min_avail_inodes', '5.0'))
self.state_dir = get_default(
config, 'executor', 'state_dir', '/var/lib/zuul', expand_user=True)
def isOk(self):
avail_hdd_pct = get_avail_hdd_pct(self.state_dir)
avail_hdd_pct, avail_inodes_pct = get_avail_hdd_inode_pct(
self.state_dir)
if self.statsd:
# We multiply the percentage by 100 so we can report it to
# 2 decimal places.
self.statsd.gauge(self.base_key + '.pct_used_hdd',
int((100.0 - avail_hdd_pct) * 100))
self.statsd.gauge(self.base_key + '.pct_used_inodes',
int((100.0 - avail_inodes_pct) * 100))
if avail_hdd_pct < self.min_avail_hdd:
return False, "low disk space {:3.1f}% < {}".format(
avail_hdd_pct, self.min_avail_hdd)
if avail_inodes_pct < self.min_avail_inodes:
return False, "low disk inodes {:3.1f}% < {}".format(
avail_inodes_pct, self.min_avail_inodes)
return True, "{:3.1f}% <= {}".format(avail_hdd_pct, self.min_avail_hdd)
return True, "{:3.1f}% <= {}, {:3.1f}% <= {}".format(
avail_hdd_pct, self.min_avail_hdd,
avail_inodes_pct, self.min_avail_inodes)