diff --git a/doc/source/configuration.rst b/doc/source/configuration.rst
index 3920f04305..1863df1df7 100644
--- a/doc/source/configuration.rst
+++ b/doc/source/configuration.rst
@@ -740,6 +740,16 @@ The following sections of ``zuul.conf`` are used by the executor:
       disk space divided by the total real storage capacity multiplied by
       100.

+   .. attr:: min_avail_inodes
+      :default: 5.0
+
+      This is the minimum percentage of HDD inodes available for the
+      :attr:`executor.state_dir` directory. The executor will stop accepting
+      more than 1 job at a time until more inodes are available. The
+      available inode percentage is calculated from the total available
+      inodes divided by the total real inode capacity multiplied by
+      100.
+
    .. attr:: min_avail_mem
       :default: 5.0

diff --git a/doc/source/monitoring.rst b/doc/source/monitoring.rst
index f25b82a96d..5bed8819f8 100644
--- a/doc/source/monitoring.rst
+++ b/doc/source/monitoring.rst
@@ -490,6 +490,11 @@ These metrics are emitted by the Zuul :ref:`scheduler`:

       The used disk on this executor, as a percentage multiplied by 100.

+   .. stat:: pct_used_inodes
+      :type: gauge
+
+      The used inodes on this executor, as a percentage multiplied by 100.
+
    .. stat:: pct_used_ram
       :type: gauge

diff --git a/releasenotes/notes/inodes-116b429ce06e91c8.yaml b/releasenotes/notes/inodes-116b429ce06e91c8.yaml
new file mode 100644
index 0000000000..a0506759d3
--- /dev/null
+++ b/releasenotes/notes/inodes-116b429ce06e91c8.yaml
@@ -0,0 +1,8 @@
+---
+features:
+  - |
+    The executor now monitors filesystem inode usage as well as
+    storage usage. The threshold for accepting jobs can be configured
+    independently with :attr:`executor.min_avail_inodes`. Inode
+    usage is also reported separately with the
+    :stat:`zuul.executor.<hostname>.pct_used_inodes` metric.
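The availability check that min_avail_inodes documents can be sketched as follows. This is a minimal illustration rather than the patch's implementation (the real change is to get_avail_hdd_inode_pct in zuul/executor/sensors/hdd.py below), using only os.statvfs and its f_files/f_ffree fields:

    import os

    def avail_inode_pct(path):
        # statvfs reports the filesystem's total inode count (f_files)
        # and free inode count (f_ffree); the available percentage is
        # the free count divided by the total count, multiplied by 100.
        s = os.statvfs(path)
        return (s.f_ffree / s.f_files) * 100

    # With the default threshold of 5.0, the executor stops accepting
    # additional jobs once this percentage for state_dir drops below 5.0.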
diff --git a/tests/unit/test_executor.py b/tests/unit/test_executor.py
index 0180de03df..105a91b709 100644
--- a/tests/unit/test_executor.py
+++ b/tests/unit/test_executor.py
@@ -714,8 +714,10 @@ class TestGovernor(ZuulTestCase):
         hdd = Dummy()
         hdd.f_frsize = 4096
         hdd.f_blocks = 120920708
-        hdd.f_bfree = 95716701
-        statvfs_mock.return_value = hdd  # 20.84% used
+        hdd.f_bfree = 95716701  # 20.84% used
+        hdd.f_files = 61022208
+        hdd.f_ffree = 32147841  # 47.31% used
+        statvfs_mock.return_value = hdd
         loadavg_mock.return_value = (0.0, 0.0, 0.0)

         self.executor_server.manageLoad()
@@ -724,9 +726,12 @@ class TestGovernor(ZuulTestCase):
         self.assertReportedStat(
             'zuul.executor.test-executor-hostname_example_com.pct_used_hdd',
             value='2084', kind='g')
+        self.assertReportedStat(
+            'zuul.executor.test-executor-hostname_example_com.pct_used_inodes',
+            value='4731', kind='g')

-        hdd.f_bfree = 5716701
-        statvfs_mock.return_value = hdd  # 95.27% used
+        hdd.f_bfree = 5716701  # 95.27% used
+        statvfs_mock.return_value = hdd

         self.executor_server.manageLoad()
         self.assertFalse(self.executor_server.accepting_work)
@@ -734,6 +739,23 @@ class TestGovernor(ZuulTestCase):
         self.assertReportedStat(
             'zuul.executor.test-executor-hostname_example_com.pct_used_hdd',
             value='9527', kind='g')
+        self.assertReportedStat(
+            'zuul.executor.test-executor-hostname_example_com.pct_used_inodes',
+            value='4731', kind='g')
+
+        hdd.f_bfree = 95716701  # 20.84% used
+        hdd.f_ffree = 1336387  # 97.80% used
+        statvfs_mock.return_value = hdd
+
+        self.executor_server.manageLoad()
+        self.assertFalse(self.executor_server.accepting_work)
+
+        self.assertReportedStat(
+            'zuul.executor.test-executor-hostname_example_com.pct_used_hdd',
+            value='2084', kind='g')
+        self.assertReportedStat(
+            'zuul.executor.test-executor-hostname_example_com.pct_used_inodes',
+            value='9780', kind='g')

     @mock.patch('os.getloadavg')
     def test_pause_governor(self, loadavg_mock):
diff --git a/zuul/executor/sensors/hdd.py b/zuul/executor/sensors/hdd.py
index 769512e32a..c58c357683 100644
--- a/zuul/executor/sensors/hdd.py
+++ b/zuul/executor/sensors/hdd.py
@@ -20,12 +20,17 @@ from zuul.executor.sensors import SensorInterface
 from zuul.lib.config import get_default


-def get_avail_hdd_pct(path):
+def get_avail_hdd_inode_pct(path):
     s = os.statvfs(path)
-    used = float(s.f_blocks - s.f_bfree)
-    percent = (used / s.f_blocks) * 100
+    blocks_used = float(s.f_blocks - s.f_bfree)
+    blocks_percent = (blocks_used / s.f_blocks) * 100
+    blocks_percent_avail = 100.0 - blocks_percent

-    return (100.0 - percent)
+    files_used = float(s.f_files - s.f_ffree)
+    files_percent = (files_used / s.f_files) * 100
+    files_percent_avail = 100.0 - files_percent
+
+    return (blocks_percent_avail, files_percent_avail)


 class HDDSensor(SensorInterface):
@@ -35,20 +40,30 @@ class HDDSensor(SensorInterface):
         super().__init__(statsd, base_key)
         self.min_avail_hdd = float(
             get_default(config, 'executor', 'min_avail_hdd', '5.0'))
+        self.min_avail_inodes = float(
+            get_default(config, 'executor', 'min_avail_inodes', '5.0'))
         self.state_dir = get_default(
             config, 'executor', 'state_dir', '/var/lib/zuul', expand_user=True)

     def isOk(self):
-        avail_hdd_pct = get_avail_hdd_pct(self.state_dir)
+        avail_hdd_pct, avail_inodes_pct = get_avail_hdd_inode_pct(
+            self.state_dir)

         if self.statsd:
             # We multiply the percentage by 100 so we can report it to
             # 2 decimal points.
             self.statsd.gauge(self.base_key + '.pct_used_hdd',
                               int((100.0 - avail_hdd_pct) * 100))
+            self.statsd.gauge(self.base_key + '.pct_used_inodes',
+                              int((100.0 - avail_inodes_pct) * 100))

         if avail_hdd_pct < self.min_avail_hdd:
             return False, "low disk space {:3.1f}% < {}".format(
                 avail_hdd_pct, self.min_avail_hdd)
+        if avail_inodes_pct < self.min_avail_inodes:
+            return False, "low disk inodes {:3.1f}% < {}".format(
+                avail_inodes_pct, self.min_avail_inodes)

-        return True, "{:3.1f}% <= {}".format(avail_hdd_pct, self.min_avail_hdd)
+        return True, "{:3.1f}% <= {}, {:3.1f}% <= {}".format(
+            avail_hdd_pct, self.min_avail_hdd,
+            avail_inodes_pct, self.min_avail_inodes)
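As a worked example of how the gauge value is encoded, here is a short sketch (not additional test code) using the inode figures from test_hdd_governor above; the used percentage is multiplied by 100 so two decimal places survive the integer statsd gauge:

    # Inode figures supplied to the test's statvfs mock.
    f_files = 61022208   # total inodes on the filesystem
    f_ffree = 32147841   # free inodes

    avail_pct = (f_ffree / f_files) * 100   # about 52.7% available
    used_pct = 100.0 - avail_pct            # about 47.3% used
    gauge_value = int(used_pct * 100)       # 4731, the value the test asserts

    # 52.7% available is well above the 5.0 default for min_avail_inodes,
    # so the inode check alone would not stop the executor accepting work.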