From 4ae0a6f9a648ba169c31c88b6143f69951d0a8a5 Mon Sep 17 00:00:00 2001 From: Simon Westphahl Date: Tue, 30 Jan 2024 13:59:36 +0100 Subject: [PATCH] Refactor config loading in builder and launcher In I93400cc156d09ea1add4fc753846df923242c0e6 we've refactore the launcher config loading to use the last modified timestamps of the config files to detect if a reload is necessary. In the builder the situation is even worse as we reload and compare the config much more often e.g. in the build worker when checking for manual or scheduled image updates. With a larger config (2-3MB range) this is a significant performance problem that can lead to builders being busy with config loading instead of building images. Yappi profile (performed with the optimization proposed in I786daa20ca428039a44d14b1e389d4d3fd62a735, which doesn't fully solve the problem): name ncall tsub ttot tavg ..py:880 AwsProviderDiskImage.__eq__ 812.. 17346.57 27435.41 0.000034 ..odepool/config.py:281 Label.__eq__ 155.. 1.189220 27403.11 0.176285 ..643 BuildWorker._checkConfigRecent 58 0.000000 27031.40 466.0586 ..depool/config.py:118 Config.__eq__ 58 0.000000 26733.50 460.9225 Change-Id: I929bdb757eb9e077012b530f6f872bea96ec8bbc --- nodepool/builder.py | 50 ++++++++++++++++++++++---------------------- nodepool/config.py | 24 +++++++++++++++++++++ nodepool/launcher.py | 16 +++----------- 3 files changed, 52 insertions(+), 38 deletions(-) diff --git a/nodepool/builder.py b/nodepool/builder.py index d659f53ce..0aca542f9 100644 --- a/nodepool/builder.py +++ b/nodepool/builder.py @@ -132,6 +132,10 @@ class BaseWorker(threading.Thread): self.log.debug("Detected ZooKeeper server changes") self._zk.resetHosts(list(new_config.zookeeper_servers.values())) + def _checkConfigRecent(self): + return self._config and nodepool_config.checkRecentConfig( + self._config, self._config_path, self._secure_path) + def _readConfig(self): new_config = nodepool_config.loadConfig(self._config_path) if self._secure_path: @@ -585,16 +589,15 @@ class CleanupWorker(BaseWorker): ''' Body of run method for exception handling purposes. ''' - new_config = self._readConfig() - if not self._config: + if not self._checkConfigRecent(): + new_config = self._readConfig() + if not self._config: + self._config = new_config + self._checkForZooKeeperChanges(new_config) + provider_manager.ProviderManager.reconfigure( + self._config, new_config, self._zk, only_image_manager=True) self._config = new_config - self._checkForZooKeeperChanges(new_config) - provider_manager.ProviderManager.reconfigure(self._config, new_config, - self._zk, - only_image_manager=True) - self._config = new_config - self._cleanup() self._emitBuildRequestStats() @@ -648,8 +651,7 @@ class BuildWorker(BaseWorker): if not self._running or self._zk.suspended or self._zk.lost: return try: - new_config = self._readConfig() - if new_config != self._config: + if not self._checkConfigRecent(): # If our config isn't up to date then return and start # over with a new config load. return @@ -761,8 +763,7 @@ class BuildWorker(BaseWorker): if not self._running or self._zk.suspended or self._zk.lost: return try: - new_config = self._readConfig() - if new_config != self._config: + if not self._checkConfigRecent(): # If our config isn't up to date then return and start # over with a new config load. return @@ -1036,13 +1037,13 @@ class BuildWorker(BaseWorker): ''' Body of run method for exception handling purposes. ''' - # NOTE: For the first iteration, we expect self._config to be None - new_config = self._readConfig() - if not self._config: + if not self._checkConfigRecent(): + new_config = self._readConfig() + if not self._config: + self._config = new_config + self._checkForZooKeeperChanges(new_config) self._config = new_config - self._checkForZooKeeperChanges(new_config) - self._config = new_config self._checkForScheduledImageUpdates() self._checkForManualBuildRequest() @@ -1059,16 +1060,15 @@ class UploadWorker(BaseWorker): ''' Reload the nodepool configuration file. ''' - new_config = self._readConfig() - if not self._config: + if not self._checkConfigRecent(): + new_config = self._readConfig() + if not self._config: + self._config = new_config + self._checkForZooKeeperChanges(new_config) + provider_manager.ProviderManager.reconfigure( + self._config, new_config, self._zk, only_image_manager=True) self._config = new_config - self._checkForZooKeeperChanges(new_config) - provider_manager.ProviderManager.reconfigure(self._config, new_config, - self._zk, - only_image_manager=True) - self._config = new_config - def _uploadImage(self, build_id, upload_id, image_name, images, provider, username, python_path, shell_type): ''' diff --git a/nodepool/config.py b/nodepool/config.py index 71b7ff670..29838f0cf 100644 --- a/nodepool/config.py +++ b/nodepool/config.py @@ -114,6 +114,8 @@ class Config(ConfigValue): self.max_hold_age = None self.webapp = None self.tenant_resource_limits = {} + # Last modified timestamps of loaded config files + self.config_mtimes = {} def __eq__(self, other): if isinstance(other, Config): @@ -133,6 +135,9 @@ class Config(ConfigValue): ) return False + def setConfigPathMtime(self, path, mtime): + self.config_mtimes[path] = mtime + def setElementsDir(self, value): self.elements_dir = value @@ -428,6 +433,7 @@ def openConfig(path, env): def loadConfig(config_path, env=os.environ): + config_mtime = os.stat(config_path).st_mtime_ns config = openConfig(config_path, env) # Call driver config reset now to clean global hooks like openstacksdk @@ -449,11 +455,13 @@ def loadConfig(config_path, env=os.environ): newconfig.setProviders(config.get('providers')) newconfig.setZooKeeperTLS(config.get('zookeeper-tls')) newconfig.setTenantResourceLimits(config.get('tenant-resource-limits')) + newconfig.setConfigPathMtime(config_path, config_mtime) return newconfig def loadSecureConfig(config, secure_config_path, env=os.environ): + secure_mtime = os.stat(secure_config_path).st_mtime_ns secure = openConfig(secure_config_path, env) if not secure: # empty file return @@ -466,3 +474,19 @@ def loadSecureConfig(config, secure_config_path, env=os.environ): config.setSecureDiskimageEnv( secure.get('diskimages', []), secure_config_path) config.setZooKeeperTLS(secure.get('zookeeper-tls')) + config.setConfigPathMtime(secure_config_path, secure_mtime) + + +def checkRecentConfig(config, config_path, secure_path=None): + current_config_mtime = config.config_mtimes.get(config_path, 0) + new_config_mtime = os.stat(config_path).st_mtime_ns + if current_config_mtime != new_config_mtime: + return False + + if secure_path: + current_secure_mtime = config.config_mtimes.get(secure_path, 0) + new_secure_mtime = os.stat(secure_path).st_mtime_ns + if current_secure_mtime != new_secure_mtime: + return False + + return True diff --git a/nodepool/launcher.py b/nodepool/launcher.py index 7f0df839f..aa14944f5 100644 --- a/nodepool/launcher.py +++ b/nodepool/launcher.py @@ -1034,9 +1034,7 @@ class NodePool(threading.Thread): watermark_sleep=WATERMARK_SLEEP): threading.Thread.__init__(self, name='NodePool') self.securefile = securefile - self._secure_mtime = 0 self.configfile = configfile - self._config_mtime = 0 self.watermark_sleep = watermark_sleep self.cleanup_interval = 60 self.delete_interval = 5 @@ -1145,15 +1143,9 @@ class NodePool(threading.Thread): t.provider_name == provider_name] def updateConfig(self): - secure_mtime = 0 - config_mtime = os.stat(self.configfile).st_mtime_ns - if self._config_mtime == config_mtime: - if self.securefile: - secure_mtime = os.stat(self.securefile).st_mtime_ns - if self._secure_mtime == secure_mtime: - return - else: - return + if self.config and nodepool_config.checkRecentConfig( + self.config, self.configfile, self.securefile): + return config = self.loadConfig() self.reconfigureZooKeeper(config) @@ -1164,8 +1156,6 @@ class NodePool(threading.Thread): del config.providers[provider_name] self.setConfig(config) - self._config_mtime = config_mtime - self._secure_mtime = secure_mtime def removeCompletedRequests(self): '''