Delete stale jobdirs at startup

At startup, delete any stale job directories left in the jobdir
root (e.g., /var/lib/zuul/builds) unless --keep is specified on
the command line.  If an executor is stopped uncleanly, it may
leave a substantial amount of data in the jobdir root -- enough
to cause it to run out of disk space upon recovery.  Cleaning up
at startup should let recovery proceed without manual intervention.

Change-Id: Ibfe3f788f3c2146649fde0a764470dde3961f780
Signed-off-by: Paul Belanger <pabelanger@redhat.com>
This commit is contained in:
James E. Blair 2018-01-05 14:21:41 -08:00 committed by Paul Belanger
parent cde55b8adb
commit 5f1c1bb48a
No known key found for this signature in database
GPG Key ID: 611A80832067AF38
2 changed files with 10 additions and 1 deletions

View File

@ -86,7 +86,7 @@ class Executor(zuul.cmd.ZuulDaemonApp):
job_dir=self.job_dir))
sys.exit(1)
else:
self.job_dir = tempfile.gettempdir()
self.job_dir = tempfile.mkdtemp()
self.setup_logging('executor', 'log_config')
self.log = logging.getLogger("zuul.Executor")

View File

@ -1868,6 +1868,15 @@ class ExecutorServer(object):
with open(os.path.join(zuul_dir, '__init__.py'), 'w'):
pass
# If keep is not set, ensure the job dir is empty on startup,
# in case we were uncleanly shut down.  Only directories directly
# under the jobdir root are removed; plain files are left alone.
if not self.keep_jobdir:
    for fn in os.listdir(self.jobdir_root):
        # os.listdir() returns bare entry names, so build the full
        # path before testing it; a bare name would be resolved
        # against the current working directory instead of the
        # jobdir root.
        path = os.path.join(self.jobdir_root, fn)
        if not os.path.isdir(path):
            continue
        self.log.info("Deleting stale jobdir %s", fn)
        shutil.rmtree(path)
self.job_workers = {}
self.disk_accountant = DiskAccountant(self.jobdir_root,
self.disk_limit_per_job,