From 11e28563d4ffacfeb1a65b55b5fe792c61e7d0e4 Mon Sep 17 00:00:00 2001 From: Lucian Petrut Date: Wed, 5 Sep 2018 17:52:58 +0300 Subject: [PATCH] Add wmi job terminate timeout cfg opt We're using a hardcoded 15-second timeout when canceling WMI jobs. Some jobs may not be cancelable, while others may take longer to stop. For example, Nova instances may end up in an error state after attempting to delete them, as this hardcoded timeout gets exceeded. That being considered, we're now adding a config option for this, also increasing the default timeout to 2 minutes. Ideally, Nova should pass a timeout here. Still, by adding a config option at the os-win level, it's easier to backport this to older releases, also providing a default for any kind of WMI job. Change-Id: I797366c40a3a4bb74fe925bd10697f227e22915c Closes-Bug: #1790890 --- os_win/conf.py | 5 +++++ os_win/tests/unit/utils/compute/test_vmutils.py | 3 ++- os_win/utils/compute/vmutils.py | 4 ++-- os_win/utils/jobutils.py | 9 +++++++-- 4 files changed, 16 insertions(+), 5 deletions(-) diff --git a/os_win/conf.py b/os_win/conf.py index 0276b7fa..96691ad5 100644 --- a/os_win/conf.py +++ b/os_win/conf.py @@ -29,6 +29,11 @@ os_win_opts = [ 'almost all operations require a reference to a ' 'switch port. The cached objects are no longer valid ' 'if the VM they are associated with is destroyed.'), + cfg.IntOpt('wmi_job_terminate_timeout', + default=120, + help='The default amount of seconds to wait when stopping ' + 'pending WMI jobs. 
Setting this value to 0 will ' + 'disable the timeout.'), ] CONF = cfg.CONF diff --git a/os_win/tests/unit/utils/compute/test_vmutils.py b/os_win/tests/unit/utils/compute/test_vmutils.py index dbaa8743..ab867ccd 100644 --- a/os_win/tests/unit/utils/compute/test_vmutils.py +++ b/os_win/tests/unit/utils/compute/test_vmutils.py @@ -1203,7 +1203,8 @@ class VMUtilsTestCase(test_base.OsWinBaseTestCase): self._vmutils.stop_vm_jobs(mock.sentinel.vm_name) - self._vmutils._jobutils.stop_jobs.assert_called_once_with(mock_vm) + self._vmutils._jobutils.stop_jobs.assert_called_once_with( + mock_vm, None) def test_set_secure_boot(self): vs_data = mock.MagicMock() diff --git a/os_win/utils/compute/vmutils.py b/os_win/utils/compute/vmutils.py index 7f02a459..527f53cd 100644 --- a/os_win/utils/compute/vmutils.py +++ b/os_win/utils/compute/vmutils.py @@ -985,9 +985,9 @@ class VMUtils(baseutils.BaseUtilsVirt): pass return constants.VM_GEN_1 - def stop_vm_jobs(self, vm_name): + def stop_vm_jobs(self, vm_name, timeout=None): vm = self._lookup_vm_check(vm_name, as_vssd=False) - self._jobutils.stop_jobs(vm) + self._jobutils.stop_jobs(vm, timeout) def enable_secure_boot(self, vm_name, msft_ca_required): """Enables Secure Boot for the instance with the given name. 
diff --git a/os_win/utils/jobutils.py b/os_win/utils/jobutils.py index e3d7f054..12355cd9 100644 --- a/os_win/utils/jobutils.py +++ b/os_win/utils/jobutils.py @@ -24,10 +24,13 @@ import monotonic from oslo_log import log as logging from os_win import _utils +import os_win.conf from os_win import constants from os_win import exceptions from os_win.utils import baseutils +CONF = os_win.conf.CONF + LOG = logging.getLogger(__name__) @@ -35,7 +38,6 @@ class JobUtils(baseutils.BaseUtilsVirt): _CONCRETE_JOB_CLASS = "Msvm_ConcreteJob" - _DEFAULT_JOB_TERMINATE_TIMEOUT = 15 # seconds _KILL_JOB_STATE_CHANGE_REQUEST = 5 _completed_job_states = [constants.JOB_STATE_COMPLETED, @@ -203,7 +205,7 @@ class JobUtils(baseutils.BaseUtilsVirt): def _is_job_completed(self, job): return job.JobState in self._completed_job_states - def stop_jobs(self, element, timeout=_DEFAULT_JOB_TERMINATE_TIMEOUT): + def stop_jobs(self, element, timeout=None): """Stops the Hyper-V jobs associated with the given resource. :param element: string representing the path of the Hyper-V resource @@ -214,6 +216,9 @@ class JobUtils(baseutils.BaseUtilsVirt): associated with the given resource and the given timeout amount of time has passed. """ + if timeout is None: + timeout = CONF.os_win.wmi_job_terminate_timeout + @_utils.retry_decorator(exceptions=exceptions.JobTerminateFailed, timeout=timeout, max_retry_count=None) def _stop_jobs_with_timeout():