Add wmi job terminate timeout cfg opt

We're using a hardcoded 15-second timeout when canceling WMI jobs.

Some jobs may not be cancelable, and some may take longer to stop. For
example, Nova instances may end up in an error state after attempting
to delete them, as this hardcoded timeout gets exceeded.

With that in mind, we're now adding a config option for this timeout
and increasing its default to 2 minutes.

Ideally, Nova should pass a timeout here. Still, by adding a config
option at the os-win level, it's easier to backport this to older
releases, also providing a default for any kind of WMI job.

Change-Id: I797366c40a3a4bb74fe925bd10697f227e22915c
Closes-Bug: #1790890
This commit is contained in:
Lucian Petrut 2018-09-05 17:52:58 +03:00
parent 90b359d103
commit 11e28563d4
4 changed files with 16 additions and 5 deletions

View File

@ -29,6 +29,11 @@ os_win_opts = [
'almost all operations require a reference to a '
'switch port. The cached objects are no longer valid '
'if the VM they are associated with is destroyed.'),
cfg.IntOpt('wmi_job_terminate_timeout',
default=120,
help='The default amount of seconds to wait when stopping '
'pending WMI jobs. Setting this value to 0 will '
'disable the timeout.'),
]
CONF = cfg.CONF

View File

@ -1203,7 +1203,8 @@ class VMUtilsTestCase(test_base.OsWinBaseTestCase):
self._vmutils.stop_vm_jobs(mock.sentinel.vm_name)
self._vmutils._jobutils.stop_jobs.assert_called_once_with(mock_vm)
self._vmutils._jobutils.stop_jobs.assert_called_once_with(
mock_vm, None)
def test_set_secure_boot(self):
vs_data = mock.MagicMock()

View File

@ -985,9 +985,9 @@ class VMUtils(baseutils.BaseUtilsVirt):
pass
return constants.VM_GEN_1
def stop_vm_jobs(self, vm_name):
def stop_vm_jobs(self, vm_name, timeout=None):
vm = self._lookup_vm_check(vm_name, as_vssd=False)
self._jobutils.stop_jobs(vm)
self._jobutils.stop_jobs(vm, timeout)
def enable_secure_boot(self, vm_name, msft_ca_required):
"""Enables Secure Boot for the instance with the given name.

View File

@ -24,10 +24,13 @@ import monotonic
from oslo_log import log as logging
from os_win import _utils
import os_win.conf
from os_win import constants
from os_win import exceptions
from os_win.utils import baseutils
CONF = os_win.conf.CONF
LOG = logging.getLogger(__name__)
@ -35,7 +38,6 @@ class JobUtils(baseutils.BaseUtilsVirt):
_CONCRETE_JOB_CLASS = "Msvm_ConcreteJob"
_DEFAULT_JOB_TERMINATE_TIMEOUT = 15 # seconds
_KILL_JOB_STATE_CHANGE_REQUEST = 5
_completed_job_states = [constants.JOB_STATE_COMPLETED,
@ -203,7 +205,7 @@ class JobUtils(baseutils.BaseUtilsVirt):
def _is_job_completed(self, job):
return job.JobState in self._completed_job_states
def stop_jobs(self, element, timeout=_DEFAULT_JOB_TERMINATE_TIMEOUT):
def stop_jobs(self, element, timeout=None):
"""Stops the Hyper-V jobs associated with the given resource.
:param element: string representing the path of the Hyper-V resource
@ -214,6 +216,9 @@ class JobUtils(baseutils.BaseUtilsVirt):
associated with the given resource and the given timeout amount of
time has passed.
"""
if timeout is None:
timeout = CONF.os_win.wmi_job_terminate_timeout
@_utils.retry_decorator(exceptions=exceptions.JobTerminateFailed,
timeout=timeout, max_retry_count=None)
def _stop_jobs_with_timeout():