os-win/os_win/utils/compute/clusterutils.py

677 lines
26 KiB
Python

# Copyright 2016 Cloudbase Solutions Srl
# All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
"""
Utility class for VM related operations on Hyper-V Clusters.
"""
import ctypes
import re
import sys
import threading
import time
from eventlet import patcher
from eventlet import tpool
from oslo_log import log as logging
from oslo_utils import excutils
from six.moves import queue
from os_win._i18n import _
from os_win import _utils
from os_win import constants
from os_win import exceptions
from os_win.utils import baseutils
from os_win.utils.compute import _clusapi_utils
from os_win.utils.winapi import constants as w_const
from os_win.utils.winapi.libs import clusapi as clusapi_def
from os_win.utils.winapi import wintypes
LOG = logging.getLogger(__name__)
class ClusterUtils(baseutils.BaseUtils):
    """Helpers for handling VMs that belong to a Hyper-V failover cluster.

    Lookups, adding/removing VMs and the owner change watcher go through
    the MSCluster WMI namespace, while group migrations and group state
    queries go through the clusapi wrapper (``_clusapi_utils``).
    """

    _MSCLUSTER_NODE = 'MSCluster_Node'
    _MSCLUSTER_RES = 'MSCluster_Resource'

    # Cluster resources backing VMs are named 'Virtual Machine <vm name>'.
    _VM_BASE_NAME = 'Virtual Machine %s'
    _VM_TYPE = 'Virtual Machine'
    # GroupType value used to pick the VM groups out of all cluster groups.
    _VM_GROUP_TYPE = 111

    _MS_CLUSTER_NAMESPACE = '//%s/root/MSCluster'

    _LIVE_MIGRATION_TYPE = 4
    _IGNORE_LOCKED = 1
    _DESTROY_GROUP = 1

    # Failback window bounds, expressed in hours.
    _FAILBACK_WINDOW_MIN = 0
    _FAILBACK_WINDOW_MAX = 23

    _WMI_EVENT_TIMEOUT_MS = 100
    _WMI_EVENT_CHECK_INTERVAL = 2

    def __init__(self, host='.'):
        """Set up the helpers and, on Windows, the cluster WMI connection.

        :param host: the host whose MSCluster namespace is opened.
        """
        self._instance_name_regex = re.compile('Virtual Machine (.*)')
        self._clusapi_utils = _clusapi_utils.ClusApiUtils()

        # The WMI connection is only attempted on Windows.
        if sys.platform == 'win32':
            self._init_hyperv_conn(host)

    def _init_hyperv_conn(self, host):
        """Connect to the cluster WMI namespace and cache the node name.

        :raises HyperVClusterException: if an AttributeError occurs while
            initializing (e.g. the node name cannot be parsed from the
            cluster path).
        """
        try:
            self._conn_cluster = self._get_wmi_conn(
                self._MS_CLUSTER_NAMESPACE % host)
            self._cluster = self._conn_cluster.MSCluster_Cluster()[0]

            # extract this node name from cluster's path
            path = self._cluster.path_()
            self._this_node = re.search(r'\\\\(.*)\\root', path,
                                        re.IGNORECASE).group(1)
        except AttributeError:
            raise exceptions.HyperVClusterException(
                _("Could not initialize cluster wmi connection."))

    def _get_failover_watcher(self):
        """Return a WMI watcher for VM resource owner node changes."""
        raw_query = ("SELECT * FROM __InstanceModificationEvent "
                     "WITHIN %(wmi_check_interv)s WHERE TargetInstance ISA "
                     "'%(cluster_res)s' AND "
                     "TargetInstance.Type='%(cluster_res_type)s' AND "
                     "TargetInstance.OwnerNode != PreviousInstance.OwnerNode" %
                     {'wmi_check_interv': self._WMI_EVENT_CHECK_INTERVAL,
                      'cluster_res': self._MSCLUSTER_RES,
                      'cluster_res_type': self._VM_TYPE})
        return self._conn_cluster.watch_for(raw_wql=raw_query)

    def check_cluster_state(self):
        """Raise HyperVClusterException if the cluster has no nodes."""
        if len(self._get_cluster_nodes()) < 1:
            raise exceptions.HyperVClusterException(
                _("Not enough cluster nodes."))

    def get_node_name(self):
        """Return the node name parsed from the cluster WMI path."""
        return self._this_node

    def _get_cluster_nodes(self):
        """Return the WMI node objects associated with the cluster."""
        cluster_assoc = self._conn_cluster.MSCluster_ClusterToNode(
            Antecedent=self._cluster.path_())
        return [x.Dependent for x in cluster_assoc]

    def _get_vm_groups(self):
        """Return a generator of the cluster groups having the VM type."""
        assocs = self._conn_cluster.MSCluster_ClusterToResourceGroup(
            GroupComponent=self._cluster.path_())
        resources = [a.PartComponent for a in assocs]
        return (r for r in resources if
                hasattr(r, 'GroupType') and
                r.GroupType == self._VM_GROUP_TYPE)

    def _lookup_vm_group_check(self, vm_name):
        """Return the VM cluster group, raising if it cannot be found.

        :raises HyperVVMNotFoundException: if no such group exists.
        """
        vm = self._lookup_vm_group(vm_name)
        if not vm:
            raise exceptions.HyperVVMNotFoundException(vm_name=vm_name)
        return vm

    def _lookup_vm_group(self, vm_name):
        """Return the cluster group named ``vm_name`` or None."""
        return self._lookup_res(self._conn_cluster.MSCluster_ResourceGroup,
                                vm_name)

    def _lookup_vm_check(self, vm_name):
        """Return the VM cluster resource, raising if it cannot be found.

        :raises HyperVVMNotFoundException: if no such resource exists.
        """
        vm = self._lookup_vm(vm_name)
        if not vm:
            raise exceptions.HyperVVMNotFoundException(vm_name=vm_name)
        return vm

    def _lookup_vm(self, vm_name):
        """Return the 'Virtual Machine <vm_name>' resource or None."""
        vm_name = self._VM_BASE_NAME % vm_name
        return self._lookup_res(self._conn_cluster.MSCluster_Resource, vm_name)

    def _lookup_res(self, resource_source, res_name):
        """Query ``resource_source`` by name, expecting at most one match.

        :param resource_source: callable accepting a ``Name`` keyword and
            returning a sequence of matching objects.
        :param res_name: the resource name to look for.
        :returns: the matching object or None if there is none.
        :raises HyperVClusterException: if multiple objects share the name.
        """
        res = resource_source(Name=res_name)
        n = len(res)
        if n == 0:
            return None
        elif n > 1:
            raise exceptions.HyperVClusterException(
                _('Duplicate resource name %s found.') % res_name)
        else:
            return res[0]

    def get_cluster_node_names(self):
        """Return the names of the cluster nodes."""
        nodes = self._get_cluster_nodes()
        return [n.Name for n in nodes]

    def get_vm_host(self, vm_name):
        """Return the owner node of the VM cluster group.

        :raises HyperVVMNotFoundException: if the group cannot be found.
        """
        return self._lookup_vm_group_check(vm_name).OwnerNode

    def list_instances(self):
        """Return the names of the VM cluster groups."""
        return [r.Name for r in self._get_vm_groups()]

    def list_instance_uuids(self):
        """Return the ids of the VM cluster groups."""
        return [r.Id for r in self._get_vm_groups()]

    def add_vm_to_cluster(self, vm_name, max_failover_count=1,
                          failover_period=6, auto_failback=True):
        """Adds the VM to the Hyper-V Cluster.

        :param vm_name: The name of the VM to be added to the Hyper-V Cluster
        :param max_failover_count: The number of times the Hyper-V Cluster will
            try to failover the VM within the given failover period. If the VM
            will try to failover more than this number of the given
            failover_period, the VM will end up in a failed state.
        :param failover_period: The period (hours) over which the given
            max_failover_count failovers can occur. After this period expired,
            the failover count for the given VM is reset.
        :param auto_failback: boolean, whether the VM will be allowed to
            move back to its original host when it is available again.
        """
        # Lazy logging arguments: the message is only rendered if emitted.
        LOG.debug("Add vm to cluster called for vm %s", vm_name)
        self._cluster.AddVirtualMachine(vm_name)

        vm_group = self._lookup_vm_group_check(vm_name)
        vm_group.FailoverThreshold = max_failover_count
        vm_group.FailoverPeriod = failover_period
        vm_group.PersistentState = True
        vm_group.AutoFailbackType = int(bool(auto_failback))
        # set the earliest and latest time that the group can be moved
        # back to its preferred node. The unit is in hours.
        vm_group.FailbackWindowStart = self._FAILBACK_WINDOW_MIN
        vm_group.FailbackWindowEnd = self._FAILBACK_WINDOW_MAX
        vm_group.put()

    def bring_online(self, vm_name):
        """Bring the VM cluster resource online."""
        vm = self._lookup_vm_check(vm_name)
        vm.BringOnline()

    def take_offline(self, vm_name):
        """Take the VM cluster resource offline."""
        vm = self._lookup_vm_check(vm_name)
        vm.TakeOffline()

    def delete(self, vm_name):
        """Destroy the cluster group of the given VM."""
        vm = self._lookup_vm_group_check(vm_name)
        vm.DestroyGroup(self._DESTROY_GROUP)

    def vm_exists(self, vm_name):
        """Return True if a cluster resource exists for the given VM."""
        return self._lookup_vm(vm_name) is not None

    def live_migrate_vm(self, vm_name, new_host, timeout=None):
        """Live migrate the VM cluster group to ``new_host``.

        :param vm_name: the name of the VM cluster group.
        :param new_host: the destination cluster node name.
        :param timeout: seconds to wait for the migration to finish. A
            falsy value means that no time limit is enforced.
        """
        self._migrate_vm(vm_name, new_host, self._LIVE_MIGRATION_TYPE,
                         constants.CLUSTER_GROUP_ONLINE,
                         timeout)

    def _migrate_vm(self, vm_name, new_host, migration_type,
                    exp_state_after_migr, timeout):
        """Move the cluster group and wait for the move to complete.

        If waiting times out, a cancel is attempted. The timeout exception
        is then propagated, unless the group turns out to have reached the
        expected state and node anyway.

        :param vm_name: the name of the VM cluster group.
        :param new_host: the destination cluster node name.
        :param migration_type: value set for the VM and VM configuration
            resource type entries of the move property list.
        :param exp_state_after_migr: the group state expected once the
            migration completes.
        :param timeout: seconds to wait; falsy means no time limit.
        :raises ClusterGroupMigrationTimeOut: if the migration did not
            finish in time.
        :raises ClusterGroupMigrationFailed: if the group did not end up
            in the expected state on the expected node.
        """
        syntax = w_const.CLUSPROP_SYNTAX_LIST_VALUE_DWORD
        migr_type = wintypes.DWORD(migration_type)

        prop_entries = [
            self._clusapi_utils.get_property_list_entry(
                w_const.CLUS_RESTYPE_NAME_VM, syntax, migr_type),
            self._clusapi_utils.get_property_list_entry(
                w_const.CLUS_RESTYPE_NAME_VM_CONFIG, syntax, migr_type)
        ]
        prop_list = self._clusapi_utils.get_property_list(prop_entries)

        flags = (
            w_const.CLUSAPI_GROUP_MOVE_RETURN_TO_SOURCE_NODE_ON_ERROR |
            w_const.CLUSAPI_GROUP_MOVE_QUEUE_ENABLED |
            w_const.CLUSAPI_GROUP_MOVE_HIGH_PRIORITY_START)

        # Initialized upfront so that the cleanup below only closes the
        # handles that were actually opened.
        cluster_handle = None
        group_handle = None
        dest_node_handle = None

        try:
            cluster_handle = self._clusapi_utils.open_cluster()
            group_handle = self._clusapi_utils.open_cluster_group(
                cluster_handle, vm_name)
            dest_node_handle = self._clusapi_utils.open_cluster_node(
                cluster_handle, new_host)

            # The listener is set up before requesting the move.
            with _ClusterGroupStateChangeListener(cluster_handle,
                                                  vm_name) as listener:
                self._clusapi_utils.move_cluster_group(group_handle,
                                                       dest_node_handle,
                                                       flags,
                                                       prop_list)
                try:
                    self._wait_for_cluster_group_migration(
                        listener,
                        vm_name,
                        group_handle,
                        exp_state_after_migr,
                        timeout)
                except exceptions.ClusterGroupMigrationTimeOut:
                    with excutils.save_and_reraise_exception() as ctxt:
                        self._cancel_cluster_group_migration(
                            listener, vm_name, group_handle,
                            exp_state_after_migr, timeout)

                        # This is rather unlikely to happen, but the
                        # migration may have completed despite the cancel
                        # request, in which case the timeout is dropped.
                        try:
                            self._validate_migration(group_handle,
                                                     vm_name,
                                                     exp_state_after_migr,
                                                     new_host)
                            LOG.warning(
                                'Cluster group migration completed '
                                'successfuly after cancel attempt. '
                                'Suppressing timeout exception.')
                            ctxt.reraise = False
                        except exceptions.ClusterGroupMigrationFailed:
                            pass
                else:
                    self._validate_migration(group_handle,
                                             vm_name,
                                             exp_state_after_migr,
                                             new_host)
        finally:
            if group_handle:
                self._clusapi_utils.close_cluster_group(group_handle)
            if dest_node_handle:
                self._clusapi_utils.close_cluster_node(dest_node_handle)
            if cluster_handle:
                self._clusapi_utils.close_cluster(cluster_handle)

    def _validate_migration(self, group_handle, group_name,
                            expected_state, expected_node):
        """Check that the group has the expected state and owner node.

        Node names are compared case insensitively.

        :raises ClusterGroupMigrationFailed: if either the state or the
            owner node differs from the expected one.
        """
        state_info = self._clusapi_utils.get_cluster_group_state(group_handle)
        owner_node = state_info['owner_node']
        group_state = state_info['state']

        if (expected_state != group_state or
                expected_node.lower() != owner_node.lower()):
            raise exceptions.ClusterGroupMigrationFailed(
                group_name=group_name,
                expected_state=expected_state,
                expected_node=expected_node,
                group_state=group_state,
                owner_node=owner_node)

    def cancel_cluster_group_migration(self, group_name, expected_state,
                                       timeout=None):
        """Cancel a pending cluster group migration.

        :param group_name: the name of the cluster group.
        :param expected_state: the group state that marks the end of the
            pending operation.
        :param timeout: seconds to wait for the cancel to complete. A
            falsy value means that no time limit is enforced.
        :raises JobTerminateFailed: if waiting for the cancel fails.
        """
        cluster_handle = None
        group_handle = None

        try:
            cluster_handle = self._clusapi_utils.open_cluster()
            group_handle = self._clusapi_utils.open_cluster_group(
                cluster_handle, group_name)

            with _ClusterGroupStateChangeListener(cluster_handle,
                                                  group_name) as listener:
                self._cancel_cluster_group_migration(
                    listener, group_name, group_handle,
                    expected_state, timeout)
        finally:
            if group_handle:
                self._clusapi_utils.close_cluster_group(group_handle)
            if cluster_handle:
                self._clusapi_utils.close_cluster(cluster_handle)

    def _cancel_cluster_group_migration(self, event_listener,
                                        group_name, group_handle,
                                        expected_state,
                                        timeout=None):
        """Request the cancel and, if needed, wait for it to complete.

        An ERROR_INVALID_STATE failure of the cancel request is ignored
        when no migration is pending anymore.

        :raises Win32Exception: if the cancel request fails for any other
            reason.
        :raises JobTerminateFailed: if waiting for the cancel fails.
        """
        LOG.info("Canceling cluster group '%s' migration", group_name)
        try:
            cancel_finished = (
                self._clusapi_utils.cancel_cluster_group_operation(
                    group_handle))
        except exceptions.Win32Exception as ex:
            group_state_info = self._get_cluster_group_state(group_handle)
            migration_pending = self._is_migration_pending(
                group_state_info['state'],
                group_state_info['status_info'],
                expected_state)

            if (ex.error_code == w_const.ERROR_INVALID_STATE and
                    not migration_pending):
                LOG.debug('Ignoring group migration cancel error. '
                          'No migration is pending.')
                cancel_finished = True
            else:
                raise

        if not cancel_finished:
            LOG.debug("Waiting for group migration to be canceled.")
            try:
                self._wait_for_cluster_group_migration(
                    event_listener, group_name, group_handle,
                    expected_state,
                    timeout=timeout)
            except Exception:
                LOG.exception("Failed to cancel cluster group migration.")
                raise exceptions.JobTerminateFailed()

        LOG.info("Cluster group migration canceled.")

    def _is_migration_queued(self, group_status_info):
        """Return True if the 'waiting in queue for move' flag is set."""
        return bool(
            group_status_info &
            w_const.CLUSGRP_STATUS_WAITING_IN_QUEUE_FOR_MOVE)

    def _is_migration_pending(self, group_state, group_status_info,
                              expected_state):
        """Return True if the state is not reached or a move is queued."""
        migration_pending = (
            group_state != expected_state or
            self._is_migration_queued(group_status_info))
        return migration_pending

    def _wait_for_cluster_group_migration(self, event_listener,
                                          group_name, group_handle,
                                          expected_state,
                                          timeout=None):
        """Block until the group migration is no longer pending.

        The current group state is checked first, after which the state
        and status info are updated from the events provided by the
        listener.

        :param event_listener: listener whose ``get`` method returns dicts
            that may contain 'state' and 'status_info' keys.
        :param group_name: the group name, used for logging and errors.
        :param group_handle: an open cluster group handle.
        :param expected_state: the state that marks the migration's end.
        :param timeout: seconds to wait; falsy means no time limit.
        :raises ClusterGroupMigrationTimeOut: if the migration is still
            pending once the timeout expired.
        """
        time_start = time.time()

        group_state_info = self._get_cluster_group_state(group_handle)
        group_state = group_state_info['state']
        group_status_info = group_state_info['status_info']

        migration_pending = self._is_migration_pending(
            group_state,
            group_status_info,
            expected_state)
        if not migration_pending:
            return

        while True:
            if timeout:
                # The remaining time is recomputed *before* deciding
                # whether to keep waiting. Queue.get rejects negative
                # timeouts with a ValueError, which would otherwise leak
                # out instead of the timeout exception raised below.
                time_left = timeout - (time.time() - time_start)
                if time_left <= 0:
                    break
                wait_timeout = time_left
            else:
                time_left = 'undefined'
                wait_timeout = None

            LOG.debug("Waiting for cluster group '%(group_name)s' "
                      "migration to finish. "
                      "Time left: %(time_left)s.",
                      dict(group_name=group_name,
                           time_left=time_left))

            try:
                event = event_listener.get(wait_timeout)
            except queue.Empty:
                break

            # Each event carries only one of the two fields, so the last
            # known value is kept for the other one.
            group_state = event.get('state', group_state)
            group_status_info = event.get('status_info', group_status_info)

            migration_pending = self._is_migration_pending(group_state,
                                                           group_status_info,
                                                           expected_state)
            if not migration_pending:
                return

        LOG.error("Cluster group migration timed out.")
        raise exceptions.ClusterGroupMigrationTimeOut(
            group_name=group_name,
            time_elapsed=time.time() - time_start)

    def get_cluster_group_state_info(self, group_name):
        """Gets cluster group state info.

        :return: a dict containing the following keys:
            ['state', 'migration_queued', 'owner_node']
        """
        cluster_handle = None
        group_handle = None

        try:
            cluster_handle = self._clusapi_utils.open_cluster()
            group_handle = self._clusapi_utils.open_cluster_group(
                cluster_handle, group_name)

            state_info = self._get_cluster_group_state(group_handle)
            migration_queued = self._is_migration_queued(
                state_info['status_info'])

            return dict(owner_node=state_info['owner_node'],
                        state=state_info['state'],
                        migration_queued=migration_queued)
        finally:
            if group_handle:
                self._clusapi_utils.close_cluster_group(group_handle)
            if cluster_handle:
                self._clusapi_utils.close_cluster(cluster_handle)

    def _get_cluster_group_state(self, group_handle):
        """Return the group state dict, extended with 'status_info'.

        The status info is extracted from the group's read-only common
        properties.
        """
        state_info = self._clusapi_utils.get_cluster_group_state(group_handle)

        buff, buff_sz = self._clusapi_utils.cluster_group_control(
            group_handle,
            w_const.CLUSCTL_GROUP_GET_RO_COMMON_PROPERTIES)
        status_info = self._clusapi_utils.get_cluster_group_status_info(
            ctypes.byref(buff), buff_sz)

        state_info['status_info'] = status_info
        return state_info

    def _monitor_vm_failover(self, watcher, callback,
                             event_timeout_ms=_WMI_EVENT_TIMEOUT_MS):
        """Creates a monitor to check for new WMI MSCluster_Resource

        events.

        This method will poll the last _WMI_EVENT_CHECK_INTERVAL + 1
        seconds for new events and listens for _WMI_EVENT_TIMEOUT_MS
        milliseconds, since listening is a thread blocking action.

        Any event object caught will then be processed.

        :param watcher: callable that waits for an event for the given
            amount of milliseconds and returns the event object.
        :param callback: called with (vm_name, old_host, new_host) for
            each event whose resource name matches the VM name pattern.
            Exceptions raised by it are logged and not propagated.
        :param event_timeout_ms: how long to wait for an event. A WMI
            timeout is silently ignored.
        """
        vm_name = None
        new_host = None
        try:
            # wait for new event for _WMI_EVENT_TIMEOUT_MS milliseconds.
            if patcher.is_monkey_patched('thread'):
                wmi_object = tpool.execute(watcher,
                                           event_timeout_ms)
            else:
                wmi_object = watcher(event_timeout_ms)

            old_host = wmi_object.previous.OwnerNode
            new_host = wmi_object.OwnerNode
            # wmi_object.Name field is of the form:
            # 'Virtual Machine nova-instance-template'
            # wmi_object.Name field is a key and as such is not affected
            # by locale, so it will always be 'Virtual Machine'
            match = self._instance_name_regex.search(wmi_object.Name)
            if match:
                vm_name = match.group(1)

            if vm_name:
                try:
                    callback(vm_name, old_host, new_host)
                except Exception:
                    LOG.exception(
                        "Exception during failover callback.")
        except exceptions.x_wmi_timed_out:
            pass

    def get_vm_owner_change_listener(self):
        """Return a function that watches for VM owner node changes.

        The returned function takes a callback, which gets invoked with
        (vm_name, old_host, new_host), and loops indefinitely.
        """
        def listener(callback):
            watcher = self._get_failover_watcher()

            while True:
                # We avoid setting an infinite timeout in order to let
                # the process gracefully stop. Note that the os-win WMI
                # event listeners are meant to be used as long running
                # daemons, so no stop API is provided ATM.
                try:
                    self._monitor_vm_failover(
                        watcher,
                        callback,
                        constants.DEFAULT_WMI_EVENT_TIMEOUT_MS)
                except Exception:
                    LOG.exception("The VM cluster group owner change "
                                  "event listener encountered an "
                                  "unexpected exception.")
                    # Back off before retrying after a failure.
                    time.sleep(constants.DEFAULT_WMI_EVENT_TIMEOUT_MS / 1000)

        return listener
# At the moment, these event listeners are not meant to be used outside
# os-win, mostly because of the underlying API limitations.
class _ClusterEventListener(object):
    """Collects cluster notifications using a background worker.

    A cluster notify port is set up for the requested filters, after which
    a daemon thread keeps fetching notifications, passing them through
    ``_process_event`` and queueing the truthy results. Consumers retrieve
    them using ``get``. The listener may be used as a context manager, in
    which case it gets stopped on exit.
    """

    # Maps notification keys (ints) to the DWORD objects passed to clusapi.
    # This dict lives at class level and is shared by all the listeners, so
    # the DWORD objects (and their addresses) are never released.
    _notif_keys = {}
    _notif_port_h = None
    _cluster_handle = None
    _running = False

    def __init__(self, cluster_handle, notif_filters_list):
        """Register the filters and start listening right away.

        :param cluster_handle: an open cluster handle.
        :param notif_filters_list: list of dicts having the 'object_type'
            and 'filter_flags' keys and, optionally, 'notif_key'.
        """
        self._cluster_handle = cluster_handle
        self._notif_filters_list = notif_filters_list

        self._clusapi_utils = _clusapi_utils.ClusApiUtils()
        self._event_queue = queue.Queue()

        self._setup()

    def __enter__(self):
        self._ensure_listener_running()
        return self

    def _get_notif_key_dw(self, notif_key):
        """Return the cached DWORD for the key, creating it if needed."""
        notif_key_dw = self._notif_keys.get(notif_key)
        if notif_key_dw is None:
            notif_key_dw = wintypes.DWORD(notif_key)
            # We have to make sure those addresses are preserved.
            self._notif_keys[notif_key] = notif_key_dw
        return notif_key_dw

    def _add_filter(self, notif_filter, notif_key=0):
        """Register a filter, creating the notify port on first use."""
        notif_key_dw = self._get_notif_key_dw(notif_key)

        # We'll get a notification handle if not already existing.
        self._notif_port_h = self._clusapi_utils.create_cluster_notify_port_v2(
            self._cluster_handle, notif_filter,
            self._notif_port_h, notif_key_dw)

    def _setup_notif_port(self):
        """Register all the requested notification filters."""
        for notif_filter in self._notif_filters_list:
            filter_struct = clusapi_def.NOTIFY_FILTER_AND_TYPE(
                dwObjectType=notif_filter['object_type'],
                FilterFlags=notif_filter['filter_flags'])
            notif_key = notif_filter.get('notif_key', 0)

            self._add_filter(filter_struct, notif_key)

    def _setup(self):
        """Set up the notify port and start the listener worker."""
        self._setup_notif_port()

        # If eventlet monkey patching is used, this will actually be a
        # greenthread. We just don't want to enforce eventlet usage.
        worker = threading.Thread(target=self._listen)
        # The 'daemon' attribute is used instead of setDaemon(), which is
        # deprecated as of Python 3.10.
        worker.daemon = True

        self._running = True
        worker.start()

    def __exit__(self, exc_type, exc_value, traceback):
        self.stop()

    def _signal_stopped(self):
        """Flag the listener as stopped and wake up blocked consumers."""
        self._running = False
        # The None item unblocks a pending 'get' call, which will then
        # notice that the listener is no longer running.
        self._event_queue.put(None)

    def stop(self):
        """Stop the listener and close the notify port, if any."""
        self._signal_stopped()

        if self._notif_port_h:
            self._clusapi_utils.close_cluster_notify_port(self._notif_port_h)

    def _listen(self):
        """Worker loop fetching, processing and queueing notifications."""
        while self._running:
            try:
                # We're using an indefinite timeout here. When the listener is
                # closed, this will raise an 'invalid handle value' error,
                # which we're going to ignore.
                event = _utils.avoid_blocking_call(
                    self._clusapi_utils.get_cluster_notify_v2,
                    self._notif_port_h,
                    timeout_ms=-1)

                processed_event = self._process_event(event)
                if processed_event:
                    self._event_queue.put(processed_event)
            except Exception:
                # Errors occurring after the listener was stopped are
                # expected and thus ignored.
                if self._running:
                    LOG.exception(
                        "Unexpected exception in event listener loop. "
                        "The cluster event listener will now close.")
                    self._signal_stopped()

    def _process_event(self, event):
        """Hook for subclasses; falsy return values are not queued."""
        return event

    def get(self, timeout=None):
        """Return the next queued event.

        :param timeout: seconds to wait for an event, passed to the
            underlying queue. None means waiting indefinitely.
        :raises OSWinException: if the listener is not running, either
            before or after waiting for the event.
        """
        self._ensure_listener_running()

        event = self._event_queue.get(timeout=timeout)

        # The listener may have been stopped while waiting, in which case
        # the retrieved item is not an actual event.
        self._ensure_listener_running()
        return event

    def _ensure_listener_running(self):
        """Raise OSWinException if the listener is not running."""
        if not self._running:
            raise exceptions.OSWinException(
                _("Cluster event listener is not running."))
class _ClusterGroupStateChangeListener(_ClusterEventListener):
    """Event listener reporting cluster group state and status changes.

    Group state notifications yield events carrying a 'state' key, while
    common property notifications yield events carrying a 'status_info'
    key. If a group name is given, other groups' events are dropped.
    """

    _NOTIF_KEY_GROUP_STATE = 0
    _NOTIF_KEY_GROUP_COMMON_PROP = 1

    _notif_filters_list = [
        {'object_type': w_const.CLUSTER_OBJECT_TYPE_GROUP,
         'filter_flags': w_const.CLUSTER_CHANGE_GROUP_STATE_V2,
         'notif_key': _NOTIF_KEY_GROUP_STATE},
        {'object_type': w_const.CLUSTER_OBJECT_TYPE_GROUP,
         'filter_flags': w_const.CLUSTER_CHANGE_GROUP_COMMON_PROPERTY_V2,
         'notif_key': _NOTIF_KEY_GROUP_COMMON_PROP},
    ]

    def __init__(self, cluster_handle, group_name=None):
        """Start listening for group events.

        :param cluster_handle: an open cluster handle.
        :param group_name: optional group name used to filter the events
            (compared case insensitively).
        """
        # This must be set before the parent constructor runs, as the
        # latter starts the worker that calls '_process_event'.
        self._group_name = group_name

        parent = super(_ClusterGroupStateChangeListener, self)
        parent.__init__(cluster_handle, self._notif_filters_list)

    def _process_event(self, event):
        """Convert a raw notification into a trimmed down event dict.

        :returns: the processed event or None if the event belongs to
            another group, uses an unknown notification key or does not
            include the status information property.
        :raises ClusterPropertyRetrieveFailed: if a group state
            notification buffer does not have the size of a DWORD.
        """
        event_group = event['cluster_object_name']
        wanted_group = self._group_name
        if wanted_group and wanted_group.lower() != event_group.lower():
            return None

        result = {}
        for key in ('cluster_object_name', 'object_type',
                    'filter_flags', 'notif_key'):
            result[key] = event[key]

        key_id = event['notif_key']

        if key_id == self._NOTIF_KEY_GROUP_STATE:
            if event['buff_sz'] != ctypes.sizeof(wintypes.DWORD):
                raise exceptions.ClusterPropertyRetrieveFailed()
            # The buffer holds the new group state as a DWORD.
            dword_p = ctypes.cast(event['buff'], wintypes.PDWORD)
            result['state'] = dword_p.contents.value
            return result

        if key_id == self._NOTIF_KEY_GROUP_COMMON_PROP:
            buff_ref = ctypes.byref(event['buff'])
            try:
                result['status_info'] = (
                    self._clusapi_utils.get_cluster_group_status_info(
                        buff_ref, event['buff_sz']))
            except exceptions.ClusterPropertyListEntryNotFound:
                # For now, 'StatusInformation' is the only common property
                # of interest, other property changes being dropped.
                return None
            return result

        return None