# Copyright 2016 Hewlett Packard Enterprise Development LP.
# Copyright 2016 Universidade Federal de Campina Grande
# All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
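
"""Deploy interfaces and periodic tasks for the OneView drivers."""
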
import abc

from futurist import periodics
from ironic_lib import metrics_utils
from oslo_log import log as logging
import retrying
import six

from ironic.common import exception
from ironic.common.i18n import _, _LE, _LI, _LW
from ironic.common import states
from ironic.conductor import utils as manager_utils
from ironic.conf import CONF
from ironic.drivers.modules import agent
from ironic.drivers.modules import agent_base_vendor
from ironic.drivers.modules import deploy_utils as ironic_deploy_utils
from ironic.drivers.modules import iscsi_deploy
from ironic.drivers.modules.oneview import common
from ironic.drivers.modules.oneview import deploy_utils
from ironic import objects

LOG = logging.getLogger(__name__)

METRICS = metrics_utils.get_metrics_logger(__name__)


@six.add_metaclass(abc.ABCMeta)
class OneViewPeriodicTasks(object):

    @abc.abstractproperty
    def oneview_driver(self):
        pass
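
    # The conductor gathers methods decorated with @periodics.periodic into
    # its periodic worker: 'spacing' is the interval between runs in seconds
    # and 'enabled' toggles registration of the task.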
@periodics.periodic(spacing=CONF.oneview.periodic_check_interval,
enabled=CONF.oneview.enable_periodic_tasks)
def _periodic_check_nodes_taken_by_oneview(self, manager, context):
"""Checks if nodes in Ironic were taken by OneView users.
This driver periodic task will check for nodes that were taken by
OneView users while the node is in available state, set the node to
maintenance mode with an appropriate maintenance reason message and
move the node to manageable state.
:param manager: a ConductorManager instance
:param context: request context
:returns: None.
"""
filters = {
'provision_state': states.AVAILABLE,
'maintenance': False,
'driver': self.oneview_driver
}
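        # iter_nodes() yields a (uuid, driver) tuple for each matching node.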
node_iter = manager.iter_nodes(filters=filters)
for node_uuid, driver in node_iter:
node = objects.Node.get(context, node_uuid)
try:
oneview_using = deploy_utils.is_node_in_use_by_oneview(
self.oneview_client, node
)
except exception.OneViewError as e:
# NOTE(xavierr): Skip this node and process the
# remaining nodes. This node will be checked in
# the next periodic call.
LOG.error(_LE("Error while determining if node "
"%(node_uuid)s is in use by OneView. "
"Error: %(error)s"),
{'node_uuid': node.uuid, 'error': e})
continue
if oneview_using:
purpose = (_LI('Updating node %(node_uuid)s in use '
'by OneView from %(provision_state)s state '
'to %(target_state)s state and maintenance '
'mode %(maintenance)s.'),
{'node_uuid': node_uuid,
'provision_state': states.AVAILABLE,
'target_state': states.MANAGEABLE,
'maintenance': True})
                LOG.info(*purpose)
                node.maintenance = True
                node.maintenance_reason = common.NODE_IN_USE_BY_ONEVIEW
                manager.update_node(context, node)
                manager.do_provisioning_action(context, node.uuid, 'manage')

@periodics.periodic(spacing=CONF.oneview.periodic_check_interval,
enabled=CONF.oneview.enable_periodic_tasks)
def _periodic_check_nodes_freed_by_oneview(self, manager, context):
"""Checks if nodes taken by OneView users were freed.
This driver periodic task will be responsible to poll the nodes that
are in maintenance mode and on manageable state to check if the Server
Profile was removed, indicating that the node was freed by the OneView
user. If so, it'll provide the node, that will pass through the
cleaning process and become available to be provisioned.
:param manager: a ConductorManager instance
:param context: request context
:returns: None.
"""
filters = {
'provision_state': states.MANAGEABLE,
'maintenance': True,
'driver': self.oneview_driver
}
node_iter = manager.iter_nodes(fields=['maintenance_reason'],
filters=filters)
for node_uuid, driver, maintenance_reason in node_iter:
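            # Only nodes flagged by _periodic_check_nodes_taken_by_oneview
            # are handled; other maintenance reasons are left untouched.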
if maintenance_reason == common.NODE_IN_USE_BY_ONEVIEW:
node = objects.Node.get(context, node_uuid)
try:
oneview_using = deploy_utils.is_node_in_use_by_oneview(
self.oneview_client, node
)
except exception.OneViewError as e:
# NOTE(xavierr): Skip this node and process the
# remaining nodes. This node will be checked in
# the next periodic call.
LOG.error(_LE("Error while determining if node "
"%(node_uuid)s is in use by OneView. "
"Error: %(error)s"),
{'node_uuid': node.uuid, 'error': e})
continue
if not oneview_using:
purpose = (_LI('Bringing node %(node_uuid)s back from '
'use by OneView from %(provision_state)s '
'state to %(target_state)s state and '
'maintenance mode %(maintenance)s.'),
{'node_uuid': node_uuid,
'provision_state': states.MANAGEABLE,
'target_state': states.AVAILABLE,
'maintenance': False})
                    LOG.info(*purpose)
                    node.maintenance = False
                    node.maintenance_reason = None
                    manager.update_node(context, node)
                    manager.do_provisioning_action(
                        context, node.uuid, 'provide'
                    )

@periodics.periodic(spacing=CONF.oneview.periodic_check_interval,
enabled=CONF.oneview.enable_periodic_tasks)
def _periodic_check_nodes_taken_on_cleanfail(self, manager, context):
"""Checks failed deploys due to Oneview users taking Server Hardware.
This last driver periodic task will take care of nodes that would be
caught on a race condition between OneView and a deploy by Ironic. In
such cases, the validation will fail, throwing the node on deploy fail
and, afterwards on clean fail.
This task will set the node to maintenance mode with a proper reason
message and move it to manageable state, from where the second task
can rescue the node as soon as the Server Profile is removed.
:param manager: a ConductorManager instance
:param context: request context
:returns: None.
"""
filters = {
'provision_state': states.CLEANFAIL,
'driver': self.oneview_driver
}
node_iter = manager.iter_nodes(fields=['driver_internal_info'],
filters=filters)
for node_uuid, driver, driver_internal_info in node_iter:
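            # A failed allocation of the Server Hardware during deploy is
            # recorded under 'oneview_error' in driver_internal_info.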
node_oneview_error = driver_internal_info.get('oneview_error')
if node_oneview_error == common.SERVER_HARDWARE_ALLOCATION_ERROR:
node = objects.Node.get(context, node_uuid)
                purpose = (_LI('Bringing node %(node_uuid)s back from use '
                               'by OneView from %(provision_state)s state '
                               'to %(target_state)s state and '
                               'maintenance mode %(maintenance)s.'),
                           {'node_uuid': node_uuid,
                            'provision_state': states.CLEANFAIL,
                            'target_state': states.MANAGEABLE,
                            'maintenance': True})
                LOG.info(*purpose)
                node.maintenance = True
                node.maintenance_reason = common.NODE_IN_USE_BY_ONEVIEW
                driver_internal_info = node.driver_internal_info
                driver_internal_info.pop('oneview_error', None)
                node.driver_internal_info = driver_internal_info
                manager.update_node(context, node)
                manager.do_provisioning_action(context, node.uuid, 'manage')


class OneViewIscsiDeploy(iscsi_deploy.ISCSIDeploy, OneViewPeriodicTasks):
    """Class for OneView ISCSI deployment driver."""

    oneview_driver = common.ISCSI_PXE_ONEVIEW

    def __init__(self):
        super(OneViewIscsiDeploy, self).__init__()
        self.oneview_client = common.get_oneview_client()

    def get_properties(self):
        return deploy_utils.get_properties()

    @METRICS.timer('OneViewIscsiDeploy.validate')
    def validate(self, task):
        common.verify_node_info(task.node)
        try:
            common.validate_oneview_resources_compatibility(
                self.oneview_client, task)
        except exception.OneViewError as oneview_exc:
            raise exception.InvalidParameterValue(oneview_exc)
        super(OneViewIscsiDeploy, self).validate(task)

    @METRICS.timer('OneViewIscsiDeploy.prepare')
    def prepare(self, task):
        if common.is_dynamic_allocation_enabled(task.node):
            deploy_utils.prepare(self.oneview_client, task)
        super(OneViewIscsiDeploy, self).prepare(task)

@METRICS.timer('OneViewIscsiDeploy.tear_down')
def tear_down(self, task):
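        # When automated cleaning is enabled, the Server Profile is released
        # later by tear_down_cleaning instead of here.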
if (common.is_dynamic_allocation_enabled(task.node) and
not CONF.conductor.automated_clean):
deploy_utils.tear_down(self.oneview_client, task)
        return super(OneViewIscsiDeploy, self).tear_down(task)

@METRICS.timer('OneViewIscsiDeploy.prepare_cleaning')
def prepare_cleaning(self, task):
if common.is_dynamic_allocation_enabled(task.node):
deploy_utils.prepare_cleaning(self.oneview_client, task)
        return super(OneViewIscsiDeploy, self).prepare_cleaning(task)

@METRICS.timer('OneViewIscsiDeploy.tear_down_cleaning')
def tear_down_cleaning(self, task):
if common.is_dynamic_allocation_enabled(task.node):
deploy_utils.tear_down_cleaning(self.oneview_client, task)
        super(OneViewIscsiDeploy, self).tear_down_cleaning(task)


# NOTE (thiagop): We override this interface because we cannot change the boot
# device of OneView managed blades while they are still powered on. We moved
# the call of node_set_boot_device from reboot_to_instance to
# reboot_and_finish_deploy and changed the behavior to shut down the node
# before doing it.
# TODO(thiagop): remove this interface once bug/1503855 is fixed
class OneViewAgentDeployMixin(object):

@METRICS.timer('OneViewAgentDeployMixin.reboot_to_instance')
def reboot_to_instance(self, task, **kwargs):
task.process_event('resume')
node = task.node
error = self.check_deploy_success(node)
if error is not None:
# TODO(jimrollenhagen) power off if using neutron dhcp to
# align with pxe driver?
msg = (_('node %(node)s command status errored: %(error)s') %
{'node': node.uuid, 'error': error})
LOG.error(msg)
ironic_deploy_utils.set_failed_state(task, msg)
return
LOG.info(_LI('Image successfully written to node %s'), node.uuid)
LOG.debug('Rebooting node %s to instance', node.uuid)
self.reboot_and_finish_deploy(task)

        # NOTE(TheJulia): If we deployed a whole disk image, clean up the
        # TFTP files on disk in case the node disregards the boot
        # preference.
# TODO(rameshg87): Not all in-tree drivers using reboot_to_instance
# have a boot interface. So include a check for now. Remove this
# check once all in-tree drivers have a boot interface.
if task.driver.boot:
            task.driver.boot.clean_up_ramdisk(task)

@METRICS.timer('OneViewAgentDeployMixin.reboot_and_finish_deploy')
def reboot_and_finish_deploy(self, task):
"""Helper method to trigger reboot on the node and finish deploy.
This method initiates a reboot on the node. On success, it
marks the deploy as complete. On failure, it logs the error
and marks deploy as failure.
:param task: a TaskManager object containing the node
:raises: InstanceDeployFailure, if node reboot failed.
"""
wait = CONF.agent.post_deploy_get_power_state_retry_interval * 1000
attempts = CONF.agent.post_deploy_get_power_state_retries + 1
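        # 'retrying' expects wait_fixed in milliseconds, hence the * 1000;
        # retries + 1 gives the total attempts, counting the initial call.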
@retrying.retry(
stop_max_attempt_number=attempts,
retry_on_result=lambda state: state != states.POWER_OFF,
wait_fixed=wait
)
def _wait_until_powered_off(task):
            return task.driver.power.get_power_state(task)

node = task.node
try:
try:
self._client.power_off(node)
_wait_until_powered_off(task)
except Exception as e:
LOG.warning(
_LW('Failed to soft power off node %(node_uuid)s '
'in at least %(timeout)d seconds. Error: %(error)s'),
{'node_uuid': node.uuid,
'timeout': (wait * (attempts - 1)) / 1000,
'error': e})
manager_utils.node_power_action(task, states.POWER_OFF)
task.driver.network.remove_provisioning_network(task)
task.driver.network.configure_tenant_networks(task)
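            # The node is powered off at this point, so the boot device can
            # now be set; OneView managed blades reject boot device changes
            # while powered on (see the NOTE above this mixin).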
manager_utils.node_set_boot_device(task, 'disk',
persistent=True)
manager_utils.node_power_action(task, states.POWER_ON)
except Exception as e:
msg = (_('Error rebooting node %(node)s after deploy. '
'Error: %(error)s') %
{'node': node.uuid, 'error': e})
agent_base_vendor.log_and_raise_deployment_error(task, msg)
task.process_event('done')
        LOG.info(_LI('Deployment to node %s done'), task.node.uuid)


class OneViewAgentDeploy(OneViewAgentDeployMixin, agent.AgentDeploy,
                         OneViewPeriodicTasks):
    """Class for OneView Agent deployment driver."""

    oneview_driver = common.AGENT_PXE_ONEVIEW

    def __init__(self):
        super(OneViewAgentDeploy, self).__init__()
        self.oneview_client = common.get_oneview_client()

    def get_properties(self):
        return deploy_utils.get_properties()

    @METRICS.timer('OneViewAgentDeploy.validate')
    def validate(self, task):
        common.verify_node_info(task.node)
        try:
            common.validate_oneview_resources_compatibility(
                self.oneview_client, task)
        except exception.OneViewError as oneview_exc:
            raise exception.InvalidParameterValue(oneview_exc)
        super(OneViewAgentDeploy, self).validate(task)

    @METRICS.timer('OneViewAgentDeploy.prepare')
    def prepare(self, task):
        if common.is_dynamic_allocation_enabled(task.node):
            deploy_utils.prepare(self.oneview_client, task)
        super(OneViewAgentDeploy, self).prepare(task)

    @METRICS.timer('OneViewAgentDeploy.tear_down')
    def tear_down(self, task):
        if (common.is_dynamic_allocation_enabled(task.node) and
                not CONF.conductor.automated_clean):
            deploy_utils.tear_down(self.oneview_client, task)
        return super(OneViewAgentDeploy, self).tear_down(task)

    @METRICS.timer('OneViewAgentDeploy.prepare_cleaning')
    def prepare_cleaning(self, task):
        if common.is_dynamic_allocation_enabled(task.node):
            deploy_utils.prepare_cleaning(self.oneview_client, task)
        return super(OneViewAgentDeploy, self).prepare_cleaning(task)

    @METRICS.timer('OneViewAgentDeploy.tear_down_cleaning')
    def tear_down_cleaning(self, task):
        if common.is_dynamic_allocation_enabled(task.node):
            deploy_utils.tear_down_cleaning(self.oneview_client, task)
        super(OneViewAgentDeploy, self).tear_down_cleaning(task)