# bareon-ironic/bareon_ironic/modules/bareon_base.py


#
# Copyright 2017 Cray Inc., All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
"""
Bareon deploy driver.
"""
import abc
import inspect
import json
import os
import pprint
import stat
import sys
import eventlet
import pkg_resources
import stevedore
import six
from oslo_concurrency import processutils
from oslo_config import cfg
from oslo_log import log
from oslo_service import loopingcall
from ironic.common import boot_devices
from ironic.common import exception
from ironic.common import states
from ironic.common.i18n import _
from ironic.common.i18n import _LI
from ironic.conductor import task_manager
from ironic.conductor import utils as manager_utils
from ironic.drivers import base
from ironic.drivers.modules import deploy_utils
from ironic.objects import node as db_node
from bareon_ironic.modules import bareon_exception
from bareon_ironic.modules import bareon_utils
from bareon_ironic.modules.resources import actions
from bareon_ironic.modules.resources import image_service
from bareon_ironic.modules.resources import resources
agent_opts = [
cfg.StrOpt('bareon_pxe_append_params',
default='nofb nomodeset vga=normal',
help='Additional append parameters for baremetal PXE boot.'),
cfg.StrOpt('deploy_kernel',
help='UUID (from Glance) of the default deployment kernel.'),
cfg.StrOpt('deploy_ramdisk',
help='UUID (from Glance) of the default deployment ramdisk.'),
    cfg.StrOpt('deploy_config_priority',
               default='instance:node:image:conf',
               help='Colon-separated list of deploy config sources, from '
                    'highest to lowest priority. Valid sources are '
                    '"instance", "node", "image" and "conf".'),
cfg.StrOpt('deploy_config',
help='A uuid or name of glance image representing '
'deploy config.'),
cfg.IntOpt('deploy_timeout',
default=15,
help="Timeout in minutes for the node continue-deploy process "
"(deployment phase following the callback)."),
    cfg.IntOpt('check_terminate_interval',
               help='Time interval in seconds between checks of whether '
                    'the deployment driver has responded to the '
                    'termination signal',
               default=5),
    cfg.IntOpt('check_terminate_max_retries',
               help='Maximum number of retries when checking whether the '
                    'node has already been terminated',
               default=20),
    cfg.StrOpt('agent_data_driver',
               default='ironic',
               help='Default Bareon data driver name.'),
]
CONF = cfg.CONF
CONF.register_opts(agent_opts, group='bareon')
LOG = log.getLogger(__name__)
REQUIRED_PROPERTIES = {}
OTHER_PROPERTIES = {
'deploy_kernel': _('UUID (from Glance) of the deployment kernel.'),
'deploy_ramdisk': _('UUID (from Glance) of the deployment ramdisk.'),
    'bareon_username': _('SSH username; default is "root". Optional.'),
'bareon_key_filename': _('Name of SSH private key file; default is '
'"/etc/ironic/bareon_key". Optional.'),
'bareon_ssh_port': _('SSH port; default is 22. Optional.'),
    'bareon_deploy_script': _('Path to the bareon executable entry point; '
                              'default is "bareon-provision". Optional.'),
'deploy_config': _('Deploy config Glance image id/name'),
}
COMMON_PROPERTIES = OTHER_PROPERTIES
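# NOTE: this is a version prefix; _check_bareon_version() matches it against
# the version reported in /etc/bareon-release on the deploy ramdisk.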
REQUIRED_BAREON_VERSION = "0.0."
TERMINATE_FLAG = 'terminate_deployment'
def _clean_up_images(task):
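    # Idempotent: the 'images_cleaned_up' flag in instance_info guards
    # against repeated cleanup, since this helper is called from both
    # BareonDeploy.clean_up() and the vendor deployment-resources cleanup.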
node = task.node
if node.instance_info.get('images_cleaned_up', False):
return
try:
with open(get_tenant_images_json_path(node)) as f:
images_json = json.loads(f.read())
except Exception as ex:
LOG.warning("Cannot find tenant_images.json for the %s node to"
"finish cleanup." % node)
LOG.warning(str(ex))
else:
images = resources.ResourceList.from_dict(images_json, task)
images.cleanup_resources()
bareon_utils.change_node_dict(task.node, 'instance_info',
{'images_cleaned_up': True})
class BareonDeploy(base.DeployInterface):
"""Interface for deploy-related actions."""
def __init__(self):
super(BareonDeploy, self).__init__()
self._deployment_config_validators = {}
def get_properties(self):
"""Return the properties of the interface.
:returns: dictionary of <property name>:<property description> entries.
"""
return COMMON_PROPERTIES
def validate(self, task):
"""Validate the driver-specific Node deployment info.
This method validates whether the properties of the supplied node
contain the required information for this driver to deploy images to
the node.
:param task: a TaskManager instance
:raises: MissingParameterValue
"""
_NodeDriverInfoAdapter(task.node)
self._validate_deployment_config(task)
@task_manager.require_exclusive_lock
def deploy(self, task):
"""Perform a deployment to a node.
Perform the necessary work to deploy an image onto the specified node.
This method will be called after prepare(), which may have already
performed any preparatory steps, such as pre-caching some data for the
node.
:param task: a TaskManager instance.
:returns: status of the deploy. One of ironic.common.states.
"""
manager_utils.node_power_action(task, states.REBOOT)
return states.DEPLOYWAIT
@task_manager.require_exclusive_lock
def tear_down(self, task):
"""Tear down a previous deployment on the task's node.
:param task: a TaskManager instance.
:returns: status of the deploy. One of ironic.common.states.
"""
manager_utils.node_power_action(task, states.POWER_OFF)
return states.DELETED
def prepare(self, task):
"""Prepare the deployment environment for this node.
:param task: a TaskManager instance.
"""
self._fetch_resources(task)
        # Temporarily set possibly missing driver_info fields. These changes
        # will not become persistent until someone does
        # node.driver_info = updated_driver_info
        # node.save()
driver_info = task.node.driver_info
for field, value in (
('deploy_kernel', CONF.bareon.deploy_kernel),
('deploy_ramdisk', CONF.bareon.deploy_ramdisk)):
driver_info.setdefault(field, value)
task.driver.boot.prepare_ramdisk(task,
self._build_pxe_config_options(task))
def clean_up(self, task):
"""Clean up the deployment environment for this node.
If preparation of the deployment environment ahead of time is possible,
this method should be implemented by the driver. It should erase
anything cached by the `prepare` method.
If implemented, this method must be idempotent. It may be called
multiple times for the same node on the same conductor, and it may be
called by multiple conductors in parallel. Therefore, it must not
require an exclusive lock.
This method is called before `tear_down`.
:param task: a TaskManager instance.
"""
task.driver.boot.clean_up_ramdisk(task)
_clean_up_images(task)
def take_over(self, task):
pass
def _fetch_resources(self, task):
self._fetch_provision_json(task)
self._fetch_actions(task)
def _fetch_provision_json(self, task):
config = self._get_deploy_config(task)
config = self._add_image_deployment_config(task, config)
deploy_data = config.get('deploy_data', {})
if 'kernel_params' not in deploy_data:
deploy_data['kernel_params'] = CONF.bareon.bareon_pxe_append_params
config['deploy_data'] = deploy_data
LOG.info('[{0}] Resulting provision.json is:\n{1}'.format(
task.node.uuid, config))
        # The on_fail script is not passed to the agent; it is handled on
        # the conductor.
on_fail_script_url = config.pop("on_fail_script", None)
self._fetch_on_fail_script(task, on_fail_script_url)
filename = get_provision_json_path(task.node)
LOG.info('[{0}] Writing provision.json to:\n{1}'.format(
task.node.uuid, filename))
with open(filename, 'w') as f:
f.write(json.dumps(config))
def _validate_deployment_config(self, task):
data_driver_name = bareon_utils.node_data_driver(task.node)
validator = self._get_deployment_config_validator(data_driver_name)
validator(get_provision_json_path(task.node))
def _get_deploy_config(self, task):
node = task.node
instance_info = node.instance_info
# Get options passed by nova, if any.
deploy_config_options = instance_info.get('deploy_config_options', {})
# Get options available at ironic side.
deploy_config_options['node'] = node.driver_info.get('deploy_config')
deploy_config_options['conf'] = CONF.bareon.deploy_config
# Cleaning empty options.
deploy_config_options = {k: v for k, v in
six.iteritems(deploy_config_options) if v}
configs = self._fetch_deploy_configs(task.context, node,
deploy_config_options)
return self._merge_configs(configs)
def _fetch_deploy_configs(self, context, node, cfg_options):
configs = {}
for key, url in six.iteritems(cfg_options):
configs[key] = resources.url_download_json(context, node,
url)
return configs
@staticmethod
def _merge_configs(configs):
# Merging first level attributes of configs according to priority
priority_list = CONF.bareon.deploy_config_priority.split(':')
unknown_sources = set(priority_list) - {'instance', 'node', 'conf',
'image'}
if unknown_sources:
raise ValueError('Unknown deploy config source %s' % str(
unknown_sources))
result = {}
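        # Walk the priority list from lowest to highest priority so that
        # values from higher-priority sources overwrite lower-priority ones.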
for k in priority_list[::-1]:
if k in configs:
result.update(configs[k])
LOG.debug('Resulting deploy config:')
LOG.debug('%s', result)
return result
def _fetch_on_fail_script(self, task, url):
if not url:
return
path = get_on_fail_script_path(task.node)
LOG.info('[{0}] Fetching on_fail_script to:\n{1}'.format(
task.node.uuid, path))
resources.url_download(task.context, task.node, url, path)
def _fetch_actions(self, task):
driver_actions_url = task.node.instance_info.get('driver_actions')
actions_data = resources.url_download_json(task.context,
task.node,
driver_actions_url)
if not actions_data:
LOG.info("[%s] No driver_actions specified" % task.node.uuid)
return
controller = actions.ActionController(task, actions_data)
controller.fetch_action_resources()
actions_data = controller.to_dict()
LOG.info('[{0}] Deploy actions for the node are:\n{1}'.format(
task.node.uuid, actions_data))
filename = get_actions_json_path(task.node)
LOG.info('[{0}] Writing actions.json to:\n{1}'.format(
task.node.uuid, filename))
with open(filename, 'w') as f:
f.write(json.dumps(actions_data))
def _build_pxe_config_options(self, task):
"""Builds the pxe config options for booting agent.
This method builds the config options to be replaced on
the agent pxe config template.
:param task: a TaskManager instance
:returns: a dict containing the options to be applied on
the agent pxe config template.
"""
agent_config_opts = {
'deployment_id': task.node.uuid,
'ironic_api_url': deploy_utils.get_ironic_api_url(),
}
return agent_config_opts
def _get_image_resource_mode(self):
raise NotImplementedError
def _get_deploy_driver(self):
raise NotImplementedError
def _add_image_deployment_config(self, task, provision_config):
node = task.node
bareon_utils.change_node_dict(
node, 'instance_info',
{'deploy_driver': self._get_deploy_driver()})
node.save()
image_resource_mode = self._get_image_resource_mode()
boot_image = node.instance_info['image_source']
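        # When the deploy config does not define any images, fall back to a
        # single-image layout: the nova boot image deployed to "/".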
default_user_images = [
{
'name': boot_image,
'url': boot_image,
'target': "/",
}
]
user_images = provision_config.get('images', default_user_images)
invalid_images = []
origin_names = [None] * len(user_images)
for idx, image in enumerate(user_images):
try:
bareon_utils.validate_json(('name', 'url'), image)
except exception.MissingParameterValue as e:
invalid_images.append(
'Invalid "image" record - there is no key {key} (#{idx}: '
'{payload})'.format(
key=e, idx=idx, payload=json.dumps(image)))
continue
origin_names[idx] = image['name']
image_uuid, image_name = image_service.get_glance_image_uuid_name(
task, image['url'])
image['boot'] = (boot_image == image_uuid)
image['url'] = "glance:%s" % image_uuid
image['mode'] = image_resource_mode
image['image_uuid'] = image_uuid
image['image_name'] = image_name
if invalid_images:
raise exception.InvalidParameterValue(
err='\n'.join(invalid_images))
fetched_image_resources = self._fetch_images(task, user_images)
image_deployment_config = [
{
                # Grab the name from the source data to keep it untouched,
                # because the "resources" subsystem replaces all
                # non-alphanumeric symbols in the 'name' field with
                # underscores.
'name': name,
'image_pull_url': image.pull_url,
'target': image.target,
'boot': image.boot,
'image_uuid': image.image_uuid,
'image_name': image.image_name
}
for name, image in zip(origin_names, fetched_image_resources)
]
bareon_utils.change_node_dict(
task.node, 'instance_info',
{'multiboot': len(image_deployment_config) > 1})
node.save()
provision_config['images'] = image_deployment_config
return provision_config
def _fetch_images(self, task, image_resources):
images = resources.ResourceList({
"name": "tenant_images",
"resources": image_resources
}, task)
images.fetch_resources()
# NOTE(lobur): serialize tenant images json for further cleanup.
images_json = images.to_dict()
with open(get_tenant_images_json_path(task.node), 'w') as f:
f.write(json.dumps(images_json))
return images.resources
def terminate_deployment(self, task):
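        # Termination handshake: set TERMINATE_FLAG to 'requested' in the
        # node's instance_info and poll until the deployment polling loop
        # (see _deploy) acknowledges it by switching the flag to 'done'.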
node = task.node
if TERMINATE_FLAG not in node.instance_info:
def _wait_for_node_to_become_terminated(retries, max_retries,
task):
task_node = task.node
retries[0] += 1
if retries[0] > max_retries:
bareon_utils.change_node_dict(
task_node, 'instance_info',
{TERMINATE_FLAG: 'failed'})
task_node.reservation = None
task_node.save()
raise bareon_exception.RetriesException(
retry_count=max_retries)
current_node = db_node.Node.get_by_uuid(task.context,
task_node.uuid)
if current_node.instance_info.get(TERMINATE_FLAG) == 'done':
raise loopingcall.LoopingCallDone()
bareon_utils.change_node_dict(
node, 'instance_info',
{TERMINATE_FLAG: 'requested'})
node.save()
retries = [0]
interval = CONF.bareon.check_terminate_interval
max_retries = CONF.bareon.check_terminate_max_retries
timer = loopingcall.FixedIntervalLoopingCall(
_wait_for_node_to_become_terminated,
retries, max_retries, task)
try:
timer.start(interval=interval).wait()
except bareon_exception.RetriesException as ex:
LOG.error('Failed to terminate node. Error: %(error)s' % {
'error': ex})
@property
def can_terminate_deployment(self):
return True
def _get_deployment_config_validator(self, driver_name):
try:
validator = self._deployment_config_validators[driver_name]
except KeyError:
validator = DeploymentConfigValidator(driver_name)
self._deployment_config_validators[driver_name] = validator
return validator
class BareonVendor(base.VendorInterface):
def get_properties(self):
"""Return the properties of the interface.
:returns: dictionary of <property name>:<property description> entries.
"""
return COMMON_PROPERTIES
def validate(self, task, method=None, **kwargs):
"""Validate the driver-specific Node deployment info.
:param task: a TaskManager instance
:param method: method to be validated
"""
if method in ('exec_actions', 'deploy_steps'):
return
if method == 'switch_boot':
self.validate_switch_boot(task, **kwargs)
return
if not kwargs.get('address'):
raise exception.MissingParameterValue(_('Bareon must pass '
'address of a node.'))
_NodeDriverInfoAdapter(task.node)
def validate_switch_boot(self, task, **kwargs):
if not kwargs.get('image'):
raise exception.MissingParameterValue(_('No image info passed.'))
if not kwargs.get('ssh_key'):
raise exception.MissingParameterValue(_('No ssh key info passed.'))
if not kwargs.get('ssh_user'):
raise exception.MissingParameterValue(_('No ssh user info '
'passed.'))
@base.passthru(['GET', 'POST'], async=False)
def deploy_steps(self, task, **data):
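        # GET: return the next deployment step for the ramdisk to execute
        # (currently only SSH key injection); POST: process the reported
        # result of a previously issued step.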
http_method = data.pop('http_method')
driver_info = _NodeDriverInfoAdapter(task.node)
if http_method == 'GET':
ssh_keys_step = _InjectSSHKeyStepRequest(task, driver_info)
return ssh_keys_step()
steps_mapping = _DeployStepMapping()
data = _DeployStepsAdapter(data)
try:
request_cls = steps_mapping.name_to_step[data.action]
except KeyError:
if data.action is not None:
raise RuntimeError(
'There is no name mapping for deployment step: '
'{!r}'.format(data.action))
            message = (
                'Bareon\'s callback service has failed with an internal error')
if data.status_details:
message += '\nFailure details: {}'.format(
pprint.pformat(data.status_details))
# TODO(dbogun): add support for existing log extraction mechanism
deploy_utils.set_failed_state(
task, message, collect_logs=False)
else:
handler = request_cls.result_handler(
task, driver_info, data)
handler()
return {'url': None}
@base.passthru(['POST'])
@task_manager.require_exclusive_lock
def pass_deploy_info(self, task, **kwargs):
"""Continues the deployment of baremetal node."""
node = task.node
task.process_event('resume')
driver_info = _NodeDriverInfoAdapter(task.node)
cmd = '{} --data_driver "{}" --deploy_driver "{}"'.format(
driver_info.entry_point, bareon_utils.node_data_driver(node),
node.instance_info['deploy_driver'])
if CONF.debug:
cmd += ' --debug'
instance_info = node.instance_info
connect_args = {
'username': driver_info.ssh_login,
'key_filename': driver_info.ssh_key,
'host': kwargs['address']}
if driver_info.ssh_port:
connect_args['port'] = driver_info.ssh_port
try:
ssh = bareon_utils.get_ssh_connection(task, **connect_args)
sftp = ssh.open_sftp()
self._check_bareon_version(ssh, node.uuid)
provision_config_path = get_provision_json_path(task.node)
# TODO(yuriyz) no hardcode
sftp.put(provision_config_path, '/tmp/provision.json')
# swift configdrive store should be disabled
configdrive = instance_info.get('configdrive')
if configdrive is not None:
# TODO(yuriyz) no hardcode
bareon_utils.sftp_write_to(sftp, configdrive,
'/tmp/config-drive.img')
out, err = self._deploy(task, ssh, cmd, **connect_args)
            LOG.info(_LI('[%(node)s] Bareon finished on node %(node)s'),
                     {'node': node.uuid})
LOG.debug('[%s] Bareon stdout is: "%s"', node.uuid, out)
LOG.debug('[%s] Bareon stderr is: "%s"', node.uuid, err)
self._get_boot_info(task, ssh)
self._run_actions(task, ssh, sftp, connect_args)
manager_utils.node_power_action(task, states.POWER_OFF)
manager_utils.node_set_boot_device(task, boot_devices.DISK,
persistent=True)
manager_utils.node_power_action(task, states.POWER_ON)
except exception.SSHConnectFailed as e:
msg = (
_('[%(node)s] SSH connect to node %(host)s failed. '
'Error: %(error)s') % {'host': connect_args['host'],
'error': e, 'node': node.uuid})
self._deploy_failed(task, msg)
except exception.ConfigInvalid as e:
msg = (_('[%(node)s] Invalid provision config. '
'Error: %(error)s') % {'error': e, 'node': node.uuid})
self._deploy_failed(task, msg)
except bareon_exception.DeployTerminationSucceed:
LOG.info(_LI('[%(node)s] Deployment was terminated'),
{'node': node.uuid})
except Exception as e:
self._run_on_fail_script(task, sftp, ssh)
msg = (_('[%(node)s] Deploy failed for node %(node)s. '
'Error: %(error)s') % {'node': node.uuid, 'error': e})
self._bareon_log(task, ssh)
self._deploy_failed(task, msg)
else:
task.process_event('done')
LOG.info(_LI('Deployment to node %s done'), task.node.uuid)
finally:
self._clean_up_deployment_resources(task)
def _deploy_failed(self, task, msg):
LOG.error(msg)
deploy_utils.set_failed_state(task, msg, collect_logs=False)
def _check_bareon_version(self, ssh, node_uuid):
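        # Read /etc/bareon-release on the ramdisk and require that the
        # reported version starts with REQUIRED_BAREON_VERSION.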
try:
stdout, stderr = processutils.ssh_execute(
ssh, 'cat /etc/bareon-release')
LOG.info(_LI("[{0}] Tracing Bareon version.\n{1}").format(
node_uuid, stdout))
version = ""
lines = stdout.splitlines()
if lines:
version_line = lines[0]
                name, _sep, version = version_line.partition("==")
if version.startswith(REQUIRED_BAREON_VERSION):
return
msg = ("Bareon version '%(req)s' is required, but version "
"'%(found)s' found on the ramdisk."
% dict(req=REQUIRED_BAREON_VERSION,
found=version))
raise bareon_exception.IncompatibleRamdiskVersion(details=msg)
except processutils.ProcessExecutionError:
msg = "Bareon version cannot be read on the ramdisk."
raise bareon_exception.IncompatibleRamdiskVersion(details=msg)
def _get_boot_info(self, task, ssh):
node = task.node
node_uuid = node.uuid
if not node.instance_info.get('multiboot', False):
return
try:
stdout, stderr = processutils.ssh_execute(
ssh, 'cat /tmp/boot_entries.json')
except processutils.ProcessExecutionError as exec_err:
LOG.warning(_LI('[%(node)s] Error getting boot info. '
'Error: %(error)s') % {'node': node_uuid,
'error': exec_err})
raise
else:
multiboot_info = json.loads(stdout)
bareon_utils.change_node_dict(node, 'instance_info', {
'multiboot_info': multiboot_info
})
LOG.info("[{1}] {0} Multiboot info {0}\n{2}"
"\n".format("#" * 20, node_uuid, multiboot_info))
def _run_actions(self, task, ssh, sftp, sshparams):
actions_path = get_actions_json_path(task.node)
if not os.path.exists(actions_path):
LOG.info(_LI("[%(node)s] No actions specified. Skipping")
% {'node': task.node.uuid})
return
with open(actions_path) as f:
actions_data = json.loads(f.read())
actions_controller = actions.ActionController(
task, actions_data
)
actions_controller.execute(ssh, sftp, **sshparams)
def _bareon_log(self, task, ssh):
node_uuid = task.node.uuid
try:
            # TODO(oberezovskyi): Change the log pulling mechanism (e.g. use
            # the remote logging feature of syslog)
stdout, stderr = processutils.ssh_execute(
ssh, 'cat /var/log/bareon.log')
except processutils.ProcessExecutionError as exec_err:
LOG.warning(_LI('[%(node)s] Error getting Bareon log. '
'Error: %(error)s') % {'node': node_uuid,
'error': exec_err})
else:
LOG.info("[{1}] {0} Start Bareon log {0}\n{2}\n"
"[{1}] {0} End Bareon log {0}".format("#" * 20,
node_uuid,
stdout))
def _run_on_fail_script(self, task, sftp, ssh):
node = task.node
node_uuid = node.uuid
try:
on_fail_script_path = get_on_fail_script_path(node)
if not os.path.exists(on_fail_script_path):
LOG.info(_LI("[%(node)s] No on_fail_script passed. Skipping")
% {'node': node_uuid})
return
LOG.debug(_LI('[%(node)s] Uploading on_fail script to the node.'),
{'node': node_uuid})
sftp.put(on_fail_script_path, '/tmp/bareon_on_fail.sh')
LOG.debug("[%(node)s] Executing on_fail_script."
% {'node': node_uuid})
out, err = processutils.ssh_execute(
ssh, "bash %s" % '/tmp/bareon_on_fail.sh')
except processutils.ProcessExecutionError as ex:
            LOG.warning(_LI('[%(node)s] Error executing the on_fail script. '
                            'Error: %(er)s') % {'node': node_uuid, 'er': ex})
except exception.SSHConnectFailed as ex:
LOG.warning(_LI('[%(node)s] SSH connection error. '
'Error: %(er)s') % {'node': node_uuid, 'er': ex})
except Exception as ex:
LOG.warning(_LI('[%(node)s] Unknown error. '
'Error: %(error)s') % {'node': node_uuid,
'error': ex})
else:
LOG.info(
"{0} [{1}] on_fail sctipt result below {0}".format("#" * 40,
node_uuid))
LOG.info(out)
LOG.info(err)
LOG.info("{0} [{1}] End on_fail script "
"result {0}".format("#" * 40, node_uuid))
def _clean_up_deployment_resources(self, task):
_clean_up_images(task)
self._clean_up_actions(task)
def _clean_up_actions(self, task):
filename = get_actions_json_path(task.node)
if not os.path.exists(filename):
return
with open(filename) as f:
actions_data = json.loads(f.read())
controller = actions.ActionController(task, actions_data)
controller.cleanup_action_resources()
@base.passthru(['POST'])
@task_manager.require_exclusive_lock
def exec_actions(self, task, **kwargs):
actions_json = resources.url_download_json(
task.context, task.node, kwargs.get('driver_actions'))
if not actions_json:
LOG.info("[%s] No driver_actions specified." % task.node.uuid)
return
ssh_user = actions_json.pop('action_user')
ssh_key_url = actions_json.pop('action_key')
node_ip = bareon_utils.get_node_ip(task)
controller = actions.ActionController(task, actions_json)
controller.ssh_and_execute(node_ip, ssh_user, ssh_key_url)
def _execute_deploy_script(self, task, ssh, cmd, *args, **kwargs):
# NOTE(oberezovskyi): minutes to seconds
timeout = CONF.bareon.deploy_timeout * 60
LOG.debug('[%s] Running cmd (SSH): %s', task.node.uuid, cmd)
try:
out, err = bareon_utils.ssh_execute(ssh, cmd, timeout=timeout,
check_exit_code=True)
except exception.SSHCommandFailed as err:
LOG.debug('[%s] Deploy script execute failed: "%s"',
task.node.uuid, err)
raise bareon_exception.DeploymentTimeout(timeout=timeout)
return out, err
def _deploy(self, task, ssh, cmd, **params):
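        # Run the deploy script in a greenthread and poll every 5 seconds,
        # watching both for completion and for a termination request made
        # via terminate_deployment() (TERMINATE_FLAG in instance_info).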
deployment_thread = eventlet.spawn(self._execute_deploy_script,
task, ssh, cmd, **params)
def _wait_for_deployment_finished(task, thread):
task_node = task.node
current_node = db_node.Node.get_by_uuid(task.context,
task_node.uuid)
            # NOTE(oberezovskyi): a greenthread has no public way to check
            # whether it has already finished, so we need to access a
            # private attribute
if thread._exit_event.ready():
raise loopingcall.LoopingCallDone()
if (current_node.instance_info.get(TERMINATE_FLAG,
'') == 'requested'):
thread.kill()
bareon_utils.change_node_dict(
task_node, 'instance_info',
{TERMINATE_FLAG: 'done'})
task_node.save()
raise bareon_exception.DeployTerminationSucceed()
timer = loopingcall.FixedIntervalLoopingCall(
_wait_for_deployment_finished, task, deployment_thread)
timer.start(interval=5).wait()
return deployment_thread.wait()
@base.passthru(['POST'], async=False)
@task_manager.require_exclusive_lock
def switch_boot(self, task, **kwargs):
        # NOTE(oberezovskyi): exception messages should not be changed
        # because they are hardcoded in the nova ironic driver
image = kwargs.get('image')
LOG.info('[{0}] Attempt to switch boot to {1} '
'image'.format(task.node.uuid, image))
msg = ""
try:
if not task.node.instance_info.get('multiboot', False):
msg = "[{}] Non-multiboot deployment".format(task.node.uuid)
raise exception.IronicException(message=msg, code=400)
boot_info = task.node.instance_info.get('multiboot_info',
{'elements': []})
grub_id = next((element['grub_id']
for element in boot_info['elements']
if (element['image_uuid'] == image or
element['image_name'] == image)), None)
if grub_id is None:
msg = ('[{}] Can\'t find desired multiboot '
'image'.format(task.node.uuid))
raise exception.IronicException(message=msg, code=400)
elif grub_id == boot_info.get('current_element', None):
msg = ('[{}] Already in desired boot '
'device.'.format(task.node.uuid))
raise exception.IronicException(message=msg, code=400)
node_ip = bareon_utils.get_node_ip(task)
ssh_key = resources.url_download_raw_secured(task.context,
task.node,
kwargs['ssh_key'])
ssh = bareon_utils.get_ssh_connection(task, **{
'host': node_ip,
'username': kwargs['ssh_user'],
'key_contents': ssh_key
})
tmp_path = processutils.ssh_execute(ssh, 'mktemp -d')[0].split()[0]
cfg_path = os.path.join(tmp_path, 'boot', 'grub2', 'grub.cfg')
commands = [
'mount /dev/disk/by-uuid/{} {}'.format(
boot_info['multiboot_partition'],
tmp_path),
"sed -i 's/\(set default=\)[0-9]*/\\1{}/' {}".format(grub_id,
cfg_path),
'umount {}'.format(tmp_path),
'rmdir {}'.format(tmp_path)
]
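            # Mount the multiboot partition, point the grub "set default="
            # entry at the requested image and unmount again.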
            for cmd in commands:
                processutils.ssh_execute(ssh, 'sudo ' + cmd)
except exception.SSHConnectFailed as e:
msg = (
_('[%(node)s] SSH connect to node %(host)s failed. '
'Error: %(error)s') % {'host': node_ip, 'error': e,
'node': task.node.uuid})
raise exception.IronicException(message=msg, code=400)
except exception.IronicException as e:
msg = str(e)
raise
except Exception as e:
msg = (_('[%(node)s] Multiboot switch failed for node %(node)s. '
'Error: %(error)s') % {'node': task.node.uuid,
'error': e})
raise exception.IronicException(message=msg, code=400)
else:
boot_info['current_element'] = grub_id
bareon_utils.change_node_dict(
task.node, 'instance_info',
{'multiboot_info': boot_info})
task.node.save()
finally:
if msg:
LOG.error(msg)
task.node.last_error = msg
task.node.save()
class DeploymentConfigValidator(object):
_driver = None
_namespace = 'bareon.drivers.data'
_min_version = pkg_resources.parse_version('0.0.2')
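    # Loads the requested bareon data driver from the 'bareon.drivers.data'
    # stevedore namespace; when the driver cannot be loaded (or is too old)
    # validation is skipped with a warning instead of failing the deploy.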
def __init__(self, driver_name):
self.driver_name = driver_name
LOG.debug('Loading bareon data-driver "%s"', self.driver_name)
try:
manager = stevedore.driver.DriverManager(
self._namespace, self.driver_name, verify_requirements=True)
extension = manager[driver_name]
version = extension.entry_point.dist.version
version = pkg_resources.parse_version(version)
LOG.info('Driver %s-%s loaded', extension.name, version)
if version < self._min_version:
raise RuntimeError(
'bareon version less than {} does not support '
'deployment config validation'.format(self._min_version))
except RuntimeError as e:
LOG.warning(
                'Failed to load bareon data-driver "%s": %s',
self.driver_name, e)
return
self._driver = manager.driver
def __call__(self, deployment_config):
if self._driver is None:
LOG.info(
                'Skipping deployment config validation due to a problem '
                'loading the bareon data driver')
return
try:
with open(deployment_config, 'rt') as stream:
payload = json.load(stream)
self._driver.validate_data(payload)
except (IOError, ValueError, TypeError) as e:
raise exception.InvalidParameterValue(
'Unable to load deployment config "{}": {}'.format(
deployment_config, e))
except self._driver.exc.WrongInputDataError as e:
raise exception.InvalidParameterValue(
'Deployment config has failed validation.\n'
'{0.message}'.format(e))
def get_provision_json_path(node):
return os.path.join(resources.get_node_resources_dir(node),
"provision.json")
def get_actions_json_path(node):
return os.path.join(resources.get_node_resources_dir(node),
"actions.json")
def get_on_fail_script_path(node):
return os.path.join(resources.get_node_resources_dir(node),
"on_fail_script.sh")
def get_tenant_images_json_path(node):
return os.path.join(resources.get_node_resources_dir(node),
"tenant_images.json")
class _AbstractAdapter(object):
def __init__(self, data):
self._raw = data
def _extract_fields(self, mapping):
for attr, name in mapping:
try:
value = self._raw[name]
except KeyError:
continue
setattr(self, attr, value)
class _DeployStepsAdapter(_AbstractAdapter):
action = action_payload = None
status = status_details = None
def __init__(self, data):
super(_DeployStepsAdapter, self).__init__(data)
self._extract_fields({
'action': 'name',
'status': 'status'}.items())
self.action_payload = self._raw.get('payload', {})
self.status_details = self._raw.get('status-details', '')
# TODO(dbogun): handle all driver_info keys
class _NodeDriverInfoAdapter(_AbstractAdapter):
_exc_prefix = 'driver_info: '
ssh_port = None
    # TODO(dbogun): check API way to define access defaults
ssh_login = 'root'
ssh_key = '/etc/ironic/bareon_key'
ssh_key_pub = None
entry_point = 'bareon-provision'
def __init__(self, node):
super(_NodeDriverInfoAdapter, self).__init__(node.driver_info)
self.node = node
self._extract_fields({
'ssh_port': 'bareon_ssh_port',
'ssh_key': 'bareon_key_filename',
'ssh_key_pub': 'bareon_public_key_filename',
'ssh_login': 'bareon_username',
'entry_point': 'bareon_deploy_script'}.items())
self._process()
self._validate()
def _process(self):
if self.ssh_key_pub is None:
self.ssh_key_pub = '{}.pub'.format(self.ssh_key)
if self.ssh_port is not None:
self.ssh_port = int(self.ssh_port)
if not 0 < self.ssh_port < 65536:
raise exception.InvalidParameterValue(
                    '{}Invalid SSH port number ({}) is outside of the '
                    'allowed range.'.format(self._exc_prefix,
                                            'bareon_ssh_port'))
def _validate(self):
self._validate_ssh_key()
def _validate_ssh_key(self):
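        # Both the private and the public key files must exist; the private
        # key must additionally be a regular file with permissions no more
        # open than 0600.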
missing = []
pkey_stats = None
for idx, target in enumerate((self.ssh_key, self.ssh_key_pub)):
try:
target_stat = os.stat(target)
if not idx:
pkey_stats = target_stat
except OSError as e:
missing.append(e)
missing = ['{0.filename}: {0.strerror}'.format(x) for x in missing]
if missing:
raise exception.InvalidParameterValue(
'{}Unable to use SSH key:\n{}'.format(
self._exc_prefix, '\n'.join(missing)))
issue = None
if not stat.S_ISREG(pkey_stats.st_mode):
issue = 'SSH private key {!r} is not a regular file.'.format(
self.ssh_key)
if pkey_stats.st_mode & 0o177:
issue = 'Permissions {} for {!r} are too open.'.format(
oct(pkey_stats.st_mode & 0o777), self.ssh_key)
if issue:
raise exception.InvalidParameterValue(issue)
@six.add_metaclass(abc.ABCMeta)
class _AbstractDeployStepHandler(object):
def __init__(self, task, driver_info):
self.task = task
self.driver_info = driver_info
@abc.abstractmethod
def __call__(self):
pass
@six.add_metaclass(abc.ABCMeta)
class _AbstractDeployStepResult(_AbstractDeployStepHandler):
def __init__(self, task, driver_info, step_info):
super(_AbstractDeployStepResult, self).__init__(task, driver_info)
self.step_info = step_info
def __call__(self):
if not self.step_info.status:
self._handle_error()
return
return self._handle()
@abc.abstractmethod
def _handle(self):
pass
def _handle_error(self):
        message = 'Deployment step "{}" has failed: {}'.format(
self.step_info.action, self.step_info.status_details)
# TODO(dbogun): add support for existing log extraction mechanism
deploy_utils.set_failed_state(self.task, message, collect_logs=False)
@six.add_metaclass(abc.ABCMeta)
class _AbstractDeployStepRequest(_AbstractDeployStepHandler):
@abc.abstractproperty
def name(self):
pass
@abc.abstractproperty
def result_handler(self):
pass
def __call__(self):
payload = self._handle()
return {
'name': self.name,
'payload': payload}
@abc.abstractmethod
def _handle(self):
pass
class _InjectSSHKeyStepResult(_AbstractDeployStepResult):
def _handle(self):
pass
class _InjectSSHKeyStepRequest(_AbstractDeployStepRequest):
name = 'inject-ssh-keys'
result_handler = _InjectSSHKeyStepResult
def _handle(self):
try:
with open(self.driver_info.ssh_key_pub) as data:
ssh_key = data.read()
except IOError as e:
raise bareon_exception.DeployTaskError(
name=type(self).__name__, details=e)
return {
'ssh-keys': {
self.driver_info.ssh_login: [ssh_key]}}
class _DeployStepMapping(object):
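    # Discovers every _AbstractDeployStepRequest subclass defined in this
    # module and builds name <-> step-class lookup tables for deploy_steps().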
def __init__(self):
self.steps = []
base_cls = _AbstractDeployStepRequest
target = sys.modules[__name__]
for name in dir(target):
value = getattr(target, name)
if (inspect.isclass(value)
and issubclass(value, base_cls)
and value is not base_cls):
self.steps.append(value)
self.name_to_step = {}
self.step_to_name = {}
for task in self.steps:
self.name_to_step[task.name] = task
self.step_to_name[task] = task.name