From 8770b8563d6bd4411630d7145315da1611accf3c Mon Sep 17 00:00:00 2001 From: Pavlo Shchelokovskyy Date: Wed, 31 Aug 2016 18:55:21 +0300 Subject: [PATCH] Use new Ironic's heartbeat API also make the callback script more resilient by retrying attempts to contact Ironic API (hardcoded 50 times). Change-Id: I269c1b95ee5702ed435df30834e239bd6f6f0582 --- .../ironic_fa_deploy/modules/fuel_agent.py | 49 +++++-------------- fuel_agent/cmd/ironic_callback.py | 47 +++++++++++------- 2 files changed, 41 insertions(+), 55 deletions(-) diff --git a/contrib/ironic/ironic-fa-deploy/ironic_fa_deploy/modules/fuel_agent.py b/contrib/ironic/ironic-fa-deploy/ironic_fa_deploy/modules/fuel_agent.py index a5f6084b..1381efbe 100644 --- a/contrib/ironic/ironic-fa-deploy/ironic_fa_deploy/modules/fuel_agent.py +++ b/contrib/ironic/ironic-fa-deploy/ironic_fa_deploy/modules/fuel_agent.py @@ -27,6 +27,7 @@ from oslo_service import loopingcall from oslo_utils import excutils from oslo_utils import fileutils import six +import six.moves.urllib.parse as urlparse from ironic.common import boot_devices from ironic.common import dhcp_factory @@ -466,49 +467,23 @@ class FuelAgentDeploy(base.DeployInterface): def take_over(self, task): pass - -class FuelAgentVendor(base.VendorInterface): - - def get_properties(self): - """Return the properties of the interface. - - :returns: dictionary of : entries. - """ - return COMMON_PROPERTIES - - def validate(self, task, method, **kwargs): - """Validate the driver-specific Node deployment info. 
- - :param task: a TaskManager instance - :param method: method to be validated - """ - _parse_driver_info(task.node) - if not kwargs.get('status'): - raise exception.MissingParameterValue(_('Unknown Fuel Agent status' - ' on a node.')) - if not kwargs.get('address'): - raise exception.MissingParameterValue(_('Fuel Agent must pass ' - 'address of a node.')) - - @base.passthru(['POST']) - @task_manager.require_exclusive_lock - def heartbeat(self, task, **kwargs): + def heartbeat(self, task, callback_url): """Continues the deployment of baremetal node.""" node = task.node - task.process_event('resume') - err_msg = _('Failed to continue deployment with Fuel Agent.') - agent_status = kwargs.get('status') - if agent_status != 'ready': - LOG.error(_LE('Deploy failed for node %(node)s. Fuel Agent is not ' - 'in ready state, error: %(error)s'), {'node': node.uuid, - 'error': kwargs.get('error_message')}) - deploy_utils.set_failed_state(task, err_msg) + # NOTE(pas-ha) this driver does not support cleaning, + # so the only valid state to continue on heartbeat is DEPLOYWAIT + if node.provision_state != states.DEPLOYWAIT: + LOG.warning(_LW('Call back from %(node)s in invalid provision ' + 'state %(state)s'), + {'node': node.uuid, 'state': node.provision_state}) return + task.upgrade_lock(purpose='deploy') + task.process_event('resume') params = _parse_driver_info(node) - params['host'] = kwargs.get('address') + params['host'] = urlparse.urlparse(callback_url).netloc.split(':')[0] cmd = ('%s --data_driver ironic --config-file ' '/etc/fuel-agent/fuel-agent.conf' % params.pop('script')) if CONF.debug: @@ -543,7 +518,7 @@ class FuelAgentVendor(base.VendorInterface): msg = (_('Deploy failed for node %(node)s. 
Error: %(error)s') % {'node': node.uuid, 'error': e}) LOG.error(msg) - deploy_utils.set_failed_state(task, msg) + deploy_utils.set_failed_state(task, msg, collect_logs=False) else: task.process_event('done') LOG.info(_LI('Deployment to node %s done'), task.node.uuid) diff --git a/fuel_agent/cmd/ironic_callback.py b/fuel_agent/cmd/ironic_callback.py index 89917e43..90f61cf1 100755 --- a/fuel_agent/cmd/ironic_callback.py +++ b/fuel_agent/cmd/ironic_callback.py @@ -20,6 +20,10 @@ import requests from fuel_agent.utils import utils +_GET_ADDR_MAX_ITERATION = 50 +_POST_CALLBACK_MAX_ITERATION = 50 +_RETRY_INTERVAL = 5 + def _process_error(message): sys.stderr.write(message) @@ -56,28 +60,35 @@ def main(): # The leading `01-' denotes the device type (Ethernet) and is not a part of # the MAC address boot_mac = bootif[3:].replace('-', ':') - for n in range(10): + for n in range(_GET_ADDR_MAX_ITERATION): boot_ip = utils.get_interface_ip(boot_mac) if boot_ip is not None: break - time.sleep(10) + time.sleep(_RETRY_INTERVAL) else: _process_error('Cannot find IP address of boot interface.') - data = {"address": boot_ip, - "status": "ready", - "error_message": "no errors"} + # NOTE(pas-ha) supporting only Ironic API >= 1.22 !!! 
+ headers = {'Content-Type': 'application/json', + 'Accept': 'application/json', + 'X-OpenStack-Ironic-API-Version': '1.22'} + data = {"callback_url": "ssh://" + boot_ip} + heartbeat = '{api_url}/v1/heartbeat/{uuid}'.format(api_url=api_url, + uuid=deployment_id) - passthru = '%(api-url)s/v1/nodes/%(deployment_id)s/vendor_passthru' \ - '/heartbeat' % {'api-url': api_url, - 'deployment_id': deployment_id} - try: - resp = requests.post(passthru, data=json.dumps(data), - headers={'Content-Type': 'application/json', - 'Accept': 'application/json'}) - except Exception as e: - _process_error(str(e)) - - if resp.status_code != 202: - _process_error('Wrong status code %d returned from Ironic API' % - resp.status_code) + for attempt in range(_POST_CALLBACK_MAX_ITERATION): + try: + resp = requests.post(heartbeat, data=json.dumps(data), + headers=headers) + except Exception as e: + error = str(e) + else: + if resp.status_code != 202: + error = ('Wrong status code %d returned from Ironic API' % + resp.status_code) + else: + break + time.sleep(_RETRY_INTERVAL) + else: + # executed only when whole for block was executed w/o breaks + _process_error(error)