diff --git a/ironic/conductor/manager.py b/ironic/conductor/manager.py index 45a384715c..92cc11cd3c 100644 --- a/ironic/conductor/manager.py +++ b/ironic/conductor/manager.py @@ -47,6 +47,7 @@ import tempfile import eventlet from futurist import periodics +from ironic_lib import metrics_utils from oslo_log import log import oslo_messaging as messaging from oslo_utils import excutils @@ -74,6 +75,8 @@ MANAGER_TOPIC = 'ironic.conductor_manager' LOG = log.getLogger(__name__) +METRICS = metrics_utils.get_metrics_logger(__name__) + SYNC_EXCLUDED_STATES = (states.DEPLOYWAIT, states.CLEANWAIT, states.ENROLL) @@ -89,6 +92,7 @@ class ConductorManager(base_manager.BaseConductorManager): super(ConductorManager, self).__init__(host, topic) self.power_state_sync_count = collections.defaultdict(int) + @METRICS.timer('ConductorManager.update_node') @messaging.expected_exceptions(exception.InvalidParameterValue, exception.MissingParameterValue, exception.NodeLocked, @@ -143,6 +147,7 @@ class ConductorManager(base_manager.BaseConductorManager): return node_obj + @METRICS.timer('ConductorManager.change_node_power_state') @messaging.expected_exceptions(exception.InvalidParameterValue, exception.MissingParameterValue, exception.NoFreeConductorWorker, @@ -184,6 +189,7 @@ class ConductorManager(base_manager.BaseConductorManager): task.spawn_after(self._spawn_worker, utils.node_power_action, task, new_state) + @METRICS.timer('ConductorManager.vendor_passthru') @messaging.expected_exceptions(exception.NoFreeConductorWorker, exception.NodeLocked, exception.InvalidParameterValue, @@ -271,6 +277,7 @@ class ConductorManager(base_manager.BaseConductorManager): 'async': is_async, 'attach': vendor_opts['attach']} + @METRICS.timer('ConductorManager.driver_vendor_passthru') @messaging.expected_exceptions(exception.NoFreeConductorWorker, exception.InvalidParameterValue, exception.MissingParameterValue, @@ -350,6 +357,7 @@ class ConductorManager(base_manager.BaseConductorManager): 'async': is_async, 'attach': vendor_opts['attach']} + @METRICS.timer('ConductorManager.get_node_vendor_passthru_methods') @messaging.expected_exceptions(exception.UnsupportedDriverExtension) def get_node_vendor_passthru_methods(self, context, node_id): """Retrieve information about vendor methods of the given node. @@ -372,6 +380,7 @@ class ConductorManager(base_manager.BaseConductorManager): return get_vendor_passthru_metadata( task.driver.vendor.vendor_routes) + @METRICS.timer('ConductorManager.get_driver_vendor_passthru_methods') @messaging.expected_exceptions(exception.UnsupportedDriverExtension, exception.DriverNotFound) def get_driver_vendor_passthru_methods(self, context, driver_name): @@ -394,6 +403,7 @@ class ConductorManager(base_manager.BaseConductorManager): return get_vendor_passthru_metadata(driver.vendor.driver_routes) + @METRICS.timer('ConductorManager.do_node_deploy') @messaging.expected_exceptions(exception.NoFreeConductorWorker, exception.NodeLocked, exception.NodeInMaintenance, @@ -486,6 +496,7 @@ class ConductorManager(base_manager.BaseConductorManager): action=event, node=task.node.uuid, state=task.node.provision_state) + @METRICS.timer('ConductorManager.do_node_tear_down') @messaging.expected_exceptions(exception.NoFreeConductorWorker, exception.NodeLocked, exception.InstanceDeployFailure, @@ -618,6 +629,7 @@ class ConductorManager(base_manager.BaseConductorManager): ind = None return ind + @METRICS.timer('ConductorManager.do_node_clean') @messaging.expected_exceptions(exception.InvalidParameterValue, exception.InvalidStateRequested, exception.NodeInMaintenance, @@ -683,6 +695,7 @@ class ConductorManager(base_manager.BaseConductorManager): action='manual clean', node=node.uuid, state=node.provision_state) + @METRICS.timer('ConductorManager.continue_node_clean') def continue_node_clean(self, context, node_id): """RPC method to continue cleaning a node. @@ -997,6 +1010,7 @@ class ConductorManager(base_manager.BaseConductorManager): node.save() LOG.info(info_message) + @METRICS.timer('ConductorManager.do_provisioning_action') @messaging.expected_exceptions(exception.NoFreeConductorWorker, exception.NodeLocked, exception.InvalidParameterValue, @@ -1093,6 +1107,7 @@ class ConductorManager(base_manager.BaseConductorManager): action=action, node=node.uuid, state=node.provision_state) + @METRICS.timer('ConductorManager._sync_power_states') @periodics.periodic(spacing=CONF.conductor.sync_power_state_interval) def _sync_power_states(self, context): """Periodic task to sync power states for the nodes. @@ -1161,6 +1176,7 @@ class ConductorManager(base_manager.BaseConductorManager): # Yield on every iteration eventlet.sleep(0) + @METRICS.timer('ConductorManager._check_deploy_timeouts') @periodics.periodic(spacing=CONF.conductor.check_provision_state_interval) def _check_deploy_timeouts(self, context): """Periodically checks whether a deploy RPC call has timed out. @@ -1183,6 +1199,7 @@ class ConductorManager(base_manager.BaseConductorManager): self._fail_if_in_state(context, filters, states.DEPLOYWAIT, sort_key, callback_method, err_handler) + @METRICS.timer('ConductorManager._check_deploying_status') @periodics.periodic(spacing=CONF.conductor.check_provision_state_interval) def _check_deploying_status(self, context): """Periodically checks the status of nodes in DEPLOYING state. @@ -1234,6 +1251,7 @@ class ConductorManager(base_manager.BaseConductorManager): callback_method=utils.cleanup_after_timeout, err_handler=utils.provisioning_error_handler) + @METRICS.timer('ConductorManager._do_adoption') @task_manager.require_exclusive_lock def _do_adoption(self, task): """Adopt the node. @@ -1282,6 +1300,7 @@ class ConductorManager(base_manager.BaseConductorManager): node.last_error = msg task.process_event('fail') + @METRICS.timer('ConductorManager._do_takeover') def _do_takeover(self, task): """Take over this node. @@ -1314,6 +1333,7 @@ class ConductorManager(base_manager.BaseConductorManager): task.node.conductor_affinity = self.conductor.id task.node.save() + @METRICS.timer('ConductorManager._check_cleanwait_timeouts') @periodics.periodic(spacing=CONF.conductor.check_provision_state_interval) def _check_cleanwait_timeouts(self, context): """Periodically checks for nodes being cleaned. @@ -1336,6 +1356,7 @@ class ConductorManager(base_manager.BaseConductorManager): keep_target_state=True, callback_method=utils.cleanup_cleanwait_timeout) + @METRICS.timer('ConductorManager._sync_local_state') @periodics.periodic(spacing=CONF.conductor.sync_local_state_interval) def _sync_local_state(self, context): """Perform any actions necessary to sync local state. @@ -1380,6 +1401,7 @@ class ConductorManager(base_manager.BaseConductorManager): if workers_count == CONF.conductor.periodic_max_workers: break + @METRICS.timer('ConductorManager.validate_driver_interfaces') @messaging.expected_exceptions(exception.NodeLocked) def validate_driver_interfaces(self, context, node_id): """Validate the `core` and `standardized` interfaces for drivers. @@ -1426,6 +1448,7 @@ class ConductorManager(base_manager.BaseConductorManager): ret_dict[iface_name]['reason'] = reason return ret_dict + @METRICS.timer('ConductorManager.destroy_node') @messaging.expected_exceptions(exception.NodeLocked, exception.NodeAssociated, exception.InvalidState) @@ -1479,6 +1502,7 @@ class ConductorManager(base_manager.BaseConductorManager): LOG.info(_LI('Successfully deleted node %(node)s.'), {'node': node.uuid}) + @METRICS.timer('ConductorManager.destroy_port') @messaging.expected_exceptions(exception.NodeLocked, exception.NodeNotFound) def destroy_port(self, context, port): @@ -1501,6 +1525,7 @@ class ConductorManager(base_manager.BaseConductorManager): '%(node)s'), {'port': port.uuid, 'node': task.node.uuid}) + @METRICS.timer('ConductorManager.destroy_portgroup') @messaging.expected_exceptions(exception.NodeLocked, exception.NodeNotFound, exception.PortgroupNotEmpty) @@ -1525,6 +1550,7 @@ class ConductorManager(base_manager.BaseConductorManager): '%(node)s'), {'portgroup': portgroup.uuid, 'node': task.node.uuid}) + @METRICS.timer('ConductorManager.get_console_information') @messaging.expected_exceptions(exception.NodeLocked, exception.UnsupportedDriverExtension, exception.NodeConsoleNotEnabled, @@ -1557,6 +1583,7 @@ class ConductorManager(base_manager.BaseConductorManager): task.driver.console.validate(task) return task.driver.console.get_console(task) + @METRICS.timer('ConductorManager.set_console_mode') @messaging.expected_exceptions(exception.NoFreeConductorWorker, exception.NodeLocked, exception.UnsupportedDriverExtension, @@ -1627,6 +1654,7 @@ class ConductorManager(base_manager.BaseConductorManager): finally: node.save() + @METRICS.timer('ConductorManager.update_port') @messaging.expected_exceptions(exception.NodeLocked, exception.FailedToUpdateMacOnPort, exception.MACAlreadyExists, @@ -1696,6 +1724,7 @@ class ConductorManager(base_manager.BaseConductorManager): return port_obj + @METRICS.timer('ConductorManager.update_portgroup') @messaging.expected_exceptions(exception.NodeLocked, exception.FailedToUpdateMacOnPort, exception.PortgroupMACAlreadyExists, @@ -1782,6 +1811,7 @@ class ConductorManager(base_manager.BaseConductorManager): return portgroup_obj + @METRICS.timer('ConductorManager.get_driver_properties') @messaging.expected_exceptions(exception.DriverNotFound) def get_driver_properties(self, context, driver_name): """Get the properties of the driver. @@ -1798,6 +1828,7 @@ class ConductorManager(base_manager.BaseConductorManager): driver = driver_factory.get_driver(driver_name) return driver.get_properties() + @METRICS.timer('ConductorManager._send_sensor_data') @periodics.periodic(spacing=CONF.conductor.send_sensor_data_interval) def _send_sensor_data(self, context): """Periodically sends sensor data to Ceilometer.""" @@ -1880,6 +1911,7 @@ class ConductorManager(base_manager.BaseConductorManager): return dict((sensor_type, sensor_value) for (sensor_type, sensor_value) in sensors_data.items() if sensor_type.lower() in allowed) + @METRICS.timer('ConductorManager.set_boot_device') @messaging.expected_exceptions(exception.NodeLocked, exception.UnsupportedDriverExtension, exception.InvalidParameterValue, @@ -1914,6 +1946,7 @@ class ConductorManager(base_manager.BaseConductorManager): task.driver.management.set_boot_device(task, device, persistent=persistent) + @METRICS.timer('ConductorManager.get_boot_device') @messaging.expected_exceptions(exception.NodeLocked, exception.UnsupportedDriverExtension, exception.InvalidParameterValue, @@ -1948,6 +1981,7 @@ class ConductorManager(base_manager.BaseConductorManager): task.driver.management.validate(task) return task.driver.management.get_boot_device(task) + @METRICS.timer('ConductorManager.get_supported_boot_devices') @messaging.expected_exceptions(exception.NodeLocked, exception.UnsupportedDriverExtension, exception.InvalidParameterValue, @@ -1978,6 +2012,7 @@ class ConductorManager(base_manager.BaseConductorManager): driver=task.node.driver, extension='management') return task.driver.management.get_supported_boot_devices(task) + @METRICS.timer('ConductorManager.inspect_hardware') @messaging.expected_exceptions(exception.NoFreeConductorWorker, exception.NodeLocked, exception.HardwareInspectionFailure, @@ -2032,6 +2067,7 @@ class ConductorManager(base_manager.BaseConductorManager): action='inspect', node=task.node.uuid, state=task.node.provision_state) + @METRICS.timer('ConductorManager._check_inspect_timeouts') @periodics.periodic(spacing=CONF.conductor.check_provision_state_interval) def _check_inspect_timeouts(self, context): """Periodically checks inspect_timeout and fails upon reaching it. @@ -2051,6 +2087,7 @@ class ConductorManager(base_manager.BaseConductorManager): self._fail_if_in_state(context, filters, states.INSPECTING, sort_key, last_error=last_error) + @METRICS.timer('ConductorManager.set_target_raid_config') @messaging.expected_exceptions(exception.NodeLocked, exception.UnsupportedDriverExtension, exception.InvalidParameterValue, @@ -2090,6 +2127,7 @@ class ConductorManager(base_manager.BaseConductorManager): node.target_raid_config = target_raid_config node.save() + @METRICS.timer('ConductorManager.get_raid_logical_disk_properties') @messaging.expected_exceptions(exception.UnsupportedDriverExtension) def get_raid_logical_disk_properties(self, context, driver_name): """Get the logical disk properties for RAID configuration. @@ -2114,6 +2152,7 @@ class ConductorManager(base_manager.BaseConductorManager): return driver.raid.get_logical_disk_properties() + @METRICS.timer('ConductorManager.heartbeat') @messaging.expected_exceptions(exception.NoFreeConductorWorker) def heartbeat(self, context, node_id, callback_url): """Process a heartbeat from the ramdisk. @@ -2156,6 +2195,7 @@ class ConductorManager(base_manager.BaseConductorManager): # there's no reason to log it here. raise messaging.ExpectedException() + @METRICS.timer('ConductorManager.object_class_action_versions') def object_class_action_versions(self, context, objname, objmethod, object_versions, args, kwargs): """Perform an action on a VersionedObject class. @@ -2182,6 +2222,7 @@ class ConductorManager(base_manager.BaseConductorManager): version_manifest=object_versions) return result + @METRICS.timer('ConductorManager.object_action') def object_action(self, context, objinst, objmethod, args, kwargs): """Perform an action on a VersionedObject instance. @@ -2213,6 +2254,7 @@ class ConductorManager(base_manager.BaseConductorManager): updates['obj_what_changed'] = objinst.obj_what_changed() return updates, result + @METRICS.timer('ConductorManager.object_backport_versions') def object_backport_versions(self, context, objinst, object_versions): """Perform a backport of an object instance. @@ -2236,6 +2278,7 @@ class ConductorManager(base_manager.BaseConductorManager): version_manifest=object_versions) +@METRICS.timer('get_vendor_passthru_metadata') def get_vendor_passthru_metadata(route_dict): d = {} for method, metadata in route_dict.items(): @@ -2286,6 +2329,7 @@ def _store_configdrive(node, configdrive): node.instance_info = i_info +@METRICS.timer('do_node_deploy') @task_manager.require_exclusive_lock def do_node_deploy(task, conductor_id, configdrive=None): """Prepare the environment and deploy a node.""" @@ -2386,6 +2430,7 @@ def handle_sync_power_state_max_retries_exceeded(task, actual_power_state, LOG.error(msg) +@METRICS.timer('do_sync_power_state') def do_sync_power_state(task, count): """Sync the power state for this node, incrementing the counter on failure.