Save power_state to instance and sync with ironic

1. add power_state to instance
2. add a periodic task to sync power state

Change-Id: Id51927bf8d72884d67a73c6370458222dc061210
This commit is contained in:
Zhenguo Niu 2017-01-09 17:26:33 +08:00
parent 7343fbfb27
commit 0dfb378480
18 changed files with 252 additions and 86 deletions

View File

@ -104,6 +104,7 @@ Response
- description: instance_description
- uuid: instance_uuid
- status: instance_status
- power_state: instance_power_state
- links: links
**Example List of Instances: JSON response**

View File

@ -14,6 +14,7 @@
],
"name": "test_instance",
"status": "building",
"power_state": "power on",
"uuid": "f978ef48-d4af-4dad-beec-e6174309bc71"
}
]

View File

@ -30,14 +30,12 @@ from mogan.api.controllers.v1 import utils as api_utils
from mogan.api import expose
from mogan.common import exception
from mogan.common.i18n import _
from mogan.common.i18n import _LW
from mogan.common import policy
from mogan.engine.baremetal import ironic_states as ir_states
from mogan import objects
_DEFAULT_INSTANCE_RETURN_FIELDS = ('uuid', 'name', 'description',
'status')
_NODE_FIELDS = ['power_state', 'instance_uuid']
'status', 'power_state')
LOG = log.getLogger(__name__)
@ -286,27 +284,6 @@ class InstanceController(rest.RestController):
project_only=project_only)
instances_data = [instance.as_dict() for instance in instances]
if fields is None or 'power_state' in fields:
try:
nodes = pecan.request.engine_api.get_ironic_node_list(
pecan.request.context, fields=_NODE_FIELDS)
node_list = nodes['nodes']
except Exception as e:
LOG.warning(
_LW("Failed to retrieve node list from"
"ironic api: %(msg)s") % {"msg": e})
node_list = []
if node_list:
node_dict = {node['instance_uuid']: node for node in node_list
if node['instance_uuid']}
# Merge mogan instance info with ironic node power state
for instance_data in instances_data:
uuid = instance_data['uuid']
if uuid in node_dict:
instance_data['power_state'] = \
node_dict[uuid]['power_state']
return InstanceCollection.convert_with_links(instances_data,
fields=fields)
@ -337,21 +314,6 @@ class InstanceController(rest.RestController):
"""
rpc_instance = self._resource or self._get_resource(instance_uuid)
instance_data = rpc_instance.as_dict()
if (fields is None or 'power_state' in fields and
instance_data['node_uuid']):
# Only fetch node info if fields parameter is not specified
# or node fields is not requested and when instance is really
# associated with a ironic node.
try:
node = pecan.request.engine_api.get_ironic_node(
pecan.request.context, instance_uuid, _NODE_FIELDS)
instance_data['power_state'] = node['power_state']
except Exception as e:
LOG.warning(
_LW("Failed to retrieve node by instance_uuid"
" %(instance_uuid)s from ironic api: %(msg)s") % {
"instance_uuid": instance_uuid,
"msg": e})
return Instance.convert_with_links(instance_data, fields=fields)

View File

@ -27,6 +27,26 @@ from mogan.common import fsm
LOG = logging.getLogger(__name__)
##############
# Power states
##############

#: Instance is powered on.
POWER_ON = 'power on'

#: Instance is powered off.
POWER_OFF = 'power off'

#: No state information.
NOSTATE = None

#: Maps a requested power target ('on', 'off', 'reboot') to the name of the
#: state-machine event that is processed for it.
POWER_ACTION_MAP = dict(on='start', off='stop', reboot='reboot')
#################
# Instance states
#################

View File

@ -17,6 +17,7 @@
import re
from oslo_concurrency import lockutils
from oslo_log import log as logging
import six
@ -25,6 +26,8 @@ from mogan.common.i18n import _LW
LOG = logging.getLogger(__name__)
synchronized = lockutils.synchronized_with_prefix('mogan-')
def safe_rstrip(value, chars=None):
"""Removes trailing characters from a string if that does not make it empty

View File

@ -40,7 +40,16 @@ opts = [
help=_('Default scheduler driver to use')),
cfg.StrOpt('default_schedule_zone',
help=_("Availability zone to use when user doesn't "
"specify one."))
"specify one.")),
cfg.IntOpt('sync_power_state_pool_size',
default=1000,
help=_("Number of greenthreads available for use to sync "
"power states. Any positive integer representing "
"greenthreads count.")),
cfg.IntOpt('sync_power_state_interval',
default=600,
help=_("Interval to sync power states between the database "
"and Ironic, in seconds.")),
]

View File

@ -79,6 +79,7 @@ def upgrade():
sa.Column('name', sa.String(length=255), nullable=True),
sa.Column('description', sa.String(length=255), nullable=True),
sa.Column('status', sa.String(length=255), nullable=True),
sa.Column('power_state', sa.String(length=15), nullable=True),
sa.Column('instance_type_uuid', sa.String(length=36), nullable=True),
sa.Column('image_uuid', sa.String(length=36), nullable=True),
sa.Column('network_info', sa.Text(), nullable=True),

View File

@ -73,6 +73,7 @@ class Instance(Base):
project_id = Column(String(36), nullable=True)
user_id = Column(String(36), nullable=True)
status = Column(String(255), nullable=True)
power_state = Column(String(15), nullable=True)
instance_type_uuid = Column(String(36), nullable=True)
availability_zone = Column(String(255), nullable=True)
image_uuid = Column(String(36), nullable=True)

View File

@ -125,16 +125,15 @@ class API(object):
def _delete_instance(self, context, instance):
# Initialize state machine
fsm = states.machine.copy()
fsm.initialize(start_state=instance.status,
target_state=states.DELETED)
fsm.initialize(start_state=instance.status)
fsm.process_event('delete')
try:
instance.status = fsm.current_state
instance.save()
except exception.InstanceNotFound:
LOG.debug("Instance %s is not found while deleting",
instance.uuid)
LOG.debug("Instance is not found while deleting",
instance=instance)
return
self.engine_rpcapi.delete_instance(context, instance)
@ -149,18 +148,21 @@ class API(object):
def power(self, context, instance, target):
    """Set power state of an instance.

    Processes the state-machine event mapped to ``target``, stores the
    resulting state as the instance status and then hands the request to
    the engine through ``engine_rpcapi.set_power_state``.

    :param context: request context forwarded to the engine RPC API.
    :param instance: the instance object to act on.
    :param target: key of ``states.POWER_ACTION_MAP`` naming the action.
    """
    LOG.debug("Going to try to set instance power state to %s",
              target, instance=instance)

    machine = states.machine.copy()
    machine.initialize(start_state=instance.status)
    event = states.POWER_ACTION_MAP[target]
    machine.process_event(event)

    try:
        instance.status = machine.current_state
        instance.save()
    except exception.InstanceNotFound:
        # Nothing left to power on/off; do not bother the engine.
        LOG.debug("Instance is not found while setting power state",
                  instance=instance)
        return

    self.engine_rpcapi.set_power_state(context, instance, target)
def get_ironic_node(self, context, instance_uuid, fields):
"""Get a ironic node by instance UUID."""
return self.engine_rpcapi.get_ironic_node(context,
instance_uuid,
fields)
def get_ironic_node_list(self, context, fields):
"""Get a list of ironic node."""
return self.engine_rpcapi.get_ironic_node_list(context, fields)
def list_availability_zones(self, context):
"""Get a list of availability zones."""
return self.engine_rpcapi.list_availability_zones(context)

View File

@ -17,6 +17,8 @@ from ironicclient import exceptions as client_e
from oslo_log import log as logging
from mogan.common.i18n import _LE
from mogan.common.i18n import _LW
from mogan.common import states
from mogan.engine.baremetal import ironic_states
LOG = logging.getLogger(__name__)
@ -25,6 +27,29 @@ _NODE_FIELDS = ('uuid', 'power_state', 'target_power_state', 'provision_state',
'target_provision_state', 'last_error', 'maintenance',
'properties', 'instance_uuid')
# Translation table from the power-state constants of ironic_states to the
# corresponding constants of mogan.common.states. Consulted by
# map_power_state().
_POWER_STATE_MAP = {
ironic_states.POWER_ON: states.POWER_ON,
ironic_states.NOSTATE: states.NOSTATE,
ironic_states.POWER_OFF: states.POWER_OFF,
}
def map_power_state(state):
    """Translate an Ironic power state through ``_POWER_STATE_MAP``.

    :param state: power state value to look up.
    :returns: the mapped value, or ``states.NOSTATE`` (after logging a
        warning) when ``state`` has no entry in the map.
    """
    if state in _POWER_STATE_MAP:
        return _POWER_STATE_MAP[state]

    LOG.warning(_LW("Power state %s not found."), state)
    return states.NOSTATE
def get_power_state(ironicclient, instance_uuid):
    """Return the mapped power state of the node tied to an instance.

    :param ironicclient: client object exposing ``call()``.
    :param instance_uuid: UUID of the instance whose node is looked up.
    :returns: ``map_power_state()`` of the node's ``power_state``, or of
        ``ironic_states.NOSTATE`` when the lookup raises ``NotFound``.
    """
    try:
        node = ironicclient.call('node.get_by_instance_uuid',
                                 instance_uuid, fields=('power_state',))
    except client_e.NotFound:
        ironic_power_state = ironic_states.NOSTATE
    else:
        ironic_power_state = node.power_state

    return map_power_state(ironic_power_state)
def get_ports_from_node(ironicclient, node_uuid, detail=False):
"""List the MAC addresses and the port types from a node."""

View File

@ -41,6 +41,9 @@ class BaseEngineManager(periodic_task.PeriodicTasks):
self.scheduler = importutils.import_object(scheduler_driver)
self.ironicclient = ironic.IronicClientWrapper()
self.engine_rpcapi = rpcapi.EngineAPI()
self._sync_power_pool = greenpool.GreenPool(
size=CONF.engine.sync_power_state_pool_size)
self._syncs_in_progress = {}
self._started = False
def init_host(self):

View File

@ -30,12 +30,14 @@ from mogan.common.i18n import _LE
from mogan.common.i18n import _LI
from mogan.common.i18n import _LW
from mogan.common import states
from mogan.common import utils
from mogan.conf import CONF
from mogan.engine.baremetal import ironic
from mogan.engine.baremetal import ironic_states
from mogan.engine import base_manager
from mogan.engine.flows import create_instance
from mogan.notifications import base as notifications
from mogan import objects
from mogan.objects import fields
LOG = log.getLogger(__name__)
@ -71,6 +73,127 @@ class EngineManager(base_manager.BaseEngineManager):
def _sync_node_resources(self, context):
self._refresh_cache()
@periodic_task.periodic_task(spacing=CONF.engine.sync_power_state_interval,
                             run_immediately=True)
def _sync_power_states(self, context):
    """Align power states between the database and the hypervisor.

    Lists the associated, non-maintenance nodes from Ironic and, for every
    instance in ACTIVE or STOPPED status whose stored power_state differs
    from its node's power_state, spawns a greenthread in
    ``self._sync_power_pool`` that updates the database record.

    Nodes with a pending ``target_power_state`` are ignored, as are
    instances that already have a sync in flight.

    :param context: request context used for the database queries.
    """
    # Only fetching the necessary fields, will skip synchronizing if
    # target_power_state is not None.
    node_fields = ('instance_uuid', 'power_state', 'target_power_state')

    try:
        nodes = ironic.get_node_list(self.ironicclient,
                                     maintenance=False,
                                     associated=True,
                                     fields=node_fields,
                                     limit=0)
    except Exception as e:
        # Let the logger interpolate the arguments instead of formatting
        # the message eagerly.
        LOG.warning(
            _LW("Failed to retrieve node list when synchronizing power "
                "states: %(msg)s"), {"msg": e})
        # Just return if we fail to get nodes real power state.
        return

    node_dict = {node.instance_uuid: node for node in nodes
                 if node.target_power_state is None}

    if not node_dict:
        LOG.warning(_LW("While synchronizing instance power states, "
                        "found none instance with stable power state "
                        "on the hypervisor."))
        return

    def _sync(db_instance, node_power_state):
        # This must be synchronized as we query state from two separate
        # sources, the driver (ironic) and the database. They are set
        # (in stop_instance) and read, in sync.
        @utils.synchronized(db_instance.uuid)
        def sync_instance_power_state():
            self._sync_instance_power_state(context, db_instance,
                                            node_power_state)

        try:
            sync_instance_power_state()
        except Exception:
            LOG.exception(_LE("Periodic sync_power_state task had an "
                              "error while processing an instance."),
                          instance=db_instance)
        finally:
            # Always clear the in-progress marker, even when the
            # greenthread is torn down by a BaseException such as
            # GreenletExit; otherwise this instance would be skipped by
            # every later run. The default keeps a missing key harmless.
            self._syncs_in_progress.pop(db_instance.uuid, None)

    db_instances = objects.Instance.list(context)
    for db_instance in db_instances:
        # process syncs asynchronously - don't want instance locking to
        # block entire periodic task thread
        uuid = db_instance.uuid
        if uuid in self._syncs_in_progress:
            LOG.debug('Sync power state already in progress for %s', uuid)
            continue

        if db_instance.status not in (states.ACTIVE, states.STOPPED):
            LOG.info(_LI("During sync_power_state the instance has a "
                         "pending task (%(task)s). Skip."),
                     {'task': db_instance.status},
                     instance=db_instance)
            continue

        if uuid not in node_dict:
            continue

        node_power_state = node_dict[uuid].power_state
        if db_instance.power_state != node_power_state:
            LOG.debug('Triggering sync for uuid %s', uuid)
            self._syncs_in_progress[uuid] = True
            self._sync_power_pool.spawn_n(_sync, db_instance,
                                          node_power_state)
def _sync_instance_power_state(self, context, db_instance,
                               node_power_state):
    """Align instance power state between the database and hypervisor.

    The instance is refreshed from the database first. If its status is
    neither ACTIVE nor STOPPED the sync is skipped; otherwise, when the
    stored power_state differs from ``node_power_state``, the stored value
    is overwritten with the node's value and saved.

    :param context: request context of the caller (not used directly).
    :param db_instance: the instance object to refresh and update.
    :param node_power_state: power state reported for the instance's node.
    """
    # We re-query the DB to get the latest instance info to minimize
    # (not eliminate) race condition.
    db_instance.refresh()
    db_power_state = db_instance.power_state

    if db_instance.status not in (states.ACTIVE, states.STOPPED):
        # on the receiving end of mogan-engine, it could happen
        # that the DB instance already report the new resident
        # but the actual BM has not showed up on the hypervisor
        # yet. In this case, let's allow the loop to continue
        # and run the state sync in a later round
        #
        # The pending state lives in ``status`` (the same field checked
        # above and logged by _sync_power_states).
        LOG.info(_LI("During sync_power_state the instance has a "
                     "pending task (%(task)s). Skip."),
                 {'task': db_instance.status},
                 instance=db_instance)
        return

    if node_power_state != db_power_state:
        LOG.info(_LI('During _sync_instance_power_state the DB '
                     'power_state (%(db_power_state)s) does not match '
                     'the node_power_state from the hypervisor '
                     '(%(node_power_state)s). Updating power_state in the '
                     'DB to match the hypervisor.'),
                 {'db_power_state': db_power_state,
                  'node_power_state': node_power_state},
                 instance=db_instance)
        # power_state is always updated from hypervisor to db
        db_instance.power_state = node_power_state
        db_instance.save()
def _set_instance_obj_error_state(self, context, instance):
    """Set the instance status to ERROR and save it.

    If saving raises ``InstanceNotFound`` the failure is only logged at
    debug level and not propagated.

    :param context: request context (not used in the body).
    :param instance: the instance object to flag as errored.
    """
    gone_msg = ('Instance has been destroyed from under us while '
                'trying to set it to ERROR')
    try:
        instance.status = states.ERROR
        instance.save()
    except exception.InstanceNotFound:
        LOG.debug(gone_msg, instance=instance)
def destroy_networks(self, context, instance):
LOG.debug("unplug: instance_uuid=%(uuid)s vif=%(network_info)s",
{'uuid': instance.uuid,
@ -195,6 +318,7 @@ class EngineManager(base_manager.BaseEngineManager):
_run_flow()
except Exception as e:
fsm.process_event('error')
instance.power_state = states.NOSTATE
instance.status = fsm.current_state
instance.save()
LOG.error(_LE("Created instance %(uuid)s failed."
@ -206,6 +330,8 @@ class EngineManager(base_manager.BaseEngineManager):
# doesn't alter the instance in any way. This may raise
# InvalidState, if this event is not allowed in the current state.
fsm.process_event('done')
instance.power_state = ironic.get_power_state(self.ironicclient,
instance.uuid)
instance.status = fsm.current_state
instance.launched_at = timeutils.utcnow()
instance.save()
@ -240,11 +366,13 @@ class EngineManager(base_manager.BaseEngineManager):
"instance resources."),
instance=instance)
fsm.process_event('error')
instance.power_state = states.NOSTATE
instance.status = fsm.current_state
instance.save()
return
fsm.process_event('done')
instance.power_state = states.NOSTATE
instance.status = fsm.current_state
instance.deleted_at = timeutils.utcnow()
instance.save()
@ -279,14 +407,31 @@ class EngineManager(base_manager.BaseEngineManager):
def set_power_state(self, context, instance, state):
"""Set power state for the specified instance."""
LOG.debug('Power %(state)s called for instance %(instance)s',
{'state': state,
'instance': instance})
ironic.set_power_state(self.ironicclient, instance.node_uuid, state)
timer = loopingcall.FixedIntervalLoopingCall(
self._wait_for_power_state, instance)
timer.start(interval=CONF.ironic.api_retry_interval).wait()
# Initialize state machine
fsm = states.machine.copy()
fsm.initialize(start_state=instance.status)
@utils.synchronized(instance.uuid)
def do_set_power_state():
LOG.debug('Power %(state)s called for instance %(instance)s',
{'state': state,
'instance': instance})
ironic.set_power_state(self.ironicclient,
instance.node_uuid,
state)
timer = loopingcall.FixedIntervalLoopingCall(
self._wait_for_power_state, instance)
timer.start(interval=CONF.ironic.api_retry_interval).wait()
fsm.process_event('done')
instance.power_state = ironic.get_power_state(self.ironicclient,
instance.uuid)
instance.status = fsm.current_state
instance.save()
do_set_power_state()
LOG.info(_LI('Successfully set node power state: %s'),
state, instance=instance)

View File

@ -36,6 +36,7 @@ class Instance(base.MoganObject, object_base.VersionedObjectDictCompat):
'project_id': object_fields.UUIDField(nullable=True),
'user_id': object_fields.UUIDField(nullable=True),
'status': object_fields.StringField(nullable=True),
'power_state': object_fields.StringField(nullable=True),
'instance_type_uuid': object_fields.UUIDField(nullable=True),
'availability_zone': object_fields.StringField(nullable=True),
'image_uuid': object_fields.UUIDField(nullable=True),

View File

@ -178,7 +178,7 @@ class TestInstances(v1_test.APITestV1):
self._prepare_instance(4)
resps = self.get_json('/instances/detail')['instances']
self.assertEqual(4, len(resps))
self.assertEqual(15, len(resps[0].keys()))
self.assertEqual(16, len(resps[0].keys()))
self.assertEqual('test_instance_0', resps[0]['name'])
self.assertEqual('just test instance 0', resps[0]['description'])
self.assertEqual('building', resps[0]['status'])

View File

@ -70,17 +70,13 @@ class TestInstanceAuthorization(v1_test.APITestV1):
headers = self.gen_headers(self.context)
self.post_json('/instances', body, headers=headers, status=201)
@mock.patch('mogan.engine.api.API.get_ironic_node')
def test_instance_get_one_by_owner(self, mock_get_node):
mock_get_node.return_value = {'power_state': 'power on'}
def test_instance_get_one_by_owner(self):
# not admin but the owner
self.context.tenant = self.instance1.project_id
headers = self.gen_headers(self.context, roles="no-admin")
self.get_json('/instances/%s' % self.instance1.uuid, headers=headers)
@mock.patch('mogan.engine.api.API.get_ironic_node')
def test_instance_get_one_by_admin(self, mock_get_node):
mock_get_node.return_value = {'power_state': 'power on'}
def test_instance_get_one_by_admin(self):
# when the evil tenant is admin, he can do everything.
self.context.tenant = self.evil_project
headers = self.gen_headers(self.context, roles="admin")
@ -116,10 +112,8 @@ class TestPatch(v1_test.APITestV1):
self.assertEqual('application/json', response.content_type)
self.assertTrue(response.json['error_message'])
@mock.patch('mogan.engine.api.API.get_ironic_node')
@mock.patch.object(timeutils, 'utcnow')
def test_replace_singular(self, mock_utcnow, mock_get_node):
mock_get_node.return_value = {'power_state': 'power on'}
def test_replace_singular(self, mock_utcnow):
description = 'instance-new-description'
test_time = datetime.datetime(2000, 1, 1, 0, 0)
@ -137,9 +131,7 @@ class TestPatch(v1_test.APITestV1):
result['updated_at']).replace(tzinfo=None)
self.assertEqual(test_time, return_updated_at)
@mock.patch('mogan.engine.api.API.get_ironic_node')
def test_replace_multi(self, mock_get_node):
mock_get_node.return_value = {'power_state': 'power on'}
def test_replace_multi(self):
extra = {"foo1": "bar1", "foo2": "bar2", "foo3": "bar3"}
uuid = uuidutils.generate_uuid()
instance = utils.create_test_instance(name='test1', uuid=uuid,
@ -157,9 +149,7 @@ class TestPatch(v1_test.APITestV1):
extra["foo2"] = new_value
self.assertEqual(extra, result['extra'])
@mock.patch('mogan.engine.api.API.get_ironic_node')
def test_remove_singular(self, mock_get_node):
mock_get_node.return_value = {'power_state': 'power on'}
def test_remove_singular(self):
uuid = uuidutils.generate_uuid()
instance = utils.create_test_instance(name='test2', uuid=uuid,
extra={'a': 'b'})
@ -176,9 +166,7 @@ class TestPatch(v1_test.APITestV1):
self.assertEqual(instance.uuid, result['uuid'])
self.assertEqual(instance.extra, result['extra'])
@mock.patch('mogan.engine.api.API.get_ironic_node')
def test_remove_multi(self, mock_get_node):
mock_get_node.return_value = {'power_state': 'power on'}
def test_remove_multi(self):
extra = {"foo1": "bar1", "foo2": "bar2", "foo3": "bar3"}
uuid = uuidutils.generate_uuid()
instance = utils.create_test_instance(name='test3', extra=extra,
@ -235,9 +223,7 @@ class TestPatch(v1_test.APITestV1):
self.assertEqual(http_client.BAD_REQUEST, response.status_int)
self.assertTrue(response.json['error_message'])
@mock.patch('mogan.engine.api.API.get_ironic_node')
def test_add_multi(self, mock_get_node):
mock_get_node.return_value = {'power_state': 'power on'}
def test_add_multi(self):
response = self.patch_json('/instances/%s' % self.instance.uuid,
[{'path': '/extra/foo1', 'value': 'bar1',
'op': 'add'},

View File

@ -47,6 +47,7 @@ def get_test_instance(**kw):
'c18e8a1a870d4c08a0b51ced6e0b6459'),
'user_id': kw.get('user_id', 'cdbf77d47f1d4d04ad9b7ff62b672467'),
'status': kw.get('status', states.ACTIVE),
'power_state': kw.get('power_state', 'power on'),
'instance_type_uuid': kw.get('instance_type_uuid',
'28708dff-283c-449e-9bfa-a48c93480c86'),
'availability_zone': kw.get('availability_zone', 'test_az'),

View File

@ -151,14 +151,18 @@ class ManageInstanceTestCase(mgr_utils.ServiceSetUpMixin,
self.assertFalse(destroy_inst_mock.called)
@mock.patch.object(ironic, 'get_power_state')
@mock.patch.object(ironic, 'get_node_by_instance')
@mock.patch.object(ironic, 'set_power_state')
def test_change_instance_power_state(
self, set_power_mock, get_node_mock, refresh_cache_mock):
instance = obj_utils.create_test_instance(self.context)
self, set_power_mock, get_node_mock, get_power_mock,
refresh_cache_mock):
instance = obj_utils.create_test_instance(
self.context, status=states.POWERING_ON)
fake_node = mock.MagicMock()
fake_node.target_power_state = ironic_states.NOSTATE
get_node_mock.return_value = fake_node
get_power_mock.return_value = states.POWER_ON
refresh_cache_mock.side_effect = None
self._start_service()
@ -169,6 +173,7 @@ class ManageInstanceTestCase(mgr_utils.ServiceSetUpMixin,
set_power_mock.assert_called_once_with(mock.ANY, instance.node_uuid,
ironic_states.POWER_ON)
get_node_mock.assert_called_once_with(mock.ANY, instance.uuid)
get_power_mock.assert_called_once_with(mock.ANY, instance.uuid)
@mock.patch.object(ironic, 'get_node_states')
def test_get_instance_states(self, get_states_mock, refresh_cache_mock):

View File

@ -382,7 +382,7 @@ class _TestObject(object):
# version bump. It is md5 hash of object fields and remotable methods.
# The fingerprint values should only be changed if there is a version bump.
expected_object_fingerprints = {
'Instance': '1.0-dc4b1e2710dddf6fe42582c90403c0b0',
'Instance': '1.0-c3a73e3ec189aa09dc430b389c81b11f',
'InstanceType': '1.0-589b096651fcdb30898ff50f748dd948',
'MyObj': '1.1-aad62eedc5a5cc8bcaf2982c285e753f',
'FakeNode': '1.0-295d1b08ce3048535926c47dedd27211',