Merge "Sync COMPUTE_STATUS_DISABLED from API"
This commit is contained in:
commit
499e753cf3
|
@ -106,6 +106,7 @@ AGGREGATE_ACTION_ADD = 'Add'
|
|||
MIN_COMPUTE_TRUSTED_CERTS = 31
|
||||
MIN_COMPUTE_ABORT_QUEUED_LIVE_MIGRATION = 34
|
||||
MIN_COMPUTE_VOLUME_TYPE = 36
|
||||
MIN_COMPUTE_SYNC_COMPUTE_STATUS_DISABLED = 38
|
||||
|
||||
# FIXME(danms): Keep a global cache of the cells we find the
|
||||
# first time we look. This needs to be refreshed on a timer or
|
||||
|
@ -5047,17 +5048,69 @@ class HostAPI(base.Base):
|
|||
"""Get service entry for the given compute hostname."""
|
||||
return objects.Service.get_by_compute_host(context, host_name)
|
||||
|
||||
def _update_compute_provider_status(self, context, service):
|
||||
"""Calls the compute service to sync the COMPUTE_STATUS_DISABLED trait.
|
||||
|
||||
There are two cases where the API will not call the compute service:
|
||||
|
||||
* The compute service is down. In this case the trait is synchronized
|
||||
when the compute service is restarted.
|
||||
* The compute service is old. In this case the trait is synchronized
|
||||
when the compute service is upgraded and restarted.
|
||||
|
||||
:param context: nova auth RequestContext
|
||||
:param service: nova.objects.Service object which has been enabled
|
||||
or disabled (see ``service_update``).
|
||||
"""
|
||||
# Make sure the service is up so we can make the RPC call.
|
||||
if not self.servicegroup_api.service_is_up(service):
|
||||
LOG.info('Compute service on host %s is down. The '
|
||||
'COMPUTE_STATUS_DISABLED trait will be synchronized '
|
||||
'when the service is restarted.', service.host)
|
||||
return
|
||||
|
||||
# Make sure the compute service is new enough for the trait sync
|
||||
# behavior.
|
||||
# TODO(mriedem): Remove this compat check in the U release.
|
||||
if service.version < MIN_COMPUTE_SYNC_COMPUTE_STATUS_DISABLED:
|
||||
LOG.info('Compute service on host %s is too old to sync the '
|
||||
'COMPUTE_STATUS_DISABLED trait in Placement. The '
|
||||
'trait will be synchronized when the service is '
|
||||
'upgraded and restarted.', service.host)
|
||||
return
|
||||
|
||||
enabled = not service.disabled
|
||||
# Avoid leaking errors out of the API.
|
||||
try:
|
||||
LOG.debug('Calling the compute service on host %s to sync the '
|
||||
'COMPUTE_STATUS_DISABLED trait.', service.host)
|
||||
self.rpcapi.set_host_enabled(context, service.host, enabled)
|
||||
except Exception:
|
||||
LOG.exception('An error occurred while updating host enabled '
|
||||
'status to "%s" for compute host: %s',
|
||||
'enabled' if enabled else 'disabled',
|
||||
service.host)
|
||||
|
||||
def service_update(self, context, service):
|
||||
"""Performs the actual service update operation.
|
||||
|
||||
If the "disabled" field is changed, potentially calls the compute
|
||||
service to sync the COMPUTE_STATUS_DISABLED trait on the compute node
|
||||
resource providers managed by this compute service.
|
||||
|
||||
:param context: nova auth RequestContext
|
||||
:param service: nova.objects.Service object with changes already
|
||||
set on the object
|
||||
"""
|
||||
# Before persisting changes and resetting the changed fields on the
|
||||
# Service object, determine if the disabled field changed.
|
||||
update_placement = 'disabled' in service.obj_what_changed()
|
||||
# Persist the Service object changes to the database.
|
||||
service.save()
|
||||
# TODO(mriedem): Reflect COMPUTE_STATUS_DISABLED trait changes to the
|
||||
# associated compute node resource providers if the service's disabled
|
||||
# status changed.
|
||||
# If the disabled field changed, potentially call the compute service
|
||||
# to sync the COMPUTE_STATUS_DISABLED trait.
|
||||
if update_placement:
|
||||
self._update_compute_provider_status(context, service)
|
||||
return service
|
||||
|
||||
@target_host_cell
|
||||
|
|
|
@ -913,7 +913,9 @@ class ComputeAPI(object):
|
|||
def set_host_enabled(self, ctxt, host, enabled):
|
||||
version = '5.0'
|
||||
cctxt = self.router.client(ctxt).prepare(
|
||||
server=host, version=version)
|
||||
server=host, version=version,
|
||||
call_monitor_timeout=CONF.rpc_response_timeout,
|
||||
timeout=CONF.long_rpc_timeout)
|
||||
return cctxt.call(ctxt, 'set_host_enabled', enabled=enabled)
|
||||
|
||||
def swap_volume(self, ctxt, instance, old_volume_id, new_volume_id,
|
||||
|
|
|
@ -28,6 +28,7 @@ Operations with RPC calls that utilize this value:
|
|||
|
||||
* live migration
|
||||
* scheduling
|
||||
* enabling/disabling a compute service
|
||||
|
||||
Related options:
|
||||
|
||||
|
|
|
@ -37,6 +37,12 @@ class ServicesJsonTest(api_sample_base.ApiSampleTestBaseV21):
|
|||
test_services.fake_service_get_by_host_binary)
|
||||
self.stub_out("nova.db.api.service_update",
|
||||
test_services.fake_service_update)
|
||||
# If we are not using real services, we need to stub out
|
||||
# HostAPI._update_compute_provider_status so we don't actually
|
||||
# try to call a fake service over RPC.
|
||||
self.stub_out('nova.compute.api.HostAPI.'
|
||||
'_update_compute_provider_status',
|
||||
lambda *args, **kwargs: None)
|
||||
self.useFixture(utils_fixture.TimeFixture(test_services.fake_utcnow()))
|
||||
|
||||
def test_services_list(self):
|
||||
|
|
|
@ -11,6 +11,7 @@
|
|||
# under the License.
|
||||
|
||||
import os_resource_classes as orc
|
||||
import os_traits
|
||||
import six
|
||||
|
||||
from nova import context as nova_context
|
||||
|
@ -18,6 +19,8 @@ from nova import exception
|
|||
from nova import objects
|
||||
from nova.tests.functional.api import client as api_client
|
||||
from nova.tests.functional import integrated_helpers
|
||||
from nova.tests.unit.image import fake as fake_image
|
||||
from nova import utils
|
||||
|
||||
|
||||
class TestServicesAPI(integrated_helpers.ProviderUsageBaseTestCase):
|
||||
|
@ -171,3 +174,132 @@ class TestServicesAPI(integrated_helpers.ProviderUsageBaseTestCase):
|
|||
log_output = self.stdlog.logger.output
|
||||
self.assertIn('Error updating resources for node host1.', log_output)
|
||||
self.assertIn('Failed to create resource provider host1', log_output)
|
||||
|
||||
|
||||
class ComputeStatusFilterTest(integrated_helpers.ProviderUsageBaseTestCase):
|
||||
"""Tests the API, compute service and Placement interaction with the
|
||||
COMPUTE_STATUS_DISABLED trait when a compute service is enable/disabled.
|
||||
|
||||
This version of the test uses the 2.latest microversion for testing the
|
||||
2.53+ behavior of the PUT /os-services/{service_id} API.
|
||||
"""
|
||||
compute_driver = 'fake.SmallFakeDriver'
|
||||
|
||||
def _update_service(self, service, disabled, forced_down=None):
|
||||
"""Update the service using the 2.53 request schema.
|
||||
|
||||
:param service: dict representing the service resource in the API
|
||||
:param disabled: True if the service should be disabled, False if the
|
||||
service should be enabled
|
||||
:param forced_down: Optionally change the forced_down value.
|
||||
"""
|
||||
status = 'disabled' if disabled else 'enabled'
|
||||
req = {'status': status}
|
||||
if forced_down is not None:
|
||||
req['forced_down'] = forced_down
|
||||
self.admin_api.put_service(service['id'], req)
|
||||
|
||||
def test_compute_status_filter(self):
|
||||
"""Tests the compute_status_filter placement request filter"""
|
||||
# Start a compute service so a compute node and resource provider is
|
||||
# created.
|
||||
compute = self._start_compute('host1')
|
||||
# Get the UUID of the resource provider that was created.
|
||||
rp_uuid = self._get_provider_uuid_by_host('host1')
|
||||
# Get the service from the compute API.
|
||||
services = self.admin_api.get_services(binary='nova-compute',
|
||||
host='host1')
|
||||
self.assertEqual(1, len(services))
|
||||
service = services[0]
|
||||
|
||||
# At this point, the service should be enabled and the
|
||||
# COMPUTE_STATUS_DISABLED trait should not be set on the
|
||||
# resource provider in placement.
|
||||
self.assertEqual('enabled', service['status'])
|
||||
rp_traits = self._get_provider_traits(rp_uuid)
|
||||
trait = os_traits.COMPUTE_STATUS_DISABLED
|
||||
self.assertNotIn(trait, rp_traits)
|
||||
|
||||
# Now disable the compute service via the API.
|
||||
self._update_service(service, disabled=True)
|
||||
|
||||
# The update to placement should be synchronous so check the provider
|
||||
# traits and COMPUTE_STATUS_DISABLED should be set.
|
||||
rp_traits = self._get_provider_traits(rp_uuid)
|
||||
self.assertIn(trait, rp_traits)
|
||||
|
||||
# Try creating a server which should fail because nothing is available.
|
||||
networks = [{'port': self.neutron.port_1['id']}]
|
||||
server_req = self._build_minimal_create_server_request(
|
||||
self.api, 'test_compute_status_filter',
|
||||
image_uuid=fake_image.get_valid_image_id(), networks=networks)
|
||||
server = self.api.post_server({'server': server_req})
|
||||
server = self._wait_for_state_change(self.api, server, 'ERROR')
|
||||
# There should be a NoValidHost fault recorded.
|
||||
self.assertIn('fault', server)
|
||||
self.assertIn('No valid host', server['fault']['message'])
|
||||
|
||||
# Now enable the service and the trait should be gone.
|
||||
self._update_service(service, disabled=False)
|
||||
rp_traits = self._get_provider_traits(rp_uuid)
|
||||
self.assertNotIn(trait, rp_traits)
|
||||
|
||||
# Try creating another server and it should be OK.
|
||||
server = self.api.post_server({'server': server_req})
|
||||
self._wait_for_state_change(self.api, server, 'ACTIVE')
|
||||
|
||||
# Stop, force-down and disable the service so the API cannot call
|
||||
# the compute service to sync the trait.
|
||||
compute.stop()
|
||||
self._update_service(service, disabled=True, forced_down=True)
|
||||
# The API should have logged a message about the service being down.
|
||||
self.assertIn('Compute service on host host1 is down. The '
|
||||
'COMPUTE_STATUS_DISABLED trait will be synchronized '
|
||||
'when the service is restarted.',
|
||||
self.stdlog.logger.output)
|
||||
# The trait should not be on the provider even though the node is
|
||||
# disabled.
|
||||
rp_traits = self._get_provider_traits(rp_uuid)
|
||||
self.assertNotIn(trait, rp_traits)
|
||||
# Restart the compute service which should sync and set the trait on
|
||||
# the provider in placement.
|
||||
self.restart_compute_service(compute)
|
||||
rp_traits = self._get_provider_traits(rp_uuid)
|
||||
self.assertIn(trait, rp_traits)
|
||||
|
||||
|
||||
class ComputeStatusFilterTest211(ComputeStatusFilterTest):
|
||||
"""Extends ComputeStatusFilterTest and uses the 2.11 API for the
|
||||
legacy os-services disable/enable/force-down API behavior
|
||||
"""
|
||||
microversion = '2.11'
|
||||
|
||||
def _update_service(self, service, disabled, forced_down=None):
|
||||
"""Update the service using the 2.11 request schema.
|
||||
|
||||
:param service: dict representing the service resource in the API
|
||||
:param disabled: True if the service should be disabled, False if the
|
||||
service should be enabled
|
||||
:param forced_down: Optionally change the forced_down value.
|
||||
"""
|
||||
# Before 2.53 the service is uniquely identified by host and binary.
|
||||
body = {
|
||||
'host': service['host'],
|
||||
'binary': service['binary']
|
||||
}
|
||||
# Handle forced_down first if provided since the enable/disable
|
||||
# behavior in the API depends on it.
|
||||
if forced_down is not None:
|
||||
body['forced_down'] = forced_down
|
||||
self.admin_api.api_put('/os-services/force-down', body)
|
||||
|
||||
if disabled:
|
||||
self.admin_api.api_put('/os-services/disable', body)
|
||||
else:
|
||||
self.admin_api.api_put('/os-services/enable', body)
|
||||
|
||||
def _get_provider_uuid_by_host(self, host):
|
||||
# We have to temporarily mutate to 2.53 to get the hypervisor UUID.
|
||||
with utils.temporary_mutation(self.admin_api, microversion='2.53'):
|
||||
return super(ComputeStatusFilterTest211,
|
||||
self)._get_provider_uuid_by_host(host)
|
||||
|
|
|
@ -16,6 +16,7 @@
|
|||
|
||||
import fixtures
|
||||
import mock
|
||||
import oslo_messaging as messaging
|
||||
from oslo_utils.fixture import uuidsentinel as uuids
|
||||
|
||||
from nova.api.openstack.compute import services
|
||||
|
@ -327,18 +328,73 @@ class ComputeHostAPITestCase(test.TestCase):
|
|||
service_id = 42
|
||||
expected_result = dict(test_service.fake_service, id=service_id)
|
||||
|
||||
@mock.patch.object(self.host_api, '_update_compute_provider_status')
|
||||
@mock.patch.object(self.host_api.db, 'service_get_by_host_and_binary')
|
||||
@mock.patch.object(self.host_api.db, 'service_update')
|
||||
def _do_test(mock_service_update, mock_service_get_by_host_and_binary):
|
||||
def _do_test(mock_service_update, mock_service_get_by_host_and_binary,
|
||||
mock_update_compute_provider_status):
|
||||
mock_service_get_by_host_and_binary.return_value = expected_result
|
||||
mock_service_update.return_value = expected_result
|
||||
|
||||
result = self.host_api.service_update_by_host_and_binary(
|
||||
self.ctxt, host_name, binary, params_to_update)
|
||||
self._compare_obj(result, expected_result)
|
||||
mock_update_compute_provider_status.assert_called_once_with(
|
||||
self.ctxt, test.MatchType(objects.Service))
|
||||
|
||||
_do_test()
|
||||
|
||||
@mock.patch('nova.compute.api.HostAPI._update_compute_provider_status',
|
||||
new_callable=mock.NonCallableMock)
|
||||
def test_service_update_no_update_provider_status(self, mock_ucps):
|
||||
"""Tests the scenario that the service is updated but the disabled
|
||||
field is not changed, for example the forced_down field is only
|
||||
updated. In this case _update_compute_provider_status should not be
|
||||
called.
|
||||
"""
|
||||
service = objects.Service(forced_down=True)
|
||||
self.assertIn('forced_down', service.obj_what_changed())
|
||||
with mock.patch.object(service, 'save') as mock_save:
|
||||
retval = self.host_api.service_update(self.ctxt, service)
|
||||
self.assertIs(retval, service)
|
||||
mock_save.assert_called_once_with()
|
||||
|
||||
@mock.patch('nova.compute.rpcapi.ComputeAPI.set_host_enabled',
|
||||
new_callable=mock.NonCallableMock)
|
||||
def test_update_compute_provider_status_service_too_old(self, mock_she):
|
||||
"""Tests the scenario that the service is up but is too old to sync the
|
||||
COMPUTE_STATUS_DISABLED trait.
|
||||
"""
|
||||
service = objects.Service(host='fake-host')
|
||||
service.version = compute.MIN_COMPUTE_SYNC_COMPUTE_STATUS_DISABLED - 1
|
||||
with mock.patch.object(
|
||||
self.host_api.servicegroup_api, 'service_is_up',
|
||||
return_value=True) as service_is_up:
|
||||
self.host_api._update_compute_provider_status(self.ctxt, service)
|
||||
service_is_up.assert_called_once_with(service)
|
||||
self.assertIn('Compute service on host fake-host is too old to sync '
|
||||
'the COMPUTE_STATUS_DISABLED trait in Placement.',
|
||||
self.stdlog.logger.output)
|
||||
|
||||
@mock.patch('nova.compute.rpcapi.ComputeAPI.set_host_enabled',
|
||||
side_effect=messaging.MessagingTimeout)
|
||||
def test_update_compute_provider_status_service_rpc_error(self, mock_she):
|
||||
"""Tests the scenario that the RPC call to the compute service raised
|
||||
some exception.
|
||||
"""
|
||||
service = objects.Service(host='fake-host', disabled=True)
|
||||
with mock.patch.object(
|
||||
self.host_api.servicegroup_api, 'service_is_up',
|
||||
return_value=True) as service_is_up:
|
||||
self.host_api._update_compute_provider_status(self.ctxt, service)
|
||||
service_is_up.assert_called_once_with(service)
|
||||
mock_she.assert_called_once_with(self.ctxt, 'fake-host', False)
|
||||
log_output = self.stdlog.logger.output
|
||||
self.assertIn('An error occurred while updating host enabled '
|
||||
'status to "disabled" for compute host: fake-host',
|
||||
log_output)
|
||||
self.assertIn('MessagingTimeout', log_output)
|
||||
|
||||
@mock.patch.object(objects.InstanceList, 'get_by_host',
|
||||
return_value = ['fake-responses'])
|
||||
def test_instance_get_all_by_host(self, mock_get):
|
||||
|
|
|
@ -468,8 +468,10 @@ class ComputeRpcAPITestCase(test.NoDBTestCase):
|
|||
version='5.0')
|
||||
|
||||
def test_set_host_enabled(self):
|
||||
self.flags(long_rpc_timeout=600, rpc_response_timeout=120)
|
||||
self._test_compute_api('set_host_enabled', 'call',
|
||||
enabled='enabled', host='host')
|
||||
enabled='enabled', host='host',
|
||||
call_monitor_timeout=120, timeout=600)
|
||||
|
||||
def test_get_host_uptime(self):
|
||||
self._test_compute_api('get_host_uptime', 'call', host='host')
|
||||
|
|
Loading…
Reference in New Issue