Refined fix for validating image on rebuild

This aims to fix the issue described in bug 1664931 where a rebuild
fails to validate the existing host with the scheduler when a new
image is provided. The previous attempt at this could cause rebuilds
to fail unnecessarily because we ran _all_ of the filters during a
rebuild, allowing usage/resource filters to reject an otherwise valid
rebuild.

This aims to classify filters as useful for rebuild or not, and only apply
the former during a rebuild scheduler check. We do this by using an internal
scheduler hint, indicating our intent. This should (a) filter out
all hosts other than the one we're running on and (b) be detectable by
the filtering infrastructure as an internally-generated scheduling request
in order to trigger the correct filtering behavior.
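
In short: the rebuild path tags its own request spec, and the filter
infrastructure consults that tag before running each filter. A condensed,
non-runnable sketch of the flow, using only names that appear in the
hunks below:

    # nova.compute.api.API.rebuild() tags the spec when the image changes
    # and forces the current host/node:
    request_spec.scheduler_hints['_nova_check_type'] = ['rebuild']
    request_spec.force_hosts = [instance.host]
    request_spec.force_nodes = [instance.node]

    # nova.scheduler.utils.request_is_rebuild() detects the tag:
    def request_is_rebuild(spec_obj):
        if not spec_obj or 'scheduler_hints' not in spec_obj:
            return False
        return spec_obj.scheduler_hints.get('_nova_check_type') == ['rebuild']

    # BaseHostFilter._filter_one() then skips filters that opted out:
    if not self.RUN_ON_REBUILD and utils.request_is_rebuild(spec):
        return True  # not a rebuild-relevant filter; pass the host
    return self.host_passes(obj, spec)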

Conflicts:
      nova/scheduler/filters/exact_core_filter.py
      nova/scheduler/filters/exact_disk_filter.py
      nova/scheduler/filters/exact_ram_filter.py
      nova/scheduler/filters/type_filter.py

NOTE(mriedem): The conflicts were due to deprecation warnings
which don't exist in Ocata. The functional test also needed a missing
import and to use api_put() instead of put_service().

Closes-Bug: #1664931
Change-Id: I1a46ef1503be2febcd20f4594f44344d05525446
(cherry picked from commit f7c688b8ef)
(cherry picked from commit b29a461a8b)
Commit: bbfc4230ef (parent: 324d57bec3)
Author: Dan Smith, 2017-11-17 12:27:34 -08:00
Committed-by: Matt Riedemann
32 changed files with 169 additions and 13 deletions

@@ -3059,16 +3059,27 @@ class API(base.Base):
             # through the scheduler again, but we want the instance to be
             # rebuilt on the same host it's already on.
             if orig_image_ref != image_href:
-                request_spec.requested_destination = objects.Destination(
-                    host=instance.host,
-                    node=instance.node)
                 # We have to modify the request spec that goes to the scheduler
                 # to contain the new image. We persist this since we've already
                 # changed the instance.image_ref above so we're being
                 # consistent.
                 request_spec.image = objects.ImageMeta.from_dict(image)
                 request_spec.save()
-                host = None  # This tells conductor to call the scheduler.
+                if 'scheduler_hints' not in request_spec:
+                    request_spec.scheduler_hints = {}
+                # Nuke the id on this so we can't accidentally save
+                # this hint hack later
+                del request_spec.id
+
+                # NOTE(danms): Passing host=None tells conductor to
+                # call the scheduler. The _nova_check_type hint
+                # requires that the scheduler returns only the same
+                # host that we are currently on and only checks
+                # rebuild-related filters.
+                request_spec.scheduler_hints['_nova_check_type'] = ['rebuild']
+                request_spec.force_hosts = [instance.host]
+                request_spec.force_nodes = [instance.node]
+                host = None
         except exception.RequestSpecNotFound:
             # Some old instances can still have no RequestSpec object attached
             # to them, we need to support the old way

@@ -21,9 +21,27 @@ from nova import filters
 class BaseHostFilter(filters.BaseFilter):
     """Base class for host filters."""
 
+    # This is set to True if this filter should be run for rebuild.
+    # For example, with rebuild, we need to ask the scheduler if the
+    # existing host is still legit for a rebuild with the new image and
+    # other parameters. We care about running policy filters (i.e.
+    # ImagePropertiesFilter) but not things that check usage on the
+    # existing compute node, etc.
+    RUN_ON_REBUILD = False
+
-    def _filter_one(self, obj, filter_properties):
+    def _filter_one(self, obj, spec):
         """Return True if the object passes the filter, otherwise False."""
-        return self.host_passes(obj, filter_properties)
+        # Do this here so we don't get scheduler.filters.utils
+        from nova.scheduler import utils
+        if not self.RUN_ON_REBUILD and utils.request_is_rebuild(spec):
+            # If we don't filter, default to passing the host.
+            return True
+        else:
+            # We are either a rebuild filter, in which case we always run,
+            # or this request is not rebuild in which case all filters
+            # should run.
+            return self.host_passes(obj, spec)
 
     def host_passes(self, host_state, filter_properties):
         """Return True if the HostState passes the filter, otherwise False.

@@ -29,6 +29,8 @@ class DifferentHostFilter(filters.BaseHostFilter):
     # The hosts the instances are running on doesn't change within a request
     run_filter_once_per_request = True
 
+    RUN_ON_REBUILD = False
+
     def host_passes(self, host_state, spec_obj):
         affinity_uuids = spec_obj.get_scheduler_hint('different_host')
         if affinity_uuids:
@@ -45,6 +47,8 @@ class SameHostFilter(filters.BaseHostFilter):
     # The hosts the instances are running on doesn't change within a request
     run_filter_once_per_request = True
 
+    RUN_ON_REBUILD = False
+
     def host_passes(self, host_state, spec_obj):
         affinity_uuids = spec_obj.get_scheduler_hint('same_host')
         if affinity_uuids:
@@ -59,6 +63,8 @@ class SimpleCIDRAffinityFilter(filters.BaseHostFilter):
     # The address of a host doesn't change within a request
     run_filter_once_per_request = True
 
+    RUN_ON_REBUILD = False
+
     def host_passes(self, host_state, spec_obj):
         affinity_cidr = spec_obj.get_scheduler_hint('cidr', '/24')
         affinity_host_addr = spec_obj.get_scheduler_hint('build_near_host_ip')
@@ -77,6 +83,9 @@ class _GroupAntiAffinityFilter(filters.BaseHostFilter):
     """Schedule the instance on a different host from a set of group
     hosts.
     """
+
+    RUN_ON_REBUILD = False
+
     def host_passes(self, host_state, spec_obj):
         # Only invoke the filter if 'anti-affinity' is configured
         policies = (spec_obj.instance_group.policies
@@ -110,6 +119,9 @@ class ServerGroupAntiAffinityFilter(_GroupAntiAffinityFilter):
 class _GroupAffinityFilter(filters.BaseHostFilter):
     """Schedule the instance on to host from a set of group hosts.
     """
+
+    RUN_ON_REBUILD = False
+
     def host_passes(self, host_state, spec_obj):
         # Only invoke the filter if 'affinity' is configured
         policies = (spec_obj.instance_group.policies

@@ -32,6 +32,8 @@ class AggregateImagePropertiesIsolation(filters.BaseHostFilter):
     # Aggregate data and instance type does not change within a request
     run_filter_once_per_request = True
 
+    RUN_ON_REBUILD = True
+
     def host_passes(self, host_state, spec_obj):
         """Checks a host in an aggregate that metadata key/value match
         with image properties.

@@ -33,6 +33,8 @@ class AggregateInstanceExtraSpecsFilter(filters.BaseHostFilter):
     # Aggregate data and instance type does not change within a request
     run_filter_once_per_request = True
 
+    RUN_ON_REBUILD = False
+
     def host_passes(self, host_state, spec_obj):
         """Return a list of hosts that can create instance_type

@@ -28,6 +28,8 @@ class AggregateMultiTenancyIsolation(filters.BaseHostFilter):
     # Aggregate data and tenant do not change within a request
     run_filter_once_per_request = True
 
+    RUN_ON_REBUILD = False
+
     def host_passes(self, host_state, spec_obj):
         """If a host is in an aggregate that has the metadata key
         "filter_tenant_id" it can only create instances from that tenant(s).

@@ -23,5 +23,7 @@ class AllHostsFilter(filters.BaseHostFilter):
     # list of hosts doesn't change within a request
     run_filter_once_per_request = True
 
+    RUN_ON_REBUILD = False
+
     def host_passes(self, host_state, spec_obj):
         return True

@@ -35,6 +35,8 @@ class AvailabilityZoneFilter(filters.BaseHostFilter):
     # Availability zones do not change within a request
     run_filter_once_per_request = True
 
+    RUN_ON_REBUILD = False
+
     def host_passes(self, host_state, spec_obj):
         availability_zone = spec_obj.availability_zone

@@ -30,6 +30,8 @@ class ComputeCapabilitiesFilter(filters.BaseHostFilter):
     # Instance type and host capabilities do not change within a request
     run_filter_once_per_request = True
 
+    RUN_ON_REBUILD = False
+
     def _get_capabilities(self, host_state, scope):
         cap = host_state
         for index in range(0, len(scope)):

@@ -25,6 +25,8 @@ LOG = logging.getLogger(__name__)
 class ComputeFilter(filters.BaseHostFilter):
     """Filter on active Compute nodes."""
 
+    RUN_ON_REBUILD = False
+
     def __init__(self):
         self.servicegroup_api = servicegroup.API()

@@ -26,6 +26,8 @@ LOG = logging.getLogger(__name__)
 class BaseCoreFilter(filters.BaseHostFilter):
 
+    RUN_ON_REBUILD = False
+
     def _get_cpu_allocation_ratio(self, host_state, spec_obj):
         raise NotImplementedError

@@ -28,6 +28,8 @@ CONF = nova.conf.CONF
 class DiskFilter(filters.BaseHostFilter):
     """Disk Filter with over subscription flag."""
 
+    RUN_ON_REBUILD = False
+
     def _get_disk_allocation_ratio(self, host_state, spec_obj):
         return host_state.disk_allocation_ratio
 
@@ -80,6 +82,8 @@ class AggregateDiskFilter(DiskFilter):
     found.
     """
 
+    RUN_ON_REBUILD = False
+
     def _get_disk_allocation_ratio(self, host_state, spec_obj):
         aggregate_vals = utils.aggregate_values_from_key(
             host_state,

@@ -25,6 +25,8 @@ LOG = logging.getLogger(__name__)
 class ExactCoreFilter(filters.BaseHostFilter):
     """Exact Core Filter."""
 
+    RUN_ON_REBUILD = False
+
     def host_passes(self, host_state, spec_obj):
         """Return True if host has the exact number of CPU cores."""
         if not host_state.vcpus_total:

@@ -23,6 +23,8 @@ LOG = logging.getLogger(__name__)
 class ExactDiskFilter(filters.BaseHostFilter):
     """Exact Disk Filter."""
 
+    RUN_ON_REBUILD = False
+
     def host_passes(self, host_state, spec_obj):
         """Return True if host has the exact amount of disk available."""
         requested_disk = (1024 * (spec_obj.root_gb +

@@ -23,6 +23,8 @@ LOG = logging.getLogger(__name__)
 class ExactRamFilter(filters.BaseHostFilter):
     """Exact RAM Filter."""
 
+    RUN_ON_REBUILD = False
+
     def host_passes(self, host_state, spec_obj):
         """Return True if host has the exact amount of RAM available."""
         requested_ram = spec_obj.memory_mb

@@ -35,6 +35,8 @@ class ImagePropertiesFilter(filters.BaseHostFilter):
     contained in the image dictionary in the request_spec.
     """
 
+    RUN_ON_REBUILD = True
+
     # Image Properties and Compute Capabilities do not change within
     # a request
     run_filter_once_per_request = True

@@ -28,6 +28,8 @@ CONF = nova.conf.CONF
 class IoOpsFilter(filters.BaseHostFilter):
     """Filter out hosts with too many concurrent I/O operations."""
 
+    RUN_ON_REBUILD = False
+
     def _get_max_io_ops_per_host(self, host_state, spec_obj):
         return CONF.filter_scheduler.max_io_ops_per_host

@@ -25,6 +25,8 @@ class IsolatedHostsFilter(filters.BaseHostFilter):
     # The configuration values do not change within a request
     run_filter_once_per_request = True
 
+    RUN_ON_REBUILD = True
+
     def host_passes(self, host_state, spec_obj):
         """Result Matrix with 'restrict_isolated_hosts_to_isolated_images' set
         to True::

@@ -26,6 +26,9 @@ class JsonFilter(filters.BaseHostFilter):
     """Host Filter to allow simple JSON-based grammar for
     selecting hosts.
     """
+
+    RUN_ON_REBUILD = False
+
     def _op_compare(self, args, op):
         """Returns True if the specified operator can successfully
         compare the first item in the args with all the rest. Will

@@ -32,6 +32,8 @@ class MetricsFilter(filters.BaseHostFilter):
     these hosts.
     """
 
+    RUN_ON_REBUILD = False
+
     def __init__(self):
         super(MetricsFilter, self).__init__()
         opts = utils.parse_options(CONF.metrics.weight_setting,

@@ -28,6 +28,8 @@ CONF = nova.conf.CONF
 class NumInstancesFilter(filters.BaseHostFilter):
     """Filter out hosts with too many instances."""
 
+    RUN_ON_REBUILD = False
+
     def _get_max_instances_per_host(self, host_state, spec_obj):
         return CONF.filter_scheduler.max_instances_per_host

@@ -23,6 +23,8 @@ LOG = logging.getLogger(__name__)
 class NUMATopologyFilter(filters.BaseHostFilter):
     """Filter on requested NUMA topology."""
 
+    RUN_ON_REBUILD = True
+
     def _satisfies_cpu_policy(self, host_state, extra_specs, image_props):
         """Check that the host_state provided satisfies any available
         CPU policy requirements.

@@ -40,6 +40,8 @@ class PciPassthroughFilter(filters.BaseHostFilter):
     """
 
+    RUN_ON_REBUILD = False
+
     def host_passes(self, host_state, spec_obj):
         """Return true if the host has the required PCI devices."""
         pci_requests = spec_obj.pci_requests

@@ -25,6 +25,8 @@ LOG = logging.getLogger(__name__)
 class BaseRamFilter(filters.BaseHostFilter):
 
+    RUN_ON_REBUILD = False
+
     def _get_ram_allocation_ratio(self, host_state, spec_obj):
         raise NotImplementedError

@@ -26,6 +26,10 @@ class RetryFilter(filters.BaseHostFilter):
     purposes
     """
 
+    # NOTE(danms): This does not affect _where_ an instance lands, so not
+    # related to rebuild.
+    RUN_ON_REBUILD = False
+
     def host_passes(self, host_state, spec_obj):
         """Skip nodes that have already been attempted."""
         retry = spec_obj.retry

@@ -237,6 +237,8 @@ class ComputeAttestation(object):
 class TrustedFilter(filters.BaseHostFilter):
     """Trusted filter to support Trusted Compute Pools."""
 
+    RUN_ON_REBUILD = False
+
     def __init__(self):
         self.compute_attestation = ComputeAttestation()
         LOG.warning(_LW('The TrustedFilter is considered experimental '

@@ -25,6 +25,8 @@ class TypeAffinityFilter(filters.BaseHostFilter):
     (spread) set to 1 (default).
     """
 
+    RUN_ON_REBUILD = False
+
     def host_passes(self, host_state, spec_obj):
         """Dynamically limits hosts to one instance type
@@ -48,6 +50,8 @@ class AggregateTypeAffinityFilter(filters.BaseHostFilter):
     # Aggregate data does not change within a request
     run_filter_once_per_request = True
 
+    RUN_ON_REBUILD = False
+
     def host_passes(self, host_state, spec_obj):
         instance_type = spec_obj.flavor

@@ -560,8 +560,13 @@ class HostManager(object):
                 _match_forced_hosts(name_to_cls_map, force_hosts)
             if force_nodes:
                 _match_forced_nodes(name_to_cls_map, force_nodes)
-            if force_hosts or force_nodes:
-                # NOTE(deva): Skip filters when forcing host or node
+            check_type = ('scheduler_hints' in spec_obj and
+                          spec_obj.scheduler_hints.get('_nova_check_type'))
+            if not check_type and (force_hosts or force_nodes):
+                # NOTE(deva,dansmith): Skip filters when forcing host or node
+                # unless we've declared the internal check type flag, in which
+                # case we're asking for a specific host and for filtering to
+                # be done.
                 if name_to_cls_map:
                     return name_to_cls_map.values()
                 else:

@@ -383,3 +383,16 @@ def retry_on_timeout(retries=1):
         return outer
 
 retry_select_destinations = retry_on_timeout(CONF.scheduler.max_attempts - 1)
+
+
+def request_is_rebuild(spec_obj):
+    """Returns True if request is for a rebuild.
+
+    :param spec_obj: An objects.RequestSpec to examine (or None).
+    """
+    if not spec_obj:
+        return False
+    if 'scheduler_hints' not in spec_obj:
+        return False
+    check_type = spec_obj.scheduler_hints.get('_nova_check_type')
+    return check_type == ['rebuild']
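
For illustration, a sketch of the helper's behavior under its three possible
inputs (the RequestSpec values here are hypothetical, constructed via kwargs
the same way the unit test below builds its fake_spec):

    spec = objects.RequestSpec(
        scheduler_hints={'_nova_check_type': ['rebuild']})
    request_is_rebuild(spec)                   # True: internally-tagged rebuild
    request_is_rebuild(None)                   # False: no spec at all
    request_is_rebuild(objects.RequestSpec())  # False: hints never set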

@@ -37,6 +37,7 @@ from nova.tests.unit.api.openstack import fakes
 from nova.tests.unit import fake_block_device
 from nova.tests.unit import fake_network
 import nova.tests.unit.image.fake
+from nova.virt import fake
 from nova import volume
@@ -929,6 +930,16 @@ class ServerRebuildTestCase(integrated_helpers._IntegratedTestBase,
                    group='filter_scheduler')
         return self.start_service('scheduler')
 
+    def _disable_compute_for(self, server):
+        # Refresh to get its host
+        server = self.api.get_server(server['id'])
+        host = server['OS-EXT-SRV-ATTR:host']
+
+        # Disable the service it is on
+        self.api_fixture.admin_api.api_put('/os-services/disable',
+                                           {'host': host,
+                                            'binary': 'nova-compute'})
+
     def test_rebuild_with_image_novalidhost(self):
         """Creates a server with an image that is valid for the single compute
         that we have. Then rebuilds the server, passing in an image with
@@ -936,6 +947,12 @@ class ServerRebuildTestCase(integrated_helpers._IntegratedTestBase,
         a NoValidHost error. The ImagePropertiesFilter filter is enabled by
         default so that should filter out the host based on the image meta.
         """
+
+        fake.set_nodes(['host2'])
+        self.addCleanup(fake.restore_nodes)
+        self.flags(host='host2')
+        self.compute2 = self.start_service('compute', host='host2')
+
         server_req_body = {
             'server': {
                 # We hard-code from a fake image since we can't get images
@@ -950,6 +967,11 @@ class ServerRebuildTestCase(integrated_helpers._IntegratedTestBase,
         }
         server = self.api.post_server(server_req_body)
         self._wait_for_state_change(self.api, server, 'ACTIVE')
+
+        # Disable the host we're on so ComputeFilter would have ruled it out
+        # normally
+        self._disable_compute_for(server)
+
         # Now update the image metadata to be something that won't work with
         # the fake compute driver we're using since the fake driver has an
         # "x86_64" architecture.

@@ -3217,6 +3217,7 @@ class _ComputeAPIUnitTestMixIn(object):
             system_metadata=orig_system_metadata,
             expected_attrs=['system_metadata'],
             image_ref=orig_image_href,
+            node='node',
             vm_mode=fields_obj.VMMode.HVM)
         flavor = instance.get_flavor()
@@ -3228,7 +3229,7 @@ class _ComputeAPIUnitTestMixIn(object):
         _get_image.side_effect = get_image
         bdm_get_by_instance_uuid.return_value = bdms
-        fake_spec = objects.RequestSpec()
+        fake_spec = objects.RequestSpec(id=1)
         req_spec_get_by_inst_uuid.return_value = fake_spec
 
         with mock.patch.object(self.compute_api.compute_task_api,
@@ -3246,10 +3247,9 @@ class _ComputeAPIUnitTestMixIn(object):
             # assert the request spec was modified so the scheduler picks
             # the existing instance host/node
             req_spec_save.assert_called_once_with()
-            self.assertIn('requested_destination', fake_spec)
-            requested_destination = fake_spec.requested_destination
-            self.assertEqual(instance.host, requested_destination.host)
-            self.assertEqual(instance.node, requested_destination.node)
+            self.assertIn('_nova_check_type', fake_spec.scheduler_hints)
+            self.assertEqual('rebuild',
+                             fake_spec.scheduler_hints['_nova_check_type'][0])
 
         _check_auto_disk_config.assert_called_once_with(image=new_image)
         _checks_for_create_and_rebuild.assert_called_once_with(self.context,

@@ -0,0 +1,20 @@
+---
+fixes:
+  - |
+    The fix for `OSSA-2017-005`_ (CVE-2017-16239) was too far-reaching in that
+    rebuilds can now fail based on scheduling filters that should not apply
+    to rebuild. For example, a rebuild of an instance on a disabled compute
+    host could fail whereas it would not before the fix for CVE-2017-16239.
+    Similarly, rebuilding an instance on a host that is at capacity for vcpu,
+    memory or disk could fail since the scheduler filters would treat it as a
+    new build request even though the rebuild is not claiming *new* resources.
+    Therefore this release contains a fix for those regressions in scheduling
+    behavior on rebuild while maintaining the original fix for CVE-2017-16239.
+
+    .. note:: The fix relies on a ``RUN_ON_REBUILD`` variable which is checked
+              for all scheduler filters during a rebuild. The reasoning behind
+              the value for that variable depends on each filter. If you have
+              out-of-tree scheduler filters, you will likely need to assess
+              whether or not they need to override the default value (False)
+              for the new variable.
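
For example, a hypothetical out-of-tree filter (the class name and policy
here are invented purely for illustration) would opt in like this:

    from nova.scheduler import filters

    class LicensedImageFilter(filters.BaseHostFilter):
        """Hypothetical policy-style filter that must re-run on rebuild."""

        # Policy filters decide *whether* a host may run the new image, so
        # they should keep applying on rebuild; usage/resource filters
        # should instead leave RUN_ON_REBUILD at its default of False.
        RUN_ON_REBUILD = True

        def host_passes(self, host_state, spec_obj):
            # Invented policy check standing in for real logic.
            return True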