diff --git a/.zuul.yaml b/.zuul.yaml index 8022e2006a7f..de882a242c3a 100644 --- a/.zuul.yaml +++ b/.zuul.yaml @@ -71,14 +71,6 @@ tox_envlist: functional-py35 timeout: 3600 -- job: - name: nova-caching-scheduler - parent: nova-dsvm-base - description: | - Run non-slow Tempest API and scenario tests using the CachingScheduler. - run: playbooks/legacy/nova-caching-scheduler/run.yaml - post-run: playbooks/legacy/nova-caching-scheduler/post.yaml - - job: name: nova-cells-v1 parent: nova-dsvm-base @@ -246,7 +238,5 @@ irrelevant-files: *dsvm-irrelevant-files - neutron-tempest-dvr-ha-multinode-full: irrelevant-files: *dsvm-irrelevant-files - - nova-caching-scheduler: - irrelevant-files: *dsvm-irrelevant-files - os-vif-ovs: irrelevant-files: *dsvm-irrelevant-files diff --git a/contrib/profile_caching_scheduler.sh b/contrib/profile_caching_scheduler.sh deleted file mode 100755 index df38ab12bfeb..000000000000 --- a/contrib/profile_caching_scheduler.sh +++ /dev/null @@ -1,40 +0,0 @@ -#!/bin/bash -# Copyright (c) 2014 Rackspace Hosting -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -# -# This runs a unit test that uses pycallgraph -# to profile the select_destinations call -# in the CachingScheduler -# -# For this script to work please run: -# python setup.py develop -# pip install -r requirements.txt -# pip install -r test-requirements.txt -# pip install pycallgraph -# export EVENTLET_NO_GREENDNS='yes' -# -BASEDIR=$(dirname $0) -TEST=$BASEDIR/../nova/tests/scheduler/test_caching_scheduler.py -echo -echo "Running this unit test file as a python script:" -echo $TEST - -python $TEST - -RESULTDIR=$(pwd) -echo -echo "For profiler result see: " -echo $RESULTDIR/scheduler.png -echo diff --git a/devstack/tempest-dsvm-caching-scheduler-rc b/devstack/tempest-dsvm-caching-scheduler-rc deleted file mode 100644 index cc09af6b82da..000000000000 --- a/devstack/tempest-dsvm-caching-scheduler-rc +++ /dev/null @@ -1,30 +0,0 @@ -# -# This script is executed in the OpenStack CI nova-caching-scheduler job. -# It's used to configure which tempest tests actually get run. You can find -# the CI job configuration under playbooks/legacy/nova-caching-scheduler/. -# - -# Construct a regex to use when limiting scope of tempest -# to avoid features unsupported by Nova's CachingScheduler support. - -# When adding entries to the regex, add a comment explaining why -# since this list should not grow. - -r="^(?!.*" -# exclude the slow tag -r="$r(?:.*\[.*\bslow\b.*\])" - -# NOTE(mriedem): ServersAdminTestJSON.test_create_server_with_scheduling_hint -# is skipped because it relies on the SameHostFilter which relies on the -# HostState object which might be stale when that filter runs. 
-# tempest.api.compute.admin.test_servers.ServersAdminTestJSON.test_create_server_with_scheduling_hint -r="$r|(?:.*id\-fdcd9b33\-0903\-4e00\-a1f7\-b5f6543068d6.*)" -# NOTE(mriedem): AggregatesAdminTestJSON.test_aggregate_add_host_create_server_with_az -# is skipped because it creates an aggregate and adds a host to it, then -# creates a server in that aggregate but fails to schedule because the caching -# scheduler hasn't updated the host's aggregates view yet. -# tempest.api.compute.admin.test_aggregates.AggregatesAdminTestJSON.test_aggregate_add_host_create_server_with_az -r="$r|(?:.*id\-96be03c7\-570d\-409c\-90f8\-e4db3c646996.*)" -r="$r).*$" - -export DEVSTACK_GATE_TEMPEST_REGEX="$r" diff --git a/doc/source/admin/configuration/schedulers.rst b/doc/source/admin/configuration/schedulers.rst index 230d9572e4b7..42928d3daef2 100644 --- a/doc/source/admin/configuration/schedulers.rst +++ b/doc/source/admin/configuration/schedulers.rst @@ -311,10 +311,9 @@ CoreFilter ``CoreFilter`` is deprecated since the 19.0.0 Stein release. VCPU filtering is performed natively using the Placement service when using the - ``filter_scheduler`` driver. Users of the ``caching_scheduler`` driver may - still rely on this filter but the ``caching_scheduler`` driver is itself - deprecated. Furthermore, enabling CoreFilter may incorrectly filter out - `baremetal nodes`_ which must be scheduled using custom resource classes. + ``filter_scheduler`` driver. Furthermore, enabling CoreFilter may + incorrectly filter out `baremetal nodes`_ which must be scheduled using + custom resource classes. Only schedules instances on hosts if sufficient CPU cores are available. If this filter is not set, the scheduler might over-provision a host based on @@ -390,10 +389,9 @@ DiskFilter ``DiskFilter`` is deprecated since the 19.0.0 Stein release. DISK_GB filtering is performed natively using the Placement service when using the - ``filter_scheduler`` driver. Users of the ``caching_scheduler`` driver may - still rely on this filter but the ``caching_scheduler`` driver is itself - deprecated. Furthermore, enabling DiskFilter may incorrectly filter out - `baremetal nodes`_ which must be scheduled using custom resource classes. + ``filter_scheduler`` driver. Furthermore, enabling DiskFilter may + incorrectly filter out `baremetal nodes`_ which must be scheduled using + custom resource classes. Only schedules instances on hosts if there is sufficient disk space available for root and ephemeral storage. @@ -640,10 +638,9 @@ RamFilter ``RamFilter`` is deprecated since the 19.0.0 Stein release. MEMORY_MB filtering is performed natively using the Placement service when using the - ``filter_scheduler`` driver. Users of the ``caching_scheduler`` driver may - still rely on this filter but the ``caching_scheduler`` driver is itself - deprecated. Furthermore, enabling RamFilter may incorrectly filter out - `baremetal nodes`_ which must be scheduled using custom resource classes. + ``filter_scheduler`` driver. Furthermore, enabling RamFilter may + incorrectly filter out `baremetal nodes`_ which must be scheduled using + custom resource classes. .. 
_baremetal nodes: https://docs.openstack.org/ironic/latest/install/configure-nova-flavors.html diff --git a/nova/conductor/tasks/live_migrate.py b/nova/conductor/tasks/live_migrate.py index b321de8723be..58156f2e4466 100644 --- a/nova/conductor/tasks/live_migrate.py +++ b/nova/conductor/tasks/live_migrate.py @@ -196,8 +196,6 @@ class LiveMigrationTask(base.TaskBase): # TODO(mriedem): This method can be removed when the forced host # scenario is calling select_destinations() in the scheduler because # Placement will be used to filter allocation candidates by MEMORY_MB. - # We likely can't remove it until the CachingScheduler is gone though - # since the CachingScheduler does not use Placement. compute = self._get_compute_info(self.destination) free_ram_mb = compute.free_ram_mb total_ram_mb = compute.memory_mb diff --git a/nova/conf/scheduler.py b/nova/conf/scheduler.py index ca51bfb24975..b725f83acb6e 100644 --- a/nova/conf/scheduler.py +++ b/nova/conf/scheduler.py @@ -34,9 +34,6 @@ used. Other options are: -* 'caching_scheduler' which aggressively caches the system state for better - individual scheduler performance at the risk of more retries when running - multiple schedulers. [DEPRECATED] * 'fake_scheduler' which is used for testing. Possible values: @@ -44,7 +41,6 @@ Possible values: * Any of the drivers included in Nova: * filter_scheduler - * caching_scheduler * fake_scheduler * You may also set this to the entry point name of a custom scheduler driver, @@ -62,8 +58,8 @@ Periodic task interval. This value controls how often (in seconds) to run periodic tasks in the scheduler. The specific tasks that are run for each period are determined by -the particular scheduler being used. Currently the only in-tree scheduler -driver that uses this option is the ``caching_scheduler``. +the particular scheduler being used. Currently there are no in-tree scheduler +driver that use this option. If this is larger than the nova-service 'service_down_time' setting, the ComputeFilter (if enabled) may think the compute service is down. As each diff --git a/nova/scheduler/caching_scheduler.py b/nova/scheduler/caching_scheduler.py deleted file mode 100644 index e53c42e777fa..000000000000 --- a/nova/scheduler/caching_scheduler.py +++ /dev/null @@ -1,107 +0,0 @@ -# Copyright (c) 2014 Rackspace Hosting -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -import collections -import itertools - -from oslo_log import log as logging - -from nova.scheduler import filter_scheduler - -LOG = logging.getLogger(__name__) - - -class CachingScheduler(filter_scheduler.FilterScheduler): - """Scheduler to test aggressive caching of the host list. - - Please note, this is a very opinionated scheduler. Be sure to - review the caveats listed here before selecting this scheduler. - - The aim of this scheduler is to reduce server build times when - you have large bursts of server builds, by reducing the time it - takes, from the users point of view, to service each schedule - request. 
- - There are two main parts to scheduling a users request: - * getting the current state of the system - * using filters and weights to pick the best host - - This scheduler tries its best to cache in memory the current - state of the system, so we don't need to make the expensive - call to get the current state of the system while processing - a user's request, we can do that query in a periodic task - before the user even issues their request. - - To reduce races, cached info of the chosen host is updated using - the existing host state call: consume_from_instance - - Please note, the way this works, each scheduler worker has its own - copy of the cache. So if you run multiple schedulers, you will get - more retries, because the data stored on any additional scheduler will - be more out of date, than if it was fetched from the database. - - In a similar way, if you have a high number of server deletes, the - extra capacity from those deletes will not show up until the cache is - refreshed. - """ - - USES_ALLOCATION_CANDIDATES = False - - def __init__(self, *args, **kwargs): - super(CachingScheduler, self).__init__(*args, **kwargs) - self.all_host_states = None - LOG.warning('CachingScheduler is deprecated in Pike and will be ' - 'removed in a subsequent release.') - - def run_periodic_tasks(self, context): - """Called from a periodic tasks in the manager.""" - elevated = context.elevated() - # NOTE(johngarbutt) Fetching the list of hosts before we get - # a user request, so no user requests have to wait while we - # fetch the list of hosts. - self.all_host_states = self._get_up_hosts(elevated) - - def _get_all_host_states(self, context, spec_obj, provider_summaries): - """Called from the filter scheduler, in a template pattern.""" - if self.all_host_states is None: - # NOTE(johngarbutt) We only get here when we a scheduler request - # comes in before the first run of the periodic task. - # Rather than raise an error, we fetch the list of hosts. - self.all_host_states = self._get_up_hosts(context) - - if (spec_obj and 'requested_destination' in spec_obj and - spec_obj.requested_destination and - 'cell' in spec_obj.requested_destination): - only_cell = spec_obj.requested_destination.cell - else: - only_cell = None - - if only_cell: - return self.all_host_states.get(only_cell.uuid, []) - else: - return itertools.chain.from_iterable( - self.all_host_states.values()) - - def _get_up_hosts(self, context): - all_hosts_iterator = self.host_manager.get_all_host_states(context) - # NOTE(danms): This could be more efficient if host_manager returned - # a dict for us. However, it flattens the information for the more - # mainline FilterScheduler. Since CachingScheduler goes away soonish, - # and since we were already iterating the full host list on every - # refresh, just build the dict here for now. - hosts_by_cell = collections.defaultdict(list) - for host in all_hosts_iterator: - hosts_by_cell[host.cell_uuid].append(host) - return hosts_by_cell diff --git a/nova/scheduler/driver.py b/nova/scheduler/driver.py index 84d49da9429a..1e08c071754e 100644 --- a/nova/scheduler/driver.py +++ b/nova/scheduler/driver.py @@ -32,6 +32,12 @@ from nova import servicegroup class Scheduler(object): """The base class that all Scheduler classes should inherit from.""" + # TODO(mriedem): We should remove this flag now so that all scheduler + # drivers, both in-tree and out-of-tree, must rely on placement for + # scheduling decisions. 
We're likely going to have more and more code + # over time that relies on the scheduler creating allocations and it + # will not be sustainable to try and keep compatibility code around for + # scheduler drivers that do not create allocations in Placement. USES_ALLOCATION_CANDIDATES = True """Indicates that the scheduler driver calls the Placement API for allocation candidates and uses those allocation candidates in its diff --git a/nova/scheduler/filters/core_filter.py b/nova/scheduler/filters/core_filter.py index 48fe5eb13610..30a561554526 100644 --- a/nova/scheduler/filters/core_filter.py +++ b/nova/scheduler/filters/core_filter.py @@ -83,9 +83,7 @@ class CoreFilter(BaseCoreFilter): LOG.warning('The CoreFilter is deprecated since the 19.0.0 Stein ' 'release. VCPU filtering is performed natively using the ' 'Placement service when using the filter_scheduler ' - 'driver. Users of the caching_scheduler driver may still ' - 'rely on this filter but the caching_scheduler driver is ' - 'itself deprecated. Furthermore, enabling CoreFilter ' + 'driver. Furthermore, enabling CoreFilter ' 'may incorrectly filter out baremetal nodes which must be ' 'scheduled using custom resource classes.') diff --git a/nova/scheduler/filters/disk_filter.py b/nova/scheduler/filters/disk_filter.py index 5297c5f9a618..ff6f4bb37ce0 100644 --- a/nova/scheduler/filters/disk_filter.py +++ b/nova/scheduler/filters/disk_filter.py @@ -33,12 +33,10 @@ class DiskFilter(filters.BaseHostFilter): LOG.warning('The DiskFilter is deprecated since the 19.0.0 Stein ' 'release. DISK_GB filtering is performed natively ' 'using the Placement service when using the ' - 'filter_scheduler driver. Users of the ' - 'caching_scheduler driver may still rely on this ' - 'filter but the caching_scheduler driver is itself ' - 'deprecated. Furthermore, enabling DiskFilter may ' - 'incorrectly filter out baremetal nodes which must be ' - 'scheduled using custom resource classes.') + 'filter_scheduler driver. Furthermore, enabling ' + 'DiskFilter may incorrectly filter out baremetal ' + 'nodes which must be scheduled using custom resource ' + 'classes.') def _get_disk_allocation_ratio(self, host_state, spec_obj): return host_state.disk_allocation_ratio diff --git a/nova/scheduler/filters/ram_filter.py b/nova/scheduler/filters/ram_filter.py index 448b20ba08e4..d109025ca209 100644 --- a/nova/scheduler/filters/ram_filter.py +++ b/nova/scheduler/filters/ram_filter.py @@ -73,11 +73,8 @@ class RamFilter(BaseRamFilter): LOG.warning('The RamFilter is deprecated since the 19.0.0 Stein ' 'release. MEMORY_MB filtering is performed natively ' 'using the Placement service when using the ' - 'filter_scheduler driver. Users of the ' - 'caching_scheduler driver may still rely on this ' - 'filter but the caching_scheduler driver is itself ' - 'deprecated. Furthermore, enabling RamFilter may ' - 'incorrectly filter out baremetal nodes which must be ' + 'filter_scheduler driver. Furthermore, enabling RamFilter ' + 'may incorrectly filter out baremetal nodes which must be ' 'scheduled using custom resource classes.') def _get_ram_allocation_ratio(self, host_state, spec_obj): diff --git a/nova/test.py b/nova/test.py index 5497b1ecfc20..90a0c86c3ea6 100644 --- a/nova/test.py +++ b/nova/test.py @@ -433,19 +433,9 @@ class TestCase(testtools.TestCase): def restart_scheduler_service(scheduler): """Restart a scheduler service in a realistic way. - Deals with resetting the host state cache in the case of using the - CachingScheduler driver. 
- :param scheduler: The nova-scheduler service to be restarted. """ scheduler.stop() - if hasattr(scheduler.manager.driver, 'all_host_states'): - # On startup, the CachingScheduler runs a periodic task to pull - # the initial set of compute nodes out of the database which it - # then puts into a cache (hence the name of the driver). This can - # race with actually starting the compute services so we need to - # restart the scheduler to refresh the cache. - scheduler.manager.driver.all_host_states = None scheduler.start() def assertJsonEqual(self, expected, observed, message=''): diff --git a/nova/tests/functional/regressions/test_bug_1671648.py b/nova/tests/functional/regressions/test_bug_1671648.py index c1c0653d7206..2a2a6b6954e8 100644 --- a/nova/tests/functional/regressions/test_bug_1671648.py +++ b/nova/tests/functional/regressions/test_bug_1671648.py @@ -79,9 +79,7 @@ class TestRetryBetweenComputeNodeBuilds(test.TestCase): self.addCleanup(fake.restore_nodes) self.start_service('compute', host='host2') - # Start the scheduler after the compute nodes are created in the DB - # in the case of using the CachingScheduler. - self.start_service('scheduler') + self.scheduler_service = self.start_service('scheduler') self.useFixture(cast_as_call.CastAsCall(self)) @@ -153,9 +151,14 @@ class TestRetryBetweenComputeNodeBuilds(test.TestCase): self.assertEqual(2, self.attempts) -class TestRetryBetweenComputeNodeBuildsCachingScheduler( +class TestRetryBetweenComputeNodeBuildsNoAllocations( TestRetryBetweenComputeNodeBuilds): - """Tests the reschedule scenario using the CachingScheduler.""" + """Tests the reschedule scenario using a scheduler driver which does + not use Placement. + """ def setUp(self): - self.flags(driver='caching_scheduler', group='scheduler') - super(TestRetryBetweenComputeNodeBuildsCachingScheduler, self).setUp() + super(TestRetryBetweenComputeNodeBuildsNoAllocations, self).setUp() + # We need to mock the FilterScheduler to not use Placement so that + # allocations won't be created during scheduling. + self.scheduler_service.manager.driver.USES_ALLOCATION_CANDIDATES = \ + False diff --git a/nova/tests/functional/regressions/test_bug_1741125.py b/nova/tests/functional/regressions/test_bug_1741125.py index 206a2eda685d..4e2480014b95 100644 --- a/nova/tests/functional/regressions/test_bug_1741125.py +++ b/nova/tests/functional/regressions/test_bug_1741125.py @@ -36,9 +36,6 @@ class TestServerResizeReschedule(integrated_helpers.ProviderUsageBaseTestCase): self.compute3 = self._start_compute(host='host3') self.compute4 = self._start_compute(host='host4') - # Restart the scheduler to reset the host state cache. - self.restart_scheduler_service(self.scheduler_service) - flavors = self.api.get_flavors() self.flavor1 = flavors[0] self.flavor2 = flavors[1] @@ -83,10 +80,15 @@ class TestServerResizeReschedule(integrated_helpers.ProviderUsageBaseTestCase): server['flavor']['original_name']) -class TestServerResizeRescheduleWithCachingScheduler( +class TestServerResizeRescheduleWithNoAllocations( TestServerResizeReschedule): - """Tests the reschedule scenario using the CachingScheduler.""" + """Tests the reschedule scenario using a scheduler driver which does not + use Placement. 
+ """ def setUp(self): - self.flags(driver='caching_scheduler', group='scheduler') - super(TestServerResizeRescheduleWithCachingScheduler, self).setUp() + super(TestServerResizeRescheduleWithNoAllocations, self).setUp() + # We need to mock the FilterScheduler to not use Placement so that + # allocations won't be created during scheduling. + self.scheduler_service.manager.driver.USES_ALLOCATION_CANDIDATES = \ + False diff --git a/nova/tests/functional/regressions/test_bug_1741307.py b/nova/tests/functional/regressions/test_bug_1741307.py index 0041b95f7505..811352b0de8d 100644 --- a/nova/tests/functional/regressions/test_bug_1741307.py +++ b/nova/tests/functional/regressions/test_bug_1741307.py @@ -18,23 +18,24 @@ from nova.tests.unit import policy_fixture from nova.virt import fake -class TestResizeWithCachingScheduler(test.TestCase, - integrated_helpers.InstanceHelperMixin): +class TestResizeWithNoAllocationScheduler( + test.TestCase, integrated_helpers.InstanceHelperMixin): """Regression tests for bug #1741307 - The CachingScheduler does not use Placement to make claims (allocations) - against compute nodes during scheduling like the FilterScheduler does. + Some scheduler drivers, like the old CachingScheduler driver, do not use + Placement to make claims (allocations) against compute nodes during + scheduling like the FilterScheduler does. During a cold migrate / resize, the FilterScheduler will "double up" the instance allocations so the instance has resource allocations made against both the source node and the chosen destination node. Conductor will then attempt to "swap" the source node allocation to the migration record. If - using the CachingScheduler, there are no allocations for the instance on + using a non-Placement driver, there are no allocations for the instance on the source node and conductor fails. Note that if the compute running the instance was running Ocata code or older, then the compute itself would create the allocations in Placement via the ResourceTracker, but once all computes are upgraded to Pike or newer, the compute no longer creates allocations in Placement because it assumes the scheduler is doing that, - which is not the case with the CachingScheduler. + which is not the case with these outlier scheduler drivers. This is a regression test to show the failure before it's fixed and then can be used to confirm the fix. @@ -43,7 +44,7 @@ class TestResizeWithCachingScheduler(test.TestCase, microversion = 'latest' def setUp(self): - super(TestResizeWithCachingScheduler, self).setUp() + super(TestResizeWithNoAllocationScheduler, self).setUp() self.useFixture(policy_fixture.RealPolicyFixture()) self.useFixture(nova_fixtures.NeutronFixture(self)) @@ -66,10 +67,10 @@ class TestResizeWithCachingScheduler(test.TestCase, self.addCleanup(fake.restore_nodes) self.start_service('compute', host=host) - # Start the scheduler after the compute nodes are created in the DB - # in the case of using the CachingScheduler. - self.flags(driver='caching_scheduler', group='scheduler') - self.start_service('scheduler') + scheduler_service = self.start_service('scheduler') + # We need to mock the FilterScheduler to not use Placement so that + # allocations won't be created during scheduling. 
+ scheduler_service.manager.driver.USES_ALLOCATION_CANDIDATES = False flavors = self.api.get_flavors() self.old_flavor = flavors[0] diff --git a/nova/tests/functional/test_nova_manage.py b/nova/tests/functional/test_nova_manage.py index 3bd931d452fb..94c76a1b8e05 100644 --- a/nova/tests/functional/test_nova_manage.py +++ b/nova/tests/functional/test_nova_manage.py @@ -369,10 +369,6 @@ class TestNovaManagePlacementHealAllocations( NUMBER_OF_CELLS = 2 def setUp(self): - # Since the CachingScheduler does not use Placement, we want to use - # the CachingScheduler to create instances and then we can heal their - # allocations via the CLI. - self.flags(driver='caching_scheduler', group='scheduler') super(TestNovaManagePlacementHealAllocations, self).setUp() self.cli = manage.PlacementCommands() # We need to start a compute in each non-cell0 cell. @@ -387,8 +383,11 @@ class TestNovaManagePlacementHealAllocations( self.flavor = self.api.get_flavors()[0] self.output = StringIO() self.useFixture(fixtures.MonkeyPatch('sys.stdout', self.output)) - # Restart the scheduler to reset the host state cache. - self.restart_scheduler_service(self.scheduler_service) + # We need to mock the FilterScheduler to not use Placement so that + # allocations won't be created during scheduling and then we can heal + # them in the CLI. + self.scheduler_service.manager.driver.USES_ALLOCATION_CANDIDATES = \ + False def _boot_and_assert_no_allocations(self, flavor, hostname): """Creates a server on the given host and asserts neither have usage @@ -417,14 +416,14 @@ class TestNovaManagePlacementHealAllocations( self.assertEqual( 0, usage, 'Compute node resource provider %s should not have %s ' - 'usage when using the CachingScheduler.' % + 'usage; something must be wrong in test setup.' % (hostname, resource_class)) # Check that the server has no allocations. allocations = self._get_allocations_by_server_uuid(server['id']) self.assertEqual({}, allocations, - 'Server should not have allocations when using ' - 'the CachingScheduler.') + 'Server should not have allocations; something must ' + 'be wrong in test setup.') return server, rp_uuid def _assert_healed(self, server, rp_uuid): @@ -597,7 +596,7 @@ class TestNovaManagePlacementHealAllocations( 1.8 when consumer (project_id and user_id) were not required so the consumer information is using sentinel values from config. - Since the CachingScheduler used in this test class won't actually + Since the hacked scheduler used in this test class won't actually create allocations during scheduling, we have to create the allocations out-of-band and then run our heal routine to see they get updated with the instance project and user information. 
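The functional tests above all converge on the same replacement for the removed CachingScheduler: keep the default ``filter_scheduler`` driver but clear its ``USES_ALLOCATION_CANDIDATES`` flag so that scheduling creates no allocations in Placement. A minimal sketch of that shared setUp() pattern follows; the mixin name is hypothetical, while ``start_service()`` and the ``scheduler_service.manager.driver`` attribute path are taken directly from the hunks above.

class NoAllocationSchedulerTestMixin(object):
    """Sketch of the pattern shared by the regression tests above."""

    def _start_scheduler_without_placement_claims(self):
        # start_service() comes from nova's base TestCase and returns the
        # running nova-scheduler service object.
        scheduler_service = self.start_service('scheduler')
        # FilterScheduler normally requests allocation candidates from
        # Placement and claims resources during scheduling; clearing this
        # flag skips that, approximating the removed CachingScheduler so
        # the "no allocations" code paths can still be exercised.
        scheduler_service.manager.driver.USES_ALLOCATION_CANDIDATES = False
        return scheduler_service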
diff --git a/nova/tests/unit/cmd/test_scheduler.py b/nova/tests/unit/cmd/test_scheduler.py index 2d5029cb82f4..01e685d2d4f1 100644 --- a/nova/tests/unit/cmd/test_scheduler.py +++ b/nova/tests/unit/cmd/test_scheduler.py @@ -51,9 +51,9 @@ class TestScheduler(test.NoDBTestCase): @mock.patch('nova.service.serve') @mock.patch('nova.service.wait') @mock.patch('oslo_concurrency.processutils.get_worker_count') - def test_workers_caching_scheduler(self, get_worker_count, mock_wait, - mock_serve, service_create): - self.flags(driver='caching_scheduler', group='scheduler') + def test_workers_fake_scheduler(self, get_worker_count, mock_wait, + mock_serve, service_create): + self.flags(driver='fake_scheduler', group='scheduler') scheduler.main() get_worker_count.assert_not_called() mock_serve.assert_called_once_with( diff --git a/nova/tests/unit/scheduler/test_caching_scheduler.py b/nova/tests/unit/scheduler/test_caching_scheduler.py deleted file mode 100644 index 917e4a329b32..000000000000 --- a/nova/tests/unit/scheduler/test_caching_scheduler.py +++ /dev/null @@ -1,310 +0,0 @@ -# Copyright (c) 2014 Rackspace Hosting -# All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -import mock -from oslo_utils.fixture import uuidsentinel as uuids -from oslo_utils import timeutils -from six.moves import range - -from nova import exception -from nova import objects -from nova.scheduler import caching_scheduler -from nova.scheduler import host_manager -from nova.tests.unit.scheduler import test_scheduler - - -ENABLE_PROFILER = False - - -class CachingSchedulerTestCase(test_scheduler.SchedulerTestCase): - """Test case for Caching Scheduler.""" - - driver_cls = caching_scheduler.CachingScheduler - - @mock.patch.object(caching_scheduler.CachingScheduler, - "_get_up_hosts") - def test_run_periodic_tasks_loads_hosts(self, mock_up_hosts): - mock_up_hosts.return_value = [] - context = mock.Mock() - - self.driver.run_periodic_tasks(context) - - self.assertTrue(mock_up_hosts.called) - self.assertEqual([], self.driver.all_host_states) - context.elevated.assert_called_with() - - @mock.patch.object(caching_scheduler.CachingScheduler, - "_get_up_hosts") - def test_get_all_host_states_returns_cached_value(self, mock_up_hosts): - self.driver.all_host_states = {uuids.cell: []} - - self.driver._get_all_host_states(self.context, None, - mock.sentinel.provider_uuids) - - self.assertFalse(mock_up_hosts.called) - self.assertEqual({uuids.cell: []}, self.driver.all_host_states) - - @mock.patch.object(caching_scheduler.CachingScheduler, - "_get_up_hosts") - def test_get_all_host_states_loads_hosts(self, mock_up_hosts): - host_state = self._get_fake_host_state() - mock_up_hosts.return_value = {uuids.cell: [host_state]} - - result = self.driver._get_all_host_states(self.context, None, - mock.sentinel.provider_uuids) - - self.assertTrue(mock_up_hosts.called) - self.assertEqual({uuids.cell: [host_state]}, - self.driver.all_host_states) - self.assertEqual([host_state], list(result)) - - def test_get_up_hosts(self): - with 
mock.patch.object(self.driver.host_manager, - "get_all_host_states") as mock_get_hosts: - host_state = self._get_fake_host_state() - mock_get_hosts.return_value = [host_state] - - result = self.driver._get_up_hosts(self.context) - - self.assertTrue(mock_get_hosts.called) - self.assertEqual({uuids.cell: [host_state]}, result) - - def test_select_destination_raises_with_no_hosts(self): - spec_obj = self._get_fake_request_spec() - self.driver.all_host_states = {uuids.cell: []} - - self.assertRaises(exception.NoValidHost, - self.driver.select_destinations, - self.context, spec_obj, [spec_obj.instance_uuid], - {}, {}) - - @mock.patch('nova.db.api.instance_extra_get_by_instance_uuid', - return_value={'numa_topology': None, - 'pci_requests': None}) - def test_select_destination_works(self, mock_get_extra): - spec_obj = self._get_fake_request_spec() - fake_host = self._get_fake_host_state() - self.driver.all_host_states = {uuids.cell: [fake_host]} - - result = self._test_select_destinations(spec_obj) - - self.assertEqual(1, len(result)) - self.assertEqual(result[0][0].service_host, fake_host.host) - - def _test_select_destinations(self, spec_obj): - provider_summaries = {} - for cell_hosts in self.driver.all_host_states.values(): - for hs in cell_hosts: - provider_summaries[hs.uuid] = hs - - return self.driver.select_destinations( - self.context, spec_obj, [spec_obj.instance_uuid], {}, - provider_summaries) - - def _get_fake_request_spec(self): - # NOTE(sbauza): Prevent to stub the Flavor.get_by_id call just by - # directly providing a Flavor object - flavor = objects.Flavor( - flavorid="small", - memory_mb=512, - root_gb=1, - ephemeral_gb=1, - vcpus=1, - swap=0, - ) - instance_properties = { - "os_type": "linux", - "project_id": "1234", - } - request_spec = objects.RequestSpec( - flavor=flavor, - num_instances=1, - ignore_hosts=None, - force_hosts=None, - force_nodes=None, - retry=None, - availability_zone=None, - image=None, - instance_group=None, - pci_requests=None, - numa_topology=None, - instance_uuid='aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa', - **instance_properties - ) - return request_spec - - def _get_fake_host_state(self, index=0): - host_state = host_manager.HostState( - 'host_%s' % index, - 'node_%s' % index, - uuids.cell) - host_state.uuid = getattr(uuids, 'host_%s' % index) - host_state.free_ram_mb = 50000 - host_state.total_usable_ram_mb = 50000 - host_state.free_disk_mb = 4096 - host_state.total_usable_disk_gb = 4 - host_state.service = { - "disabled": False, - "updated_at": timeutils.utcnow(), - "created_at": timeutils.utcnow(), - } - host_state.cpu_allocation_ratio = 16.0 - host_state.ram_allocation_ratio = 1.5 - host_state.disk_allocation_ratio = 1.0 - host_state.metrics = objects.MonitorMetricList(objects=[]) - host_state.failed_builds = 0 - return host_state - - @mock.patch('nova.db.api.instance_extra_get_by_instance_uuid', - return_value={'numa_topology': None, - 'pci_requests': None}) - def test_performance_check_select_destination(self, mock_get_extra): - hosts = 2 - requests = 1 - - self.flags(service_down_time=240) - - spec_obj = self._get_fake_request_spec() - host_states = [] - for x in range(hosts): - host_state = self._get_fake_host_state(x) - host_states.append(host_state) - self.driver.all_host_states = {uuids.cell: host_states} - provider_summaries = {hs.uuid: hs for hs in host_states} - - def run_test(): - a = timeutils.utcnow() - - for x in range(requests): - self.driver.select_destinations(self.context, spec_obj, - [spec_obj.instance_uuid], {}, provider_summaries) 
- - b = timeutils.utcnow() - c = b - a - - seconds = (c.days * 24 * 60 * 60 + c.seconds) - microseconds = seconds * 1000 + c.microseconds / 1000.0 - per_request_ms = microseconds / requests - return per_request_ms - - per_request_ms = None - if ENABLE_PROFILER: - import pycallgraph - from pycallgraph import output - config = pycallgraph.Config(max_depth=10) - config.trace_filter = pycallgraph.GlobbingFilter(exclude=[ - 'pycallgraph.*', - 'unittest.*', - 'testtools.*', - 'nova.tests.unit.*', - ]) - graphviz = output.GraphvizOutput(output_file='scheduler.png') - - with pycallgraph.PyCallGraph(output=graphviz): - per_request_ms = run_test() - - else: - per_request_ms = run_test() - - # This has proved to be around 1 ms on a random dev box - # But this is here so you can do simply performance testing easily. - self.assertLess(per_request_ms, 1000) - - def test_request_single_cell(self): - spec_obj = self._get_fake_request_spec() - spec_obj.requested_destination = objects.Destination( - cell=objects.CellMapping(uuid=uuids.cell2)) - host_states_cell1 = [self._get_fake_host_state(i) - for i in range(1, 5)] - host_states_cell2 = [self._get_fake_host_state(i) - for i in range(5, 10)] - - self.driver.all_host_states = { - uuids.cell1: host_states_cell1, - uuids.cell2: host_states_cell2, - } - provider_summaries = { - cn.uuid: cn for cn in host_states_cell1 + host_states_cell2 - } - - d = self.driver.select_destinations(self.context, spec_obj, - [spec_obj.instance_uuid], {}, provider_summaries) - self.assertIn(d[0][0].service_host, - [hs.host for hs in host_states_cell2]) - - @mock.patch("nova.scheduler.host_manager.HostState.consume_from_request") - @mock.patch("nova.scheduler.caching_scheduler.CachingScheduler." - "_get_sorted_hosts") - @mock.patch("nova.scheduler.caching_scheduler.CachingScheduler." - "_get_all_host_states") - def test_alternates_same_cell(self, mock_get_all_hosts, mock_sorted, - mock_consume): - """Tests getting hosts plus alternates where the hosts are spread - across two cells. - """ - all_host_states = [] - for num in range(10): - host_name = "host%s" % num - cell_uuid = uuids.cell1 if num % 2 else uuids.cell2 - hs = host_manager.HostState(host_name, "node%s" % num, - cell_uuid) - hs.uuid = getattr(uuids, host_name) - all_host_states.append(hs) - - mock_get_all_hosts.return_value = all_host_states - # There are two instances, so _get_sorted_hosts will be called once - # per instance, and then once again before picking alternates. - mock_sorted.side_effect = [all_host_states, - list(reversed(all_host_states)), - all_host_states] - total_returned = 3 - self.flags(max_attempts=total_returned, group="scheduler") - instance_uuids = [uuids.inst1, uuids.inst2] - num_instances = len(instance_uuids) - - spec_obj = objects.RequestSpec( - num_instances=num_instances, - flavor=objects.Flavor(memory_mb=512, - root_gb=512, - ephemeral_gb=0, - swap=0, - vcpus=1), - project_id=uuids.project_id, - instance_group=None) - - dests = self.driver._schedule(self.context, spec_obj, - instance_uuids, None, None, return_alternates=True) - # There should be max_attempts hosts per instance (1 selected, 2 alts) - self.assertEqual(total_returned, len(dests[0])) - self.assertEqual(total_returned, len(dests[1])) - # Verify that the two selected hosts are not in the same cell. 
- self.assertNotEqual(dests[0][0].cell_uuid, dests[1][0].cell_uuid) - for dest in dests: - selected_host = dest[0] - selected_cell_uuid = selected_host.cell_uuid - for alternate in dest[1:]: - self.assertEqual(alternate.cell_uuid, selected_cell_uuid) - - -if __name__ == '__main__': - # A handy tool to help profile the schedulers performance - ENABLE_PROFILER = True - import testtools - suite = testtools.ConcurrentTestSuite() - test = "test_performance_check_select_destination" - test_case = CachingSchedulerTestCase(test) - suite.addTest(test_case) - runner = testtools.TextTestResult.TextTestRunner() - runner.run(suite) diff --git a/nova/tests/unit/scheduler/test_scheduler.py b/nova/tests/unit/scheduler/test_scheduler.py index d1f7defc0dbe..513ce28598c8 100644 --- a/nova/tests/unit/scheduler/test_scheduler.py +++ b/nova/tests/unit/scheduler/test_scheduler.py @@ -23,7 +23,6 @@ from oslo_utils.fixture import uuidsentinel as uuids from nova import context from nova import objects -from nova.scheduler import caching_scheduler from nova.scheduler import filter_scheduler from nova.scheduler import host_manager from nova.scheduler import manager @@ -45,15 +44,6 @@ class SchedulerManagerInitTestCase(test.NoDBTestCase): driver = self.manager_cls().driver self.assertIsInstance(driver, filter_scheduler.FilterScheduler) - @mock.patch.object(host_manager.HostManager, '_init_instance_info') - @mock.patch.object(host_manager.HostManager, '_init_aggregates') - def test_init_using_caching_schedulerdriver(self, - mock_init_agg, - mock_init_inst): - self.flags(driver='caching_scheduler', group='scheduler') - driver = self.manager_cls().driver - self.assertIsInstance(driver, caching_scheduler.CachingScheduler) - @mock.patch.object(host_manager.HostManager, '_init_instance_info') @mock.patch.object(host_manager.HostManager, '_init_aggregates') def test_init_nonexist_schedulerdriver(self, diff --git a/playbooks/legacy/nova-caching-scheduler/post.yaml b/playbooks/legacy/nova-caching-scheduler/post.yaml deleted file mode 100644 index e07f5510ae70..000000000000 --- a/playbooks/legacy/nova-caching-scheduler/post.yaml +++ /dev/null @@ -1,15 +0,0 @@ -- hosts: primary - tasks: - - - name: Copy files from {{ ansible_user_dir }}/workspace/ on node - synchronize: - src: '{{ ansible_user_dir }}/workspace/' - dest: '{{ zuul.executor.log_root }}' - mode: pull - copy_links: true - verify_host: true - rsync_opts: - - --include=/logs/** - - --include=*/ - - --exclude=* - - --prune-empty-dirs diff --git a/playbooks/legacy/nova-caching-scheduler/run.yaml b/playbooks/legacy/nova-caching-scheduler/run.yaml deleted file mode 100644 index d3bc6e28fe53..000000000000 --- a/playbooks/legacy/nova-caching-scheduler/run.yaml +++ /dev/null @@ -1,59 +0,0 @@ -- hosts: all - name: nova-caching-scheduler - tasks: - - - name: Ensure workspace directory - file: - path: '{{ ansible_user_dir }}/workspace' - state: directory - - - shell: - cmd: | - set -e - set -x - cat > clonemap.yaml << EOF - clonemap: - - name: openstack-infra/devstack-gate - dest: devstack-gate - EOF - /usr/zuul-env/bin/zuul-cloner -m clonemap.yaml --cache-dir /opt/git \ - git://git.openstack.org \ - openstack-infra/devstack-gate - executable: /bin/bash - chdir: '{{ ansible_user_dir }}/workspace' - environment: '{{ zuul | zuul_legacy_vars }}' - - - shell: - # The scheduler.periodic_task_interval must be less than the - # service_down_time (defaults to 60) otherwise the ComputeFilter - # will be using potentially stale compute service information and - # think that the compute 
service is down when it's really not. - cmd: | - set -e - set -x - cat << 'EOF' >>"/tmp/dg-local.conf" - [[local|localrc]] - SCHEDULER=caching_scheduler - [[post-config|$NOVA_CONF]] - [scheduler] - periodic_task_interval=30 - - EOF - executable: /bin/bash - chdir: '{{ ansible_user_dir }}/workspace' - environment: '{{ zuul | zuul_legacy_vars }}' - - - shell: - cmd: | - set -e - set -x - export PYTHONUNBUFFERED=true - # Yes we want to run Tempest. - export DEVSTACK_GATE_TEMPEST=1 - # Run with our blacklist. - export DEVSTACK_GATE_SETTINGS=/opt/stack/new/nova/devstack/tempest-dsvm-caching-scheduler-rc - cp devstack-gate/devstack-vm-gate-wrap.sh ./safe-devstack-vm-gate-wrap.sh - ./safe-devstack-vm-gate-wrap.sh - executable: /bin/bash - chdir: '{{ ansible_user_dir }}/workspace' - environment: '{{ zuul | zuul_legacy_vars }}' diff --git a/releasenotes/notes/remove-caching-scheduler-cfe0985b5a58bef4.yaml b/releasenotes/notes/remove-caching-scheduler-cfe0985b5a58bef4.yaml new file mode 100644 index 000000000000..201e7fae4ca5 --- /dev/null +++ b/releasenotes/notes/remove-caching-scheduler-cfe0985b5a58bef4.yaml @@ -0,0 +1,46 @@ +--- +upgrade: + - | + The ``caching_scheduler`` scheduler driver, which was deprecated in the + 16.0.0 Pike release, has now been removed. Unlike the default + ``filter_scheduler`` scheduler driver which creates resource allocations + in the placement service during scheduling, the ``caching_scheduler`` + driver did not interface with the placement service. As more and more + functionality within nova relies on managing (sometimes complex) resource + allocations in the placement service, compatibility with the + ``caching_scheduler`` driver is difficult to maintain, and seldom tested. + The original reasons behind the need for the CachingScheduler should now + be resolved with the FilterScheduler and the placement service, notably: + + * resource claims (allocations) are made atomically during scheduling to + alleviate the potential for racing to concurrently build servers on the + same compute host which could lead to failures + * because of the atomic allocation claims made during scheduling by the + ``filter_scheduler`` driver, it is safe [1]_ to run multiple scheduler + workers and scale horizontally + + .. [1] There are still known race issues with concurrently building some + types of resources and workloads, such as anything that requires + PCI/NUMA or (anti-)affinity groups. However, those races also existed + with the ``caching_scheduler`` driver. + + To migrate from the CachingScheduler to the FilterScheduler, operators can + leverage the ``nova-manage placement heal_allocations`` command: + + https://docs.openstack.org/nova/latest/cli/nova-manage.html#placement + + Finally, it is still technically possible to load an out-of-tree scheduler + driver using the ``nova.scheduler.driver`` entry-point. However, + out-of-tree driver interfaces are not guaranteed to be stable: + + https://docs.openstack.org/nova/latest/contributor/policies.html#out-of-tree-support + + And as noted above, as more of the code base evolves to rely on resource + allocations being tracked in the placement service (created during + scheduling), out-of-tree scheduler driver support may be severely impacted. 
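As a hedged illustration only (not part of this patch): the simplest way for an out-of-tree driver to remain compatible is to subclass ``FilterScheduler``, which keeps ``USES_ALLOCATION_CANDIDATES = True`` and therefore keeps allocations being created in Placement during scheduling. The class name below is hypothetical; the import path and the flag come from this change.

from nova.scheduler import filter_scheduler


class DownstreamScheduler(filter_scheduler.FilterScheduler):
    """Hypothetical out-of-tree driver.

    Inheriting FilterScheduler leaves USES_ALLOCATION_CANDIDATES set to
    True, so resource claims are still made in Placement while
    scheduling, which the rest of nova increasingly assumes.
    """
    # Deliberately no USES_ALLOCATION_CANDIDATES = False override here;
    # that is exactly the unsupported pattern this note warns about.

Such a driver would be registered under the ``nova.scheduler.driver`` entry point shown in ``setup.cfg`` below, e.g. ``downstream_scheduler = mypkg.scheduler:DownstreamScheduler`` (a hypothetical line).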
+
+    If you rely on the ``caching_scheduler`` driver or your own out-of-tree
+    driver which sets ``USES_ALLOCATION_CANDIDATES = False`` to bypass the
+    placement service, please communicate with the nova development team in
+    the openstack-dev mailing list and/or #openstack-nova freenode IRC channel
+    to determine what prevents you from using the ``filter_scheduler`` driver.
\ No newline at end of file
diff --git a/setup.cfg b/setup.cfg
index 0fc5aa195011..de54a1141192 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -85,7 +85,6 @@ nova.ipv6_backend =
 nova.scheduler.driver =
     filter_scheduler = nova.scheduler.filter_scheduler:FilterScheduler
-    caching_scheduler = nova.scheduler.caching_scheduler:CachingScheduler
     fake_scheduler = nova.tests.unit.scheduler.fakes:FakeScheduler
 
 [egg_info]
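For completeness, a sketch (assumed wiring, not taken from this patch) of how a configured driver name is resolved against the ``nova.scheduler.driver`` entry points listed above, using stevedore as nova does; a deployment still configured with ``[scheduler]driver = caching_scheduler`` would now fail at this load step because that entry point no longer exists.

from stevedore import driver as stevedore_driver


def load_scheduler_driver(name='filter_scheduler'):
    # Resolves the driver name against the entry points registered under
    # [entry_points] nova.scheduler.driver in setup.cfg. With the
    # caching_scheduler entry point removed, asking for that name makes
    # stevedore raise its NoMatches error.
    mgr = stevedore_driver.DriverManager(
        namespace='nova.scheduler.driver',
        name=name,
        invoke_on_load=False)
    # invoke_on_load=False returns the plugin class rather than an
    # instance; nova itself handles instantiating the driver.
    return mgr.driver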