Fix doubling allocations on rebuild

Commit 984dd8ad6a makes a rebuild
with a new image go through the scheduler again to validate the
image against the instance.host (we rebuild to the same host that
the instance already lives on). That extra scheduling pass also
claims resources again, doubling the instance's allocations. This
change fixes the doubling by skipping the claim process when a
policy-only (rebuild) scheduler check is being performed.

Closes-Bug: #1732976

Related-CVE: CVE-2017-17051
Related-OSSA: OSSA-2017-006

NOTE(mriedem): This change removes the Pike-only workaround
added in 234ade29a3.

Change-Id: I8a9157bc76ba1068ab966c4abdbb147c500604a8
(cherry picked from commit 25a1d78e83)
This commit is contained in:
Dan Smith 2017-11-20 13:24:24 -08:00 committed by Matt Riedemann
parent 234ade29a3
commit fed660c118
4 changed files with 42 additions and 25 deletions

View File

@ -30,6 +30,7 @@ from nova.i18n import _
from nova import rpc
from nova.scheduler import client
from nova.scheduler import driver
from nova.scheduler import utils
CONF = nova.conf.CONF
LOG = logging.getLogger(__name__)
@ -271,6 +272,15 @@ class FilterScheduler(driver.Scheduler):
PUT /allocations/{consumer_uuid} call to claim
resources for the instance
"""
if utils.request_is_rebuild(spec_obj):
# NOTE(danms): This is a rebuild-only scheduling request, so we
# should not be doing any extra claiming
LOG.debug('Not claiming resources in the placement API for '
'rebuild-only scheduling of instance %(uuid)s',
{'uuid': instance_uuid})
return True
LOG.debug("Attempting to claim resources in the placement API for "
"instance %s", instance_uuid)

View File

@ -1065,14 +1065,6 @@ class ServerRebuildTestCase(integrated_helpers._IntegratedTestBase,
# can use to update image metadata via our compute images proxy API.
microversion = '2.38'
def setUp(self):
# We have to use the MediumFakeDriver for test_rebuild_with_new_image
# since the scheduler doubles the VCPU allocation until the bug is
# fixed.
# TODO(mriedem): Remove this once the bug is fixed.
self.flags(compute_driver='fake.MediumFakeDriver')
super(ServerRebuildTestCase, self).setUp()
# We need the ImagePropertiesFilter so override the base class setup
# which configures to use the chance_scheduler.
def _setup_scheduler_service(self):
@ -1182,15 +1174,6 @@ class ServerRebuildTestCase(integrated_helpers._IntegratedTestBase,
self.assertEqual(flavor['ram'], allocation['MEMORY_MB'])
self.assertEqual(flavor['disk'], allocation['DISK_GB'])
def assertFlavorsMatchAllocation(old_flavor, new_flavor,
allocation):
self.assertEqual(old_flavor['vcpus'] + new_flavor['vcpus'],
allocation['VCPU'])
self.assertEqual(old_flavor['ram'] + new_flavor['ram'],
allocation['MEMORY_MB'])
self.assertEqual(old_flavor['disk'] + new_flavor['disk'],
allocation['DISK_GB'])
rp_uuid = _get_provider_uuid()
# make sure we start with no usage on the compute node
rp_usages = _get_provider_usages(rp_uuid)
@ -1236,16 +1219,12 @@ class ServerRebuildTestCase(integrated_helpers._IntegratedTestBase,
# The usage and allocations should not have changed.
rp_usages = _get_provider_usages(rp_uuid)
# FIXME(mriedem): This is a bug where the scheduler doubled up the
# allocations for the instance even though we're just rebuilding
# to the same host. Uncomment this once fixed.
# assertFlavorMatchesAllocation(flavor, rp_usages)
assertFlavorsMatchAllocation(flavor, flavor, rp_usages)
assertFlavorMatchesAllocation(flavor, rp_usages)
allocs = _get_allocations_by_server_uuid(server['id'])
self.assertIn(rp_uuid, allocs)
allocs = allocs[rp_uuid]['resources']
# assertFlavorMatchesAllocation(flavor, allocs)
assertFlavorsMatchAllocation(flavor, flavor, allocs)
assertFlavorMatchesAllocation(flavor, allocs)
class ProviderUsageBaseTestCase(test.TestCase,

View File

@ -482,7 +482,7 @@ class FilterSchedulerTestCase(test_scheduler.SchedulerTestCase):
instance.
"""
ctx = mock.Mock(user_id=uuids.user_id)
spec_obj = mock.Mock(project_id=uuids.project_id)
spec_obj = objects.RequestSpec(project_id=uuids.project_id)
instance_uuid = uuids.instance
alloc_reqs = [mock.sentinel.alloc_req]
@ -495,6 +495,16 @@ class FilterSchedulerTestCase(test_scheduler.SchedulerTestCase):
mock.sentinel.alloc_req, uuids.project_id, uuids.user_id)
self.assertTrue(res)
@mock.patch('nova.scheduler.utils.request_is_rebuild')
def test_claim_resouces_for_policy_check(self, mock_policy):
    """A rebuild-only request returns True without claiming in placement."""
    # Pretend the request spec describes a rebuild (policy check only).
    mock_policy.return_value = True
    spec = mock.sentinel.spec_obj
    instance_uuid = mock.sentinel.instance_uuid

    claimed = self.driver._claim_resources(None, spec, instance_uuid, [])

    self.assertTrue(claimed)
    mock_policy.assert_called_once_with(spec)
    # The placement client must not have been asked to claim anything.
    claim_call = self.placement_client.claim_resources
    self.assertFalse(claim_call.called)
def test_add_retry_host(self):
retry = dict(num_attempts=1, hosts=[])
filter_properties = dict(retry=retry)

View File

@ -0,0 +1,18 @@
---
security:
- |
`OSSA-2017-006`_: Nova FilterScheduler doubles resource allocations during
rebuild with new image (CVE-2017-17051)
By repeatedly rebuilding an instance with new images, an authenticated user
may consume untracked resources on a hypervisor host leading to a denial of
service. This regression was introduced with the fix for `OSSA-2017-005`_
(CVE-2017-16239); however, only Nova stable/pike or later deployments that
have that fix applied and rely on the default FilterScheduler are affected.
The fix is in the ``nova-api`` and ``nova-scheduler`` services.
.. note:: The fix for errata in `OSSA-2017-005`_ (CVE-2017-16239) will
need to be applied in addition to this fix.
.. _OSSA-2017-006: https://security.openstack.org/ossa/OSSA-2017-006.html