Add logic to enforce local api and db limits

Local API and DB limits are limits on resources that are counted either as an API request parameter (example: server metadata items) or as records in the database (example: server key pairs). Future patches will make use of this logic, and actually enforce the limits. This patch just adds the infrastructure to allow for the enforcement of the limits. We are moving all existing quotas to be managed via Keystone's unified limits. To stop confusion between injected_file_path_length and injected_file_path_bytes, the unified limit in Keystone will use the latter name to match the name used in the API. These local limits are all about preventing excessive load on the API and database and have little to do with resource usage. Accordingly, these limits are represented by Keystone registered limits only. Local limits include things that just limit items in an API request: * metadata_items * injected_files * injected_file_content_bytes * injected_file_path_bytes Local limits also include things that are stored in the database: * key_pairs * server_groups * server_group_members Some resource names have been changed to prepend a prefix of "server_" in order to disambiguate them from other potential unified limits in keystone: * metadata_items => server_metadata_items * injected_files => server_injected_files * injected_file_content_bytes => server_injected_file_content_bytes * injected_file_path_bytes => server_injected_file_path_bytes * key_pairs => server_key_pairs Note that each of the above is counted via a different scope. This new code ensures that key_pairs are counted per user, server_groups are counted per project and server_group_members are counted per server_group. Note: Previously server_group_members were counted per user inside each server_group, which has proved very confusing, as adding more users into a project increases the maximum size allowed for a server_group. blueprint unified-limits-nova Change-Id: I0b6f4d29aaee1d71541a95cbecfd0708aac325d2
2020-03-10 09:46:49 +00:00 · 2020-03-10 09:46:49 +00:00 · 3b69f959a8
parent a0c2bd4176
commit 3b69f959a8
8 changed files with 393 additions and 0 deletions
--- a/lower-constraints.txt
+++ b/lower-constraints.txt
@ -73,6 +73,7 @@ oslo.context==3.4.0
 oslo.db==10.0.0
 oslo.i18n==5.1.0
 oslo.log==4.6.1
+oslo.limit==1.5.0
 oslo.messaging==10.3.0
 oslo.middleware==3.31.0
 oslo.policy==3.7.0
--- a/mypy-files.txt
+++ b/mypy-files.txt
@ -1,5 +1,6 @@
 nova/compute/manager.py
 nova/crypto.py
+nova/limit/local.py
 nova/network/neutron.py
 nova/pci
 nova/privsep/path.py
--- a/nova/exception.py
+++ b/nova/exception.py
@ -135,6 +135,10 @@ class GlanceConnectionFailed(NovaException):
        "%(reason)s")


+class KeystoneConnectionFailed(NovaException):
+    # Raised by nova.limit.local when the oslo.limit enforcer cannot be
+    # created because its keystone session failed to initialise.
+    msg_fmt = _("Connection to keystone host failed: %(reason)s")
+
+
 class CinderConnectionFailed(NovaException):
    msg_fmt = _("Connection to cinder host failed: %(reason)s")

@ -1281,6 +1285,14 @@ class PortLimitExceeded(OverQuota):
    msg_fmt = _("Maximum number of ports exceeded")


+class ServerGroupLimitExceeded(OverQuota):
+    # Raised by nova.limit.local when the "server_groups" limit is exceeded.
+    msg_fmt = _("Quota exceeded, too many server groups.")
+
+
+class GroupMemberLimitExceeded(OverQuota):
+    # Raised by nova.limit.local when the "server_group_members" limit is
+    # exceeded.
+    msg_fmt = _("Quota exceeded, too many servers in group")
+
+
 class AggregateNotFound(NotFound):
    msg_fmt = _("Aggregate %(aggregate_id)s could not be found.")

--- a/nova/limit/init.py
+++ b/nova/limit/init.py
--- a/nova/limit/local.py
+++ b/nova/limit/local.py
@ -0,0 +1,174 @@
+# Copyright 2022 StackHPC
+#
+#    Licensed under the Apache License, Version 2.0 (the "License"); you may
+#    not use this file except in compliance with the License. You may obtain
+#    a copy of the License at
+#
+#         http://www.apache.org/licenses/LICENSE-2.0
+#
+#    Unless required by applicable law or agreed to in writing, software
+#    distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+#    WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+#    License for the specific language governing permissions and limitations
+#    under the License.
+
+import functools
+import typing as ty
+
+from oslo_limit import exception as limit_exceptions
+from oslo_limit import limit
+from oslo_log import log as logging
+
+import nova.conf
+from nova import context as nova_context
+from nova import exception
+from nova import objects
+
+LOG = logging.getLogger(__name__)
+CONF = nova.conf.CONF
+
+# Entity types for API limits. Each name is the matching config option name
+# prefixed with "server_" to disambiguate it in keystone.
+SERVER_METADATA_ITEMS = "server_metadata_items"
+INJECTED_FILES = "server_injected_files"
+INJECTED_FILES_CONTENT = "server_injected_file_content_bytes"
+INJECTED_FILES_PATH = "server_injected_file_path_bytes"
+# The only entity types accepted by enforce_api_limit().
+API_LIMITS = {
+    SERVER_METADATA_ITEMS,
+    INJECTED_FILES,
+    INJECTED_FILES_CONTENT,
+    INJECTED_FILES_PATH,
+}
+
+# Entity types for all DB limits. Only key_pairs gains the "server_" prefix
+# to disambiguate it in keystone; the server group names are used as-is.
+KEY_PAIRS = "server_key_pairs"
+SERVER_GROUPS = "server_groups"
+SERVER_GROUP_MEMBERS = "server_group_members"
+
+# Checks only happen when we are using the unified limits driver; with any
+# other value of CONF.quota.driver the enforce_* functions return early.
+UNIFIED_LIMITS_DRIVER = "nova.quota.UnifiedLimitsDriver"
+
+# Map entity types to the exception we raise in the case that the resource is
+# over the allowed limit. Each of these should be a subclass of
+# exception.OverQuota. Entity types missing from this map fall back to
+# exception.OverQuota itself.
+EXCEPTIONS = {
+    KEY_PAIRS: exception.KeypairLimitExceeded,
+    INJECTED_FILES_CONTENT: exception.OnsetFileContentLimitExceeded,
+    INJECTED_FILES_PATH: exception.OnsetFilePathLimitExceeded,
+    INJECTED_FILES: exception.OnsetFileLimitExceeded,
+    SERVER_METADATA_ITEMS: exception.MetadataLimitExceeded,
+    SERVER_GROUPS: exception.ServerGroupLimitExceeded,
+    SERVER_GROUP_MEMBERS: exception.GroupMemberLimitExceeded,
+}
+
+
+def always_zero_usage(
+    project_id: str, resource_names: ty.List[str]
+) -> ty.Dict[str, int]:
+    """Report a usage of zero for every requested resource.
+
+    This is the usage callback handed to oslo_limit's enforcer for API
+    limits. Nothing is counted up front, so the value taken from the API
+    request (the delta) is the whole amount compared against the limit.
+
+    :param project_id: supplied by the enforcer; not used here
+    :param resource_names: names of the resources whose usage is requested
+    :returns: dict mapping each requested resource name to 0
+    """
+    return dict.fromkeys(resource_names, 0)
+
+
+def _enforce_limit(
+    entity_type: str,
+    usage_callback: ty.Callable[..., ty.Dict[str, int]],
+    delta: int
+) -> None:
+    """Build an oslo.limit enforcer and check one entity type against it.
+
+    Shared by enforce_api_limit() and enforce_db_limit() so that both
+    translate oslo.limit failures into nova exceptions in the same way.
+
+    :param entity_type: name of the limit being enforced
+    :param usage_callback: callable handed to the enforcer to report the
+        current usage
+    :param delta: amount being requested on top of the current usage
+    :raises: exception.KeystoneConnectionFailed if the enforcer cannot be
+        created because the keystone session failed to initialise
+    :raises: the exception.OverQuota subclass mapped to ``entity_type`` in
+        EXCEPTIONS (or exception.OverQuota itself) when over the limit
+    """
+    try:
+        enforcer = limit.Enforcer(usage_callback)
+    except limit_exceptions.SessionInitError as e:
+        msg = ("Failed to connect to keystone while enforcing %s quota limit."
+               % entity_type)
+        LOG.error("%s Error: %s", msg, e)
+        raise exception.KeystoneConnectionFailed(msg) from e
+
+    try:
+        # No project id is passed: these limits are intended to be backed by
+        # keystone registered limits only.
+        enforcer.enforce(None, {entity_type: delta})
+    except limit_exceptions.ProjectOverLimit as e:
+        # Copy the exception message to a OverQuota to propagate to the
+        # API layer.
+        over_quota = EXCEPTIONS.get(entity_type, exception.OverQuota)
+        raise over_quota(str(e)) from e
+
+
+def enforce_api_limit(entity_type: str, count: int) -> None:
+    """Check if the values given are over the limit for that key.
+
+    This is generally used for limiting the size of certain API requests
+    that eventually get stored in the database.
+
+    Nothing is checked unless CONF.quota.driver is the unified limits
+    driver.
+
+    :param entity_type: one of the names in API_LIMITS
+    :param count: the value from the API request to check against the limit
+    :raises: ValueError if ``entity_type`` is not in API_LIMITS
+    :raises: exception.KeystoneConnectionFailed if the keystone session
+        used by the enforcer cannot be initialised
+    :raises: a subclass of exception.OverQuota if ``count`` is over the
+        limit
+    """
+    if CONF.quota.driver != UNIFIED_LIMITS_DRIVER:
+        return
+
+    if entity_type not in API_LIMITS:
+        fmt = "%s is not a valid API limit: %s"
+        raise ValueError(fmt % (entity_type, API_LIMITS))
+
+    # Usage is always reported as zero, so ``count`` alone is compared
+    # against the limit.
+    _enforce_limit(entity_type, always_zero_usage, count)
+
+
+def enforce_db_limit(
+    context: nova_context.RequestContext,
+    entity_type: str,
+    entity_scope: ty.Any,
+    delta: int
+) -> None:
+    """Check provided delta does not put resource over limit.
+
+    Firstly we count the current usage given the specified scope.
+    We then add that count to the specified delta to see if we
+    are over the limit for that kind of entity.
+
+    Note previously we used to recheck these limits.
+    However these are really soft DDoS protections,
+    not hard resource limits, so we don't do the recheck for these.
+
+    The scope is specific to the limit type:
+    * key_pairs scope is context.user_id
+    * server_groups scope is context.project_id
+    * server_group_members scope is server_group_uuid
+
+    Nothing is checked unless CONF.quota.driver is the unified limits
+    driver.
+
+    :param context: request context passed through to the count function
+    :param entity_type: one of the keys of DB_COUNT_FUNCTION
+    :param entity_scope: scope to count usage in, as described above
+    :param delta: number of new records being requested; must not be
+        negative (zero is accepted and checks the current usage only)
+    :raises: ValueError if ``entity_type`` has no count function or if
+        ``delta`` is negative
+    :raises: exception.KeystoneConnectionFailed if the keystone session
+        used by the enforcer cannot be initialised
+    :raises: a subclass of exception.OverQuota if usage plus ``delta`` is
+        over the limit
+    """
+    if CONF.quota.driver != UNIFIED_LIMITS_DRIVER:
+        return
+
+    if entity_type not in DB_COUNT_FUNCTION:
+        fmt = "%s does not have a DB count function defined: %s"
+        raise ValueError(fmt % (entity_type, DB_COUNT_FUNCTION.keys()))
+    if delta < 0:
+        # NOTE: zero is allowed even though the message says "positive";
+        # the wording is kept unchanged because existing tests match on it.
+        raise ValueError("delta must be a positive integer")
+
+    count_function = DB_COUNT_FUNCTION[entity_type]
+    # Bind the request context and scope now; the enforcer supplies the
+    # remaining positional arguments when it asks for the usage.
+    usage_callback = functools.partial(count_function, context, entity_scope)
+    _enforce_limit(entity_type, usage_callback, delta)
+
+
+def _keypair_count(context, user_id, *args):
+    """Count the key pairs that belong to ``user_id``.
+
+    :param context: request context used for the database query
+    :param user_id: the user whose key pairs are counted
+    :param args: extra positional arguments passed in by the enforcer's
+        usage callback invocation; ignored
+    :returns: dict mapping the KEY_PAIRS entity type to the current count
+    """
+    count = objects.KeyPairList.get_count_by_user(context, user_id)
+    # Use the shared constant so the key cannot drift from the entity type
+    # that enforce_db_limit() asks the enforcer about.
+    return {KEY_PAIRS: count}
+
+
+def _server_group_count(context, project_id, *args):
+    """Count the server groups that belong to ``project_id``.
+
+    :param context: request context used for the database query
+    :param project_id: the project whose server groups are counted
+    :param args: extra positional arguments passed in by the enforcer's
+        usage callback invocation; ignored
+    :returns: dict mapping the SERVER_GROUPS entity type to the current
+        count
+    """
+    raw_counts = objects.InstanceGroupList.get_counts(context, project_id)
+    # NOTE: the 'server_groups' lookup key below belongs to the get_counts()
+    # result, not to the limit name, so it stays a literal.
+    return {SERVER_GROUPS: raw_counts['project']['server_groups']}
+
+
+def _server_group_members_count(context, server_group_uuid, *args):
+    """Count the members of the server group ``server_group_uuid``.
+
+    :param context: request context used to load the server group
+    :param server_group_uuid: UUID of the server group to inspect
+    :param args: extra positional arguments passed in by the enforcer's
+        usage callback invocation; ignored
+    :returns: dict mapping the SERVER_GROUP_MEMBERS entity type to the
+        number of members in the group
+    """
+    # NOTE(johngarbutt) we used to count members added per user
+    server_group = objects.InstanceGroup.get_by_uuid(context,
+                                                     server_group_uuid)
+    return {SERVER_GROUP_MEMBERS: len(server_group.members)}
+
+
+# Map each DB-backed entity type to the function that counts its current
+# usage. enforce_db_limit() rejects any entity type that is not a key here.
+DB_COUNT_FUNCTION = {
+    KEY_PAIRS: _keypair_count,
+    SERVER_GROUPS: _server_group_count,
+    SERVER_GROUP_MEMBERS: _server_group_members_count,
+}
--- a/nova/tests/unit/limit/init.py
+++ b/nova/tests/unit/limit/init.py
--- a/nova/tests/unit/limit/test_local.py
+++ b/nova/tests/unit/limit/test_local.py
@ -0,0 +1,204 @@
+# Copyright 2022 StackHPC
+#
+#    Licensed under the Apache License, Version 2.0 (the "License"); you may
+#    not use this file except in compliance with the License. You may obtain
+#    a copy of the License at
+#
+#         http://www.apache.org/licenses/LICENSE-2.0
+#
+#    Unless required by applicable law or agreed to in writing, software
+#    distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+#    WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+#    License for the specific language governing permissions and limitations
+#    under the License.
+
+import mock
+
+from oslo_config import cfg
+from oslo_limit import exception as limit_exceptions
+from oslo_limit import fixture as limit_fixture
+from oslo_utils.fixture import uuidsentinel as uuids
+
+from nova import context
+from nova import exception
+from nova.limit import local as local_limit
+from nova import objects
+from nova import test
+
+CONF = cfg.CONF
+
+
+class TestLocalLimits(test.NoDBTestCase):
+    # Unit tests for nova.limit.local: API limits (enforce_api_limit) and
+    # DB-counted limits (enforce_db_limit). Database lookups are mocked and
+    # keystone limits are supplied through oslo.limit's LimitFixture.
+    def setUp(self):
+        super(TestLocalLimits, self).setUp()
+        # Enable the unified limits driver; the enforce_* functions return
+        # early for any other driver.
+        self.flags(driver=local_limit.UNIFIED_LIMITS_DRIVER, group="quota")
+        self.context = context.RequestContext()
+
+    # A count equal to the registered limit passes; one above it raises.
+    def test_enforce_api_limit_metadata(self):
+        # default max is 128
+        self.useFixture(limit_fixture.LimitFixture(
+            {local_limit.SERVER_METADATA_ITEMS: 128}, {}))
+        local_limit.enforce_api_limit(local_limit.SERVER_METADATA_ITEMS, 128)
+
+        e = self.assertRaises(exception.MetadataLimitExceeded,
+                              local_limit.enforce_api_limit,
+                              local_limit.SERVER_METADATA_ITEMS, 129)
+        msg = ("Resource %s is over limit" % local_limit.SERVER_METADATA_ITEMS)
+        self.assertIn(msg, str(e))
+
+    # With a different quota driver the check is skipped, so no fixture is
+    # registered and nothing is raised.
+    def test_enforce_api_limit_skip(self):
+        self.flags(driver="nova.quota.NoopQuotaDriver", group="quota")
+        local_limit.enforce_api_limit(local_limit.SERVER_METADATA_ITEMS, 200)
+
+    # A SessionInitError while creating the enforcer is translated into
+    # KeystoneConnectionFailed.
+    @mock.patch('oslo_limit.limit.Enforcer')
+    def test_enforce_api_limit_session_init_error(self, mock_util):
+        mock_util.side_effect = limit_exceptions.SessionInitError('error')
+
+        e = self.assertRaises(exception.KeystoneConnectionFailed,
+                              local_limit.enforce_api_limit,
+                              local_limit.SERVER_METADATA_ITEMS, 42)
+        expected = ('Failed to connect to keystone while enforcing '
+                    'server_metadata_items quota limit.')
+        self.assertIn(expected, str(e))
+
+    # KEY_PAIRS is a DB limit, not an API limit, so it is rejected here.
+    def test_enforce_api_limit_raises_for_invalid_entity(self):
+        e = self.assertRaises(ValueError,
+                              local_limit.enforce_api_limit,
+                              local_limit.KEY_PAIRS, 42)
+        expected = '%s is not a valid API limit: %s' % (
+            local_limit.KEY_PAIRS, local_limit.API_LIMITS)
+        self.assertEqual(expected, str(e))
+
+    # With no registered limit in the fixture the request is reported as
+    # over limit.
+    def test_enforce_api_limit_no_registered_limit_found(self):
+        self.useFixture(limit_fixture.LimitFixture({}, {}))
+        e = self.assertRaises(exception.MetadataLimitExceeded,
+                              local_limit.enforce_api_limit,
+                              local_limit.SERVER_METADATA_ITEMS, 42)
+        msg = ("Resource %s is over limit" % local_limit.SERVER_METADATA_ITEMS)
+        self.assertIn(msg, str(e))
+
+    # Each injected file limit accepts a value at the limit and raises its
+    # own exception type for a value one above it.
+    def test_enforce_injected_files(self):
+        reglimits = {local_limit.INJECTED_FILES: 5,
+                     local_limit.INJECTED_FILES_CONTENT: 10 * 1024,
+                     local_limit.INJECTED_FILES_PATH: 255}
+        self.useFixture(limit_fixture.LimitFixture(reglimits, {}))
+
+        local_limit.enforce_api_limit(local_limit.INJECTED_FILES, 5)
+        local_limit.enforce_api_limit(local_limit.INJECTED_FILES_CONTENT,
+                                      10 * 1024)
+        local_limit.enforce_api_limit(local_limit.INJECTED_FILES_PATH, 255)
+
+        e = self.assertRaises(exception.OnsetFileLimitExceeded,
+                              local_limit.enforce_api_limit,
+                              local_limit.INJECTED_FILES, 6)
+        msg = ("Resource %s is over limit" % local_limit.INJECTED_FILES)
+        self.assertIn(msg, str(e))
+        e = self.assertRaises(exception.OnsetFileContentLimitExceeded,
+                              local_limit.enforce_api_limit,
+                              local_limit.INJECTED_FILES_CONTENT,
+                              10 * 1024 + 1)
+        msg = (
+            "Resource %s is over limit" % local_limit.INJECTED_FILES_CONTENT)
+        self.assertIn(msg, str(e))
+        e = self.assertRaises(exception.OnsetFilePathLimitExceeded,
+                              local_limit.enforce_api_limit,
+                              local_limit.INJECTED_FILES_PATH, 256)
+        msg = ("Resource %s is over limit" % local_limit.INJECTED_FILES_PATH)
+        self.assertIn(msg, str(e))
+
+    # Current usage (mocked count) plus delta is compared with the limit.
+    @mock.patch.object(objects.KeyPairList, "get_count_by_user")
+    def test_enforce_db_limit_keypairs(self, mock_count):
+        self.useFixture(limit_fixture.LimitFixture(
+            {local_limit.KEY_PAIRS: 100}, {}))
+
+        # 99 existing + 1 requested reaches the limit of 100 and is allowed.
+        mock_count.return_value = 99
+        local_limit.enforce_db_limit(self.context, local_limit.KEY_PAIRS,
+                                     uuids.user_id, 1)
+        mock_count.assert_called_once_with(self.context, uuids.user_id)
+
+        # 99 existing + 2 requested goes over the limit.
+        self.assertRaises(exception.KeypairLimitExceeded,
+                          local_limit.enforce_db_limit,
+                          self.context, local_limit.KEY_PAIRS,
+                          uuids.user_id, 2)
+
+        # A delta of zero checks the current usage only: 100 is allowed,
+        # 101 is over the limit.
+        mock_count.return_value = 100
+        local_limit.enforce_db_limit(self.context, local_limit.KEY_PAIRS,
+                                     uuids.user_id, 0)
+        mock_count.return_value = 101
+        self.assertRaises(exception.KeypairLimitExceeded,
+                          local_limit.enforce_db_limit,
+                          self.context, local_limit.KEY_PAIRS,
+                          uuids.user_id, 0)
+
+    # With a different quota driver the check is skipped, so no count mock
+    # or fixture is needed.
+    def test_enforce_db_limit_skip(self):
+        self.flags(driver="nova.quota.NoopQuotaDriver", group="quota")
+        local_limit.enforce_db_limit(self.context, local_limit.KEY_PAIRS,
+                                     uuids.user_id, 1)
+
+    # A SessionInitError while creating the enforcer is translated into
+    # KeystoneConnectionFailed with exactly the expected message.
+    @mock.patch('oslo_limit.limit.Enforcer')
+    def test_enforce_db_limit_session_init_error(self, mock_util):
+        mock_util.side_effect = limit_exceptions.SessionInitError(
+            test.TestingException())
+
+        e = self.assertRaises(exception.KeystoneConnectionFailed,
+                              local_limit.enforce_db_limit, self.context,
+                              local_limit.KEY_PAIRS, uuids.user_id, 42)
+        expected = ('Failed to connect to keystone while enforcing '
+                    'server_key_pairs quota limit.')
+        self.assertEqual(expected, str(e))
+
+    # INJECTED_FILES is an API limit with no DB count function, so it is
+    # rejected here.
+    def test_enforce_db_limit_raise_on_invalid(self):
+        e = self.assertRaises(ValueError, local_limit.enforce_db_limit,
+                              self.context, local_limit.INJECTED_FILES,
+                              uuids.user_id, 1)
+        fmt = '%s does not have a DB count function defined: %s'
+        expected = fmt % (
+            local_limit.INJECTED_FILES, local_limit.DB_COUNT_FUNCTION.keys())
+        self.assertEqual(expected, str(e))
+
+    # With no registered limit in the fixture the request is reported as
+    # over limit.
+    @mock.patch.object(objects.KeyPairList, "get_count_by_user")
+    def test_enforce_db_limit_no_registered_limit_found(self, mock_count):
+        self.useFixture(limit_fixture.LimitFixture({}, {}))
+        mock_count.return_value = 5
+        e = self.assertRaises(exception.KeypairLimitExceeded,
+                              local_limit.enforce_db_limit, self.context,
+                              local_limit.KEY_PAIRS, uuids.user_id, 42)
+        msg = ("Resource %s is over limit" % local_limit.KEY_PAIRS)
+        self.assertIn(msg, str(e))
+
+    # A negative delta is rejected before any limit is consulted.
+    def test_enforce_db_limit_raise_bad_delta(self):
+        e = self.assertRaises(ValueError, local_limit.enforce_db_limit,
+                              self.context, local_limit.KEY_PAIRS,
+                              uuids.user_id, -1)
+        self.assertEqual("delta must be a positive integer", str(e))
+
+    # Server groups are counted via InstanceGroupList.get_counts for the
+    # given project id.
+    @mock.patch.object(objects.InstanceGroupList, "get_counts")
+    def test_enforce_db_limit_server_groups(self, mock_count):
+        self.useFixture(limit_fixture.LimitFixture(
+            {local_limit.SERVER_GROUPS: 10}, {}))
+
+        mock_count.return_value = {'project': {'server_groups': 9}}
+        local_limit.enforce_db_limit(self.context, local_limit.SERVER_GROUPS,
+                                     uuids.project_id, 1)
+        mock_count.assert_called_once_with(self.context, uuids.project_id)
+
+        self.assertRaises(exception.ServerGroupLimitExceeded,
+                          local_limit.enforce_db_limit,
+                          self.context, local_limit.SERVER_GROUPS,
+                          uuids.project_id, 2)
+
+    # Members are counted from the server group loaded by UUID; the group
+    # here is empty, so the delta alone is compared with the limit.
+    @mock.patch.object(objects.InstanceGroup, "get_by_uuid")
+    def test_enforce_db_limit_server_group_members(self, mock_get):
+        self.useFixture(limit_fixture.LimitFixture(
+            {local_limit.SERVER_GROUP_MEMBERS: 10}, {}))
+
+        mock_get.return_value = objects.InstanceGroup(members=[])
+        local_limit.enforce_db_limit(self.context,
+                                     local_limit.SERVER_GROUP_MEMBERS,
+                                     uuids.server_group, 10)
+        mock_get.assert_called_once_with(self.context, uuids.server_group)
+
+        self.assertRaises(exception.GroupMemberLimitExceeded,
+                          local_limit.enforce_db_limit,
+                          self.context, local_limit.SERVER_GROUP_MEMBERS,
+                          uuids.server_group, 11)
--- a/requirements.txt
+++ b/requirements.txt
@ -31,6 +31,7 @@ oslo.concurrency>=4.5.0 # Apache-2.0
 oslo.config>=8.6.0 # Apache-2.0
 oslo.context>=3.4.0 # Apache-2.0
 oslo.log>=4.6.1 # Apache-2.0
+oslo.limit>=1.5.0 # Apache-2.0
 oslo.reports>=1.18.0 # Apache-2.0
 oslo.serialization>=4.2.0 # Apache-2.0
 oslo.upgradecheck>=1.3.0