Merge "rework memoized as a LRU cache with expiry"
commit 19827011d8
@@ -809,6 +809,18 @@ This SESSION_TIMEOUT is a method to supersede the token timeout with a shorter
 horizon session timeout (in seconds). So if your token expires in 60 minutes,
 a value of 1800 will log users out after 30 minutes.
 
+MEMOIZED_MAX_SIZE_DEFAULT
+-------------------------
+
+.. versionadded:: 15.0.0 (Stein)
+
+Default: ``25``
+
+MEMOIZED_MAX_SIZE_DEFAULT allows setting a global default to help control
+memory usage when caching. It should be at least 2 x the number of threads
+with a little bit of extra buffer.
+
+
 SHOW_KEYSTONE_V2_RC
 --------------------
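As a worked example of the sizing guidance above (a sketch only; the thread count is an assumed value, not part of this change), a deployment running 16 web server worker threads might set:

    # Assumed deployment: 16 worker threads; 2 x threads plus a small buffer.
    MEMOIZED_MAX_SIZE_DEFAULT = 2 * 16 + 8  # up to 40 cached items per decorated function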
@@ -35,3 +35,48 @@ class MemoizedTests(test.TestCase):
         for x in range(0, 5):
             cache_calls(1)
         self.assertEqual(1, len(values_list))
+
+    def test_memoized_decorator_cache_with_LRU(self):
+        values_list = []
+
+        @memoized.memoized(max_size=5)
+        def cache_calls(param):
+            values_list.append(param)
+            return param
+
+        def non_cached_calls(param):
+            values_list.append(param)
+            return param
+
+        for x in range(0, 5):
+            non_cached_calls(1)
+        self.assertEqual(5, len(values_list))
+
+        values_list = []
+        for x in range(0, 5):
+            cache_calls(1)
+        self.assertEqual(1, len(values_list))
+        # cache only has a value for key 1
+
+        for x in range(2, 6):
+            cache_calls(x)
+        self.assertEqual(5, len(values_list))
+        # cache now has 5 values, with 1 as the least recently used key
+        cache_calls(6)
+        self.assertEqual(6, len(values_list))
+        # 1 is popped off the cache
+        cache_calls(1)
+        self.assertEqual(7, len(values_list))
+        # 1 is re-added and cached; 2 will be popped off
+
+        cache_calls(3)
+        self.assertEqual(7, len(values_list))
+        # 3 was least recently used, but has now been updated to most recent
+
+        cache_calls(2)
+        self.assertEqual(8, len(values_list))
+        # 2 is re-added, 4 is dropped
+
+        cache_calls(4)
+        self.assertEqual(9, len(values_list))
+        # 4 is re-added, 5 is dropped
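The test above steps through the eviction order by hand. A minimal standalone sketch of the same OrderedDict-based LRU mechanics, using only the standard library (touch and compute are illustrative names, not part of this change):

    import collections

    cache = collections.OrderedDict()
    max_cache_size = 5

    def touch(key, compute):
        try:
            # On a hit, pop and reinsert the key so it moves to the
            # most-recently-used end of the OrderedDict.
            value = cache[key] = cache.pop(key)
        except KeyError:
            value = cache[key] = compute(key)
        # Evict from the least-recently-used end once over the bound.
        while len(cache) > max_cache_size:
            cache.popitem(last=False)
        return value

    for k in (1, 2, 3, 4, 5):
        touch(k, lambda key: key * key)
    touch(6, lambda key: key * key)       # evicts key 1, the oldest entry
    assert 1 not in cache and 6 in cache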
@@ -18,6 +18,8 @@ import threading
 import warnings
 import weakref
 
+from django.conf import settings
+
 
 class UnhashableKeyWarning(RuntimeWarning):
     """Raised when trying to memoize a function with an unhashable argument."""
@@ -46,65 +48,88 @@ def _get_key(args, kwargs, remove_callback):
     return weak_args, weak_kwargs
 
 
-def memoized(func):
+def memoized(func=None, max_size=None):
     """Decorator that caches function calls.
 
     Caches the decorated function's return value the first time it is called
     with the given arguments. If called later with the same arguments, the
     cached value is returned instead of calling the decorated function again.
 
+    It operates as an LRU cache and keeps up to max_size cached items,
+    always clearing the oldest items first.
+
     The cache uses weak references to the passed arguments, so it doesn't keep
     them alive in memory forever.
     """
-    # The dictionary in which all the data will be cached. This is a separate
-    # instance for every decorated function, and it's stored in a closure of
-    # the wrapped function.
-    cache = {}
-    locks = collections.defaultdict(threading.Lock)
-
-    @functools.wraps(func)
-    def wrapped(*args, **kwargs):
-        # We need to have defined key early, to be able to use it in the
-        # remove() function, but we calculate the actual value of the key
-        # later on, because we need the remove() function for that.
-        key = None
-
-        def remove(ref):
-            """A callback to remove outdated items from cache."""
-            try:
-                # The key here is from closure, and is calculated later.
-                del cache[key]
-                del locks[key]
-            except KeyError:
-                # Some other weak reference might have already removed that
-                # key -- in that case we don't need to do anything.
-                pass
-
-        key = _get_key(args, kwargs, remove)
-        try:
-            with locks[key]:
-                try:
-                    # We want cache hit to be as fast as possible, and don't
-                    # really care much about the speed of a cache miss, because
-                    # it will only happen once and likely calls some external
-                    # API, database, or some other slow thing. That's why the
-                    # hit is in straightforward code, and the miss is in an
-                    # exception.
-                    value = cache[key]
-                except KeyError:
-                    value = cache[key] = func(*args, **kwargs)
-        except TypeError:
-            # The calculated key may be unhashable when an unhashable
-            # object, such as a list, is passed as one of the arguments. In
-            # that case, we can't cache anything and simply always call the
-            # decorated function.
-            warnings.warn(
-                "The key of %s %s is not hashable and cannot be memoized: %r\n"
-                % (func.__module__, func.__name__, key),
-                UnhashableKeyWarning, 2)
-            value = func(*args, **kwargs)
-        return value
-    return wrapped
+    def decorate(func):
+
+        # The dictionary in which all the data will be cached. This is a
+        # separate instance for every decorated function, and it's stored in a
+        # closure of the wrapped function.
+        cache = collections.OrderedDict()
+        locks = collections.defaultdict(threading.Lock)
+        if max_size:
+            max_cache_size = max_size
+        else:
+            max_cache_size = getattr(settings, 'MEMOIZED_MAX_SIZE_DEFAULT', 25)
+
+        @functools.wraps(func)
+        def wrapped(*args, **kwargs):
+            # We need to have defined key early, to be able to use it in the
+            # remove() function, but we calculate the actual value of the key
+            # later on, because we need the remove() function for that.
+            key = None
+
+            def remove(ref):
+                """A callback to remove outdated items from cache."""
+                try:
+                    # The key here is from closure, and is calculated later.
+                    del cache[key]
+                    del locks[key]
+                except KeyError:
+                    # Some other weak reference might have already removed that
+                    # key -- in that case we don't need to do anything.
+                    pass
+
+            key = _get_key(args, kwargs, remove)
+            try:
+                with locks[key]:
+                    try:
+                        # We want cache hit to be as fast as possible, and
+                        # don't really care much about the speed of a cache
+                        # miss, because it will only happen once and likely
+                        # calls some external API, database, or some other slow
+                        # thing. That's why the hit is in straightforward code,
+                        # and the miss is in an exception.
+                        # We also want to pop the key and reset it to make sure
+                        # the position it has in the order updates.
+                        value = cache[key] = cache.pop(key)
+                    except KeyError:
+                        value = cache[key] = func(*args, **kwargs)
+            except TypeError:
+                # The calculated key may be unhashable when an unhashable
+                # object, such as a list, is passed as one of the arguments. In
+                # that case, we can't cache anything and simply always call the
+                # decorated function.
+                warnings.warn(
+                    "The key of %s %s is not hashable and cannot be memoized: "
+                    "%r\n" % (func.__module__, func.__name__, key),
+                    UnhashableKeyWarning, 2)
+                value = func(*args, **kwargs)
+
+            while len(cache) > max_cache_size:
+                try:
+                    popped_tuple = cache.popitem(last=False)
+                    locks.pop(popped_tuple[0], None)
+                except KeyError:
+                    pass
+
+            return value
+        return wrapped
+    if func and callable(func):
+        return decorate(func)
+    return decorate
 
 
 # We can use @memoized for methods now too, because it uses weakref and so
 # it doesn't keep the instances in memory forever. We might want to separate
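Because of the closing "if func and callable(func)" branch, the reworked decorator can be applied both bare and with arguments. A usage sketch (get_image and get_flavor are hypothetical functions, not part of this change):

    @memoized  # bare: cache bounded by MEMOIZED_MAX_SIZE_DEFAULT (or 25)
    def get_image(request, image_id):
        ...

    @memoized(max_size=100)  # explicit per-function bound
    def get_flavor(request, flavor_id):
        ...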
@@ -216,6 +216,11 @@ SESSION_COOKIE_MAX_SIZE = 4093
 # https://bugs.launchpad.net/horizon/+bug/1349463
 SESSION_SERIALIZER = 'django.contrib.sessions.serializers.PickleSerializer'
 
+# MEMOIZED_MAX_SIZE_DEFAULT allows setting a global default to help control
+# memory usage when caching. It should be at least 2 x the number of threads
+# with a little bit of extra buffer.
+MEMOIZED_MAX_SIZE_DEFAULT = 25
+
 CSRF_FAILURE_VIEW = 'openstack_dashboard.views.csrf_failure'
 
 LANGUAGES = (
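Deployments that want a different global bound can override this value in local_settings.py; functions decorated with an explicit max_size are unaffected. A sketch with an assumed value:

    # local_settings.py -- assumed example value, not a recommendation
    MEMOIZED_MAX_SIZE_DEFAULT = 50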