From c31dd1aaac0a1dd8ca3f77b6da911ae85de6dc7a Mon Sep 17 00:00:00 2001 From: Morgan Fainberg Date: Fri, 22 Mar 2019 12:35:16 -0700 Subject: [PATCH] Pass `flush_on_reconnect` to memcache pooled backend If a memcache server disappears and then reconnects when multiple memcache servers are used (specific to the python-memcached based backends) it is possible that the server will contain stale data. The default is now to supply the ``flush_on_reconnect`` optional argument to the backend. This means that when the service connects to a memcache server, it will flush all cached data in the server. The pooled backend is more likely to run into issues with this as it does not explicitly use a thread.local for the client. The non-pooled backend was not touched, it is not the recommended production use-case. See the help from python-memcached: @param flush_on_reconnect: optional flag which prevents a scenario that can cause stale data to be read: If there's more than one memcached server and the connection to one is interrupted, keys that mapped to that server will get reassigned to another. If the first server comes back, those keys will map to it again. If it still has its data, get()s can read stale data that was overwritten on another server. This flag is off by default for backwards compatibility. Change-Id: I3e335261f749ad065e8abe972f4ac476d334e6b3 closes-bug: #1819957 (cherry picked from commit 1192f185a5fd2fa6177655f157146488a3de81d1) --- oslo_cache/_memcache_pool.py | 22 ++++++++++++++++- .../notes/bug-1819957-ccff6b0ec9d1cbf2.yaml | 24 +++++++++++++++++++ 2 files changed, 45 insertions(+), 1 deletion(-) create mode 100644 releasenotes/notes/bug-1819957-ccff6b0ec9d1cbf2.yaml diff --git a/oslo_cache/_memcache_pool.py b/oslo_cache/_memcache_pool.py index e4411dcb..3193e560 100644 --- a/oslo_cache/_memcache_pool.py +++ b/oslo_cache/_memcache_pool.py @@ -215,7 +215,27 @@ class MemcacheClientPool(ConnectionPool): self._hosts_deaduntil = [0] * len(urls) def _create_connection(self): - return _MemcacheClient(self.urls, **self._arguments) + # NOTE(morgan): Explicitly set flush_on_reconnect for pooled + # connections. This should ensure that stale data is never consumed + # from a server that pops in/out due to a network partition + # or disconnect. + # + # See the help from python-memcached: + # + # param flush_on_reconnect: optional flag which prevents a + # scenario that can cause stale data to be read: If there's more + # than one memcached server and the connection to one is + # interrupted, keys that mapped to that server will get + # reassigned to another. If the first server comes back, those + # keys will map to it again. If it still has its data, get()s + # can read stale data that was overwritten on another + # server. This flag is off by default for backwards + # compatibility. + # + # The normal non-pooled clients connect explicitly on each use and + # does not need the explicit flush_on_reconnect + return _MemcacheClient(self.urls, flush_on_reconnect=True, + **self._arguments) def _destroy_connection(self, conn): conn.disconnect_all() diff --git a/releasenotes/notes/bug-1819957-ccff6b0ec9d1cbf2.yaml b/releasenotes/notes/bug-1819957-ccff6b0ec9d1cbf2.yaml new file mode 100644 index 00000000..c2d54282 --- /dev/null +++ b/releasenotes/notes/bug-1819957-ccff6b0ec9d1cbf2.yaml @@ -0,0 +1,24 @@ +--- +fixes: + - | + [`bug 1819957 `_] + If a memcache server disappears and then reconnects when multiple memcache + servers are used (specific to the python-memcached based backends) it is + possible that the server will contain stale data. The default is now to + supply the ``flush_on_reconnect`` optional argument to the backend. This + means that when the service connects to a memcache server, it will flush + all cached data in the server. This change only impacts the pooled backend + as it is the most likely (with heavy use of greenlet) to be impacted + by the problem and is the recommended production configuration. + + See the help from python-memcached: + + @param flush_on_reconnect: optional flag which prevents a + scenario that can cause stale data to be read: If there's more + than one memcached server and the connection to one is + interrupted, keys that mapped to that server will get + reassigned to another. If the first server comes back, those + keys will map to it again. If it still has its data, get()s + can read stale data that was overwritten on another + server. This flag is off by default for backwards + compatibility.