Do not hardcode flush_on_reconnect, move to oslo.cache config

Param flush_on_reconnect is very risky to use on production
deployments. It can cause exponential raising of connections
to memcached servers. Moreover this option makes sense only
in keystone's oslo.cache config.

This patch is moving flush_on_reconnect from code to oslo.cache
config block to be configurable.

Co-Authored-By: Hervé Beraud <hberaud@redhat.com>
Change-Id: I8e6826bfb2c85e7ceed03e1667bd6a06b3dff469
Closes-Bug: #1888394
This commit is contained in:
Michal Arbet 2020-07-21 15:42:17 +02:00 committed by Hervé Beraud
parent bc9c70fd66
commit a437b219ac
6 changed files with 101 additions and 22 deletions

View File

@ -211,27 +211,7 @@ class MemcacheClientPool(ConnectionPool):
self._hosts_deaduntil = [0] * len(urls)
def _create_connection(self):
# NOTE(morgan): Explicitly set flush_on_reconnect for pooled
# connections. This should ensure that stale data is never consumed
# from a server that pops in/out due to a network partition
# or disconnect.
#
# See the help from python-memcached:
#
# param flush_on_reconnect: optional flag which prevents a
# scenario that can cause stale data to be read: If there's more
# than one memcached server and the connection to one is
# interrupted, keys that mapped to that server will get
# reassigned to another. If the first server comes back, those
# keys will map to it again. If it still has its data, get()s
# can read stale data that was overwritten on another
# server. This flag is off by default for backwards
# compatibility.
#
# The normal non-pooled clients connect explicitly on each use and
# does not need the explicit flush_on_reconnect
return _MemcacheClient(self.urls, flush_on_reconnect=True,
**self._arguments)
return _MemcacheClient(self.urls, **self._arguments)
def _destroy_connection(self, conn):
conn.disconnect_all()

View File

@ -110,6 +110,11 @@ FILE_OPTIONS = {
default=10,
help='Number of seconds that an operation will wait to get '
'a memcache client connection.'),
cfg.BoolOpt('memcache_pool_flush_on_reconnect',
default=False,
help='Global toggle if memcache will be flushed'
' on reconnect.'
' (oslo_cache.memcache_pool backend only).'),
cfg.BoolOpt('tls_enabled',
default=False,
help='Global toggle for TLS usage when comunicating with'
@ -150,6 +155,25 @@ def configure(conf):
conf.register_opt(option, group=section)
def set_defaults(conf, memcache_pool_flush_on_reconnect=False):
"""Set defaults for configuration variables.
Overrides default options values.
:param conf: Configuration object, managed by the caller.
:type conf: oslo.config.cfg.ConfigOpts
:param memcache_pool_flush_on_reconnect: The default state for the
``flush_on_reconnect`` flag. By default deactivated
:type memcache_pool_flush_on_reconnect: bool
"""
conf.register_opt(FILE_OPTIONS, group='cache')
cfg.set_defaults(
FILE_OPTIONS,
memcache_pool_flush_on_reconnect=memcache_pool_flush_on_reconnect)
def list_opts():
"""Return a list of oslo_config options.

View File

@ -48,6 +48,8 @@ class PooledMemcachedBackend(memcached_backend.MemcachedBackend):
'socket_timeout': arguments.get('socket_timeout', 3.0),
'server_max_value_length':
arguments.get('server_max_value_length'),
'flush_on_reconnect': arguments.get('pool_flush_on_reconnect',
False),
},
maxsize=arguments.get('pool_maxsize', 10),
unused_timeout=arguments.get('pool_unused_timeout', 60),

View File

@ -139,10 +139,31 @@ def _build_cache_config(conf):
# NOTE(yorik-sar): these arguments will be used for memcache-related
# backends. Use setdefault for url to support old-style setting through
# backend_argument=url:127.0.0.1:11211
#
# NOTE(morgan): Explicitly set flush_on_reconnect for pooled
# connections. This should ensure that stale data is never consumed
# from a server that pops in/out due to a network partition
# or disconnect.
#
# See the help from python-memcached:
#
# param flush_on_reconnect: optional flag which prevents a
# scenario that can cause stale data to be read: If there's more
# than one memcached server and the connection to one is
# interrupted, keys that mapped to that server will get
# reassigned to another. If the first server comes back, those
# keys will map to it again. If it still has its data, get()s
# can read stale data that was overwritten on another
# server. This flag is off by default for backwards
# compatibility.
#
# The normal non-pooled clients connect explicitly on each use and
# does not need the explicit flush_on_reconnect
conf_dict.setdefault('%s.arguments.url' % prefix,
conf.cache.memcache_servers)
for arg in ('dead_retry', 'socket_timeout', 'pool_maxsize',
'pool_unused_timeout', 'pool_connection_get_timeout'):
'pool_unused_timeout', 'pool_connection_get_timeout',
'pool_flush_on_reconnect'):
value = getattr(conf.cache, 'memcache_' + arg)
conf_dict['%s.arguments.%s' % (prefix, arg)] = value

View File

@ -352,6 +352,36 @@ class CacheRegionTest(test_cache.BaseTestCase):
config_dict['test_prefix.arguments.tls_context'],
)
def test_cache_dictionary_config_builder_flush_on_reconnect_enabled(self):
"""Validate we build a sane dogpile.cache dictionary config."""
self.config_fixture.config(group='cache',
enabled=True,
config_prefix='test_prefix',
backend='oslo_cache.dict',
memcache_pool_flush_on_reconnect=True)
config_dict = cache._build_cache_config(self.config_fixture.conf)
self.assertTrue(self.config_fixture.conf.cache.
memcache_pool_flush_on_reconnect)
self.assertTrue(config_dict['test_prefix.arguments'
'.pool_flush_on_reconnect'])
def test_cache_dictionary_config_builder_flush_on_reconnect_disabled(self):
"""Validate we build a sane dogpile.cache dictionary config."""
self.config_fixture.config(group='cache',
enabled=True,
config_prefix='test_prefix',
backend='oslo_cache.dict',
memcache_pool_flush_on_reconnect=False)
config_dict = cache._build_cache_config(self.config_fixture.conf)
self.assertFalse(self.config_fixture.conf.cache.
memcache_pool_flush_on_reconnect)
self.assertFalse(config_dict['test_prefix.arguments'
'.pool_flush_on_reconnect'])
def test_cache_debug_proxy(self):
single_value = 'Test Value'
single_key = 'testkey'

View File

@ -0,0 +1,22 @@
---
fixes:
- |
[`bug 1888394 <https://bugs.launchpad.net/oslo.cache/+bug/1888394>`_]
If a memcache server disappears and then reconnects when multiple memcache
servers are used (specific to the python-memcached based backends) it is
possible that the server will contain stale data. To avoid this, param
flush_on_reconnect was used in code.
But unfortunatelly this option is causing another issue.
If memcache server disappears, or client had broken connection to memcache server,
clients start to flush server on reconnect.
This means that network connections will go UP and can cause server to be overloaded
until memcache will be unresponsive.
Simply said this option can cause loop of flushs and overloaded memcached servers.
This change is moving optional parameter `flush_on_reconnect` to oslo.cache config.
features:
- Configuration option ``memcache_pool_flush_on_reconnect`` added to control
if flush will be sent to memcached server after reconnect.