proxy: Add a chance to skip memcache for get_*_info calls

If you've got thousands of requests per second for objects in a single
container, you basically never want that container's info to fall out
of memcache. If it *does*, all those clients are almost certainly going
to overload the container.

Avoid this by allowing some small fraction of requests to bypass and
refresh the cache, pushing out the TTL as long as there continue to be
requests to the container. The likelihood of skipping the cache is
configurable, similar to what we did for shard range sets.
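
For example, an operator might enable this in the [app:proxy-server]
section like so (illustrative values; these are percentages, so 0.1
means roughly one request in a thousand bypasses the cache):

    account_existence_skip_cache_pct = 0.1
    container_existence_skip_cache_pct = 0.1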

Change-Id: If9249a42b30e2a2e7c4b0b91f947f24bf891b86f
Closes-Bug: #1883324
Tim Burke 2022-01-06 12:09:58 -08:00 committed by Matthew Oliver
parent 24acc6e56b
commit 5c6407bf59
11 changed files with 327 additions and 223 deletions

View File

@@ -156,9 +156,9 @@ ionice_priority None I/O scheduling p
[proxy-server]
**************
====================================== =============== =====================================
============================================== =============== =====================================
Option Default Description
-------------------------------------- --------------- -------------------------------------
---------------------------------------------- --------------- -------------------------------------
use Entry point for paste.deploy for
the proxy server. For most
cases, this should be
@@ -177,6 +177,34 @@ recheck_account_existence 60 Cache timeout in second
recheck_container_existence 60 Cache timeout in seconds to
send memcached for container
existence
account_existence_skip_cache_pct 0.0 Periodically bypass the cache
for account info requests and
go to disk to refresh the data
in the cache. This is the
percentage of requests that
should randomly skip the cache.
Values around 0.0 - 0.1 (up to
1 in every 1000) are recommended.
container_existence_skip_cache_pct 0.0 Periodically bypass the cache
for container info requests and
go to disk to refresh the data
in the cache. This is the
percentage of requests that
should randomly skip the cache.
Values around 0.0 - 0.1 (up to
1 in every 1000) are recommended.
container_updating_shard_ranges_skip_cache_pct 0.0 Periodically bypass the cache
for shard_range update requests and
go to disk to refresh the data
in the cache. This is the
percentage of requests that
should randomly skip the cache.
Values around 0.0 - 0.1 (up to
1 in every 1000) are recommended.
container_listing_shard_ranges_skip_cache_pct 0.0 Periodically bypass the cache
for shard_range listing requests
and go to disk to refresh the data
in the cache. This is the
percentage of requests that
should randomly skip the cache.
Values around 0.0 - 0.1 (up to
1 in every 1000) are recommended.
object_chunk_size 65536 Chunk size to read from
object servers
client_chunk_size 65536 Chunk size to read from
@@ -355,4 +383,4 @@ write_affinity_handoff_delete_count auto The number of local (as
(replicas - len(local_primary_nodes)).
This option may be overridden in a
per-policy configuration section.
====================================== =============== =====================================
============================================== =============== =====================================

View File

@@ -153,8 +153,10 @@ use = egg:swift#proxy
# data is present in memcache, we can periodically refresh the data in memcache
# without causing a thundering herd. Values around 0.0 - 0.1 (i.e., one in
# every thousand requests skips cache, or fewer) are recommended.
# container_existence_skip_cache_pct = 0.0
# container_updating_shard_ranges_skip_cache_pct = 0.0
# container_listing_shard_ranges_skip_cache_pct = 0.0
# account_existence_skip_cache_pct = 0.0
#
# object_chunk_size = 65536
# client_chunk_size = 65536

View File

@@ -167,6 +167,9 @@ from swift.common.registry import register_swift_info, \
class ListingEtagMiddleware(object):
def __init__(self, app):
self.app = app
# Pass this along so get_container_info will have the configured
# odds to skip cache
self._pipeline_final_app = app._pipeline_final_app
def __call__(self, env, start_response):
# a lot of this is cribbed from listing_formats / swob.Request

View File

@@ -47,5 +47,8 @@ def filter_factory(global_conf, **local_conf):
if 'symlink' not in get_swift_info():
raise ValueError('object versioning requires symlinks')
app = ObjectVersioningMiddleware(app, conf)
# Pass this along so get_container_info will have the configured
# odds to skip cache
app._pipeline_final_app = app.app._pipeline_final_app
return VersionedWritesMiddleware(app, conf)
return versioning_filter

View File

@@ -3716,7 +3716,11 @@ class StreamingPile(GreenAsyncPile):
# Keep populating the pile as greenthreads become available
for args in args_iter:
yield next(self)
try:
to_yield = next(self)
except StopIteration:
break
yield to_yield
self.spawn(func, *args)
# Drain the pile

View File

@@ -750,7 +750,30 @@ def _get_info_from_memcache(app, env, account, container=None):
cache_key = get_cache_key(account, container)
memcache = cache_from_env(env, True)
if memcache:
try:
proxy_app = app._pipeline_final_app
except AttributeError:
# Only the middleware entry-points get a reference to the
# proxy-server app; if a middleware composes itself as multiple
# filters, we'll just have to choose a reasonable default
skip_chance = 0.0
logger = None
else:
if container:
skip_chance = proxy_app.container_existence_skip_cache
else:
skip_chance = proxy_app.account_existence_skip_cache
logger = proxy_app.logger
info_type = 'container' if container else 'account'
if skip_chance and random.random() < skip_chance:
info = None
if logger:
logger.increment('%s.info.cache.skip' % info_type)
else:
info = memcache.get(cache_key)
if logger:
logger.increment('%s.info.cache.%s' % (
info_type, 'hit' if info else 'miss'))
if info and six.PY2:
# Get back to native strings
new_info = {}
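
To get a feel for the odds involved, here is a small standalone sketch
(plain Python, not part of Swift) of how often the skip branch above
fires when an operator sets the recommended 0.1 (percent):

    import random

    skip_chance = 0.001  # container_existence_skip_cache_pct = 0.1, as a fraction
    lookups = 1000000
    skips = sum(1 for _ in range(lookups) if random.random() < skip_chance)
    # Expect roughly 1000 skips per million lookups; each skip forces a
    # backend fetch that rewrites the cache entry and pushes its TTL out.
    print('%d of %d lookups skipped cache' % (skips, lookups))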

View File

@@ -193,6 +193,10 @@ class Application(object):
def __init__(self, conf, logger=None, account_ring=None,
container_ring=None):
# This is for the sake of tests which instantiate an Application
# directly rather than via loadapp().
self._pipeline_final_app = self
if conf is None:
conf = {}
if logger is None:
@@ -230,12 +234,16 @@ self.recheck_account_existence = \
self.recheck_account_existence = \
int(conf.get('recheck_account_existence',
DEFAULT_RECHECK_ACCOUNT_EXISTENCE))
self.container_existence_skip_cache = config_percent_value(
conf.get('container_existence_skip_cache_pct', 0))
self.container_updating_shard_ranges_skip_cache = \
config_percent_value(conf.get(
'container_updating_shard_ranges_skip_cache_pct', 0))
self.container_listing_shard_ranges_skip_cache = \
config_percent_value(conf.get(
'container_listing_shard_ranges_skip_cache_pct', 0))
self.account_existence_skip_cache = config_percent_value(
conf.get('account_existence_skip_cache_pct', 0))
self.allow_account_management = \
config_true_value(conf.get('allow_account_management', 'no'))
self.container_ring = container_ring or Ring(swift_dir,
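
For reference, config_percent_value converts the configured percentage
into the fraction stored on the proxy app (and later compared against
random.random()); a quick check, assuming a Swift install is importable:

    >>> from swift.common.utils import config_percent_value
    >>> config_percent_value('0.1')
    0.001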

View File

@@ -77,6 +77,8 @@ class FakeSwift(object):
ALLOWED_METHODS = [
'PUT', 'POST', 'DELETE', 'GET', 'HEAD', 'OPTIONS', 'REPLICATE',
'SSYNC', 'UPDATE']
container_existence_skip_cache = 0.0
account_existence_skip_cache = 0.0
def __init__(self):
self._calls = []

View File

@@ -29,8 +29,13 @@ from test.unit.common.middleware.s3api.helpers import FakeSwift
class FakeApp(object):
container_existence_skip_cache = 0.0
account_existence_skip_cache = 0.0
def __init__(self):
self._pipeline_final_app = self
self.swift = FakeSwift()
self.logger = debug_logger()
def _update_s3_path_info(self, env):
"""

View File

@@ -2124,7 +2124,8 @@ class TestContainerController(TestRingBase):
req.environ['swift.infocache']['shard-listing/a/c'])
self.assertEqual(
[x[0][0] for x in self.logger.logger.log_dict['increment']],
['container.shard_listing.backend.200'])
['container.info.cache.miss',
'container.shard_listing.backend.200'])
# container is sharded and proxy has that state cached, but
# no shard ranges cached; expect a cache miss and write-back
@@ -2161,7 +2162,8 @@ req.environ['swift.infocache']['shard-listing/a/c'])
req.environ['swift.infocache']['shard-listing/a/c'])
self.assertEqual(
[x[0][0] for x in self.logger.logger.log_dict['increment']],
['container.shard_listing.cache.miss',
['container.info.cache.hit',
'container.shard_listing.cache.miss',
'container.shard_listing.backend.200'])
# container is sharded and proxy does have that state cached and
@@ -2185,7 +2187,8 @@ req.environ['swift.infocache']['shard-listing/a/c'])
req.environ['swift.infocache']['shard-listing/a/c'])
self.assertEqual(
[x[0][0] for x in self.logger.logger.log_dict['increment']],
['container.shard_listing.cache.hit'])
['container.info.cache.hit',
'container.shard_listing.cache.hit'])
# if there's a chance to skip cache, maybe we go to disk again...
self.memcache.clear_calls()
@@ -2221,7 +2224,8 @@ req.environ['swift.infocache']['shard-listing/a/c'])
req.environ['swift.infocache']['shard-listing/a/c'])
self.assertEqual(
[x[0][0] for x in self.logger.logger.log_dict['increment']],
['container.shard_listing.cache.skip',
['container.info.cache.hit',
'container.shard_listing.cache.skip',
'container.shard_listing.backend.200'])
# ... or maybe we serve from cache
@@ -2245,8 +2249,8 @@ req.environ['swift.infocache']['shard-listing/a/c'])
req.environ['swift.infocache']['shard-listing/a/c'])
self.assertEqual(
[x[0][0] for x in self.logger.logger.log_dict['increment']],
['container.shard_listing.cache.hit'])
['container.info.cache.hit',
'container.shard_listing.cache.hit'])
# put this back the way we found it for later subtests
self.app.container_listing_shard_ranges_skip_cache = 0.0
@@ -2396,7 +2400,8 @@ self.assertEqual(404, self.memcache.calls[2][1][1]['status'])
self.assertEqual(404, self.memcache.calls[2][1][1]['status'])
self.assertEqual(b'', resp.body)
self.assertEqual(404, resp.status_int)
self.assertEqual({'container.shard_listing.cache.miss': 1,
self.assertEqual({'container.info.cache.hit': 1,
'container.shard_listing.cache.miss': 1,
'container.shard_listing.backend.404': 1},
self.logger.get_increment_counts())
@@ -2429,7 +2434,8 @@ self.assertEqual(404, self.memcache.calls[2][1][1]['status'])
self.assertEqual(404, self.memcache.calls[2][1][1]['status'])
self.assertEqual(b'', resp.body)
self.assertEqual(404, resp.status_int)
self.assertEqual({'container.shard_listing.cache.error': 1,
self.assertEqual({'container.info.cache.hit': 1,
'container.shard_listing.cache.error': 1,
'container.shard_listing.backend.404': 1},
self.logger.get_increment_counts())
@@ -2452,7 +2458,8 @@ [mock.call.get('container/a/c'),
[mock.call.get('container/a/c'),
mock.call.get('shard-listing/a/c', raise_on_error=True)],
self.memcache.calls)
self.assertEqual({'container.shard_listing.cache.hit': 1},
self.assertEqual({'container.info.cache.hit': 1,
'container.shard_listing.cache.hit': 1},
self.logger.get_increment_counts())
return resp
@@ -2542,7 +2549,8 @@ # shards were cached
# shards were cached
self.assertEqual('sharded',
self.memcache.calls[2][1][1]['sharding_state'])
self.assertEqual({'container.shard_listing.backend.200': 1},
self.assertEqual({'container.info.cache.miss': 1,
'container.shard_listing.backend.200': 1},
self.logger.get_increment_counts())
return resp
@@ -2635,7 +2643,8 @@ self.memcache.calls)
self.memcache.calls)
self.assertEqual('sharded',
self.memcache.calls[2][1][1]['sharding_state'])
self.assertEqual({'container.shard_listing.backend.200': 1},
self.assertEqual({'container.info.cache.miss': 1,
'container.shard_listing.backend.200': 1},
self.logger.get_increment_counts())
def _do_test_GET_shard_ranges_no_cache_write(self, resp_hdrs):
@@ -2807,7 +2816,8 @@ self.memcache.calls)
self.memcache.calls)
self.assertEqual(resp.headers.get('X-Backend-Sharding-State'),
self.memcache.calls[1][1][1]['sharding_state'])
self.assertEqual({'container.shard_listing.backend.200': 1},
self.assertEqual({'container.info.cache.miss': 1,
'container.shard_listing.backend.200': 1},
self.logger.get_increment_counts())
self.memcache.delete_all()

View File

@@ -508,6 +508,7 @@ class TestController(unittest.TestCase):
def test_get_account_info_returns_values_as_strings(self):
app = mock.MagicMock()
app._pipeline_final_app.account_existence_skip_cache = 0.0
memcache = mock.MagicMock()
memcache.get = mock.MagicMock()
memcache.get.return_value = {
@@ -533,6 +534,7 @@ def test_get_container_info_returns_values_as_strings(self):
def test_get_container_info_returns_values_as_strings(self):
app = mock.MagicMock()
app._pipeline_final_app.container_existence_skip_cache = 0.0
memcache = mock.MagicMock()
memcache.get = mock.MagicMock()
memcache.get.return_value = {
@@ -4134,9 +4136,10 @@ self.assertEqual(resp.status_int, 202)
self.assertEqual(resp.status_int, 202)
stats = self.app.logger.get_increment_counts()
self.assertEqual({'object.shard_updating.cache.miss': 1,
self.assertEqual({'account.info.cache.miss': 1,
'container.info.cache.miss': 1,
'object.shard_updating.cache.miss': 1,
'object.shard_updating.backend.200': 1}, stats)
# verify statsd prefix is not mutated
self.assertEqual([], self.app.logger.log_dict['set_statsd_prefix'])
backend_requests = fake_conn.requests
@@ -4234,7 +4237,9 @@ self.assertEqual(resp.status_int, 202)
self.assertEqual(resp.status_int, 202)
stats = self.app.logger.get_increment_counts()
self.assertEqual({'object.shard_updating.cache.hit': 1}, stats)
self.assertEqual({'account.info.cache.miss': 1,
'container.info.cache.miss': 1,
'object.shard_updating.cache.hit': 1}, stats)
# verify statsd prefix is not mutated
self.assertEqual([], self.app.logger.log_dict['set_statsd_prefix'])
@@ -4328,7 +4333,9 @@ self.assertEqual(resp.status_int, 202)
self.assertEqual(resp.status_int, 202)
stats = self.app.logger.get_increment_counts()
self.assertEqual({'object.shard_updating.cache.hit': 1}, stats)
self.assertEqual({'account.info.cache.miss': 1,
'container.info.cache.miss': 1,
'object.shard_updating.cache.hit': 1}, stats)
# cached shard ranges are still there
cache_key = 'shard-updating/a/c'
@@ -4366,7 +4373,11 @@ self.assertEqual(resp.status_int, 202)
self.assertEqual(resp.status_int, 202)
stats = self.app.logger.get_increment_counts()
self.assertEqual({'object.shard_updating.cache.skip': 1,
self.assertEqual({'account.info.cache.miss': 1,
'account.info.cache.hit': 1,
'container.info.cache.miss': 1,
'container.info.cache.hit': 1,
'object.shard_updating.cache.skip': 1,
'object.shard_updating.cache.hit': 1,
'object.shard_updating.backend.200': 1}, stats)
# verify statsd prefix is not mutated
@@ -4425,10 +4436,15 @@ self.assertEqual(resp.status_int, 202)
self.assertEqual(resp.status_int, 202)
stats = self.app.logger.get_increment_counts()
self.assertEqual({'object.shard_updating.cache.skip': 1,
self.assertEqual(stats, {
'account.info.cache.hit': 2,
'account.info.cache.miss': 1,
'container.info.cache.hit': 2,
'container.info.cache.miss': 1,
'object.shard_updating.cache.skip': 1,
'object.shard_updating.cache.hit': 1,
'object.shard_updating.cache.error': 1,
'object.shard_updating.backend.200': 2}, stats)
'object.shard_updating.backend.200': 2})
do_test('POST', 'sharding')
do_test('POST', 'sharded')