Make instance_list perform per-cell batching

This makes the instance_list module support batching across cells
with a couple of different strategies, and with room to add more
in the future.

Before this change, an instance list with limit 1000 to a
deployment with 10 cells would generate a query to each cell
database with the same limit. Thus, that API request could end
up processing up to 10,000 instance records despite only
returning 1000 to the user (because of the limit).

This uses the batch functionality in the base code added in
Iaa4759822e70b39bd735104d03d4deec988d35a1
by providing a couple of strategies by which the batch size
per cell can be determined. These should provide a lot of gain
in the short term, and we can extend them with other strategies
as we identify some with additional benefits.

Closes-Bug: #1787977
Change-Id: Ie3a5f5dc49f8d9a4b96f1e97f8a6ea0b5738b768
This commit is contained in:
Dan Smith 2018-08-17 07:56:05 -07:00
parent 7bc6de3f24
commit c3a77f80b1
4 changed files with 229 additions and 5 deletions

View File

@ -98,6 +98,37 @@ def get_instances_sorted(ctx, filters, limit, marker, columns_to_join,
ctx, filters, limit, marker, columns_to_join=columns_to_join)
def get_instance_list_cells_batch_size(limit, cells):
    """Calculate the proper batch size for a list request.

    This will consider config, request limit, and cells being queried and
    return an appropriate batch size to use for querying said cells.

    :param limit: The overall limit specified in the request
    :param cells: The list of CellMapping objects being queried
    :returns: An integer batch size
    :raises ValueError: If the configured batch strategy is unknown
    """
    strategy = CONF.api.instance_list_cells_batch_strategy
    limit = limit or CONF.api.max_limit

    if len(cells) <= 1:
        # If we're limited to one (or no) cell for whatever reason, do
        # not do any batching and just pull the desired limit from the
        # single cell in one shot.
        return limit

    if strategy == 'fixed':
        # Fixed strategy, always a static batch size
        batch_size = CONF.api.instance_list_cells_batch_fixed_size
    elif strategy == 'distributed':
        # Distributed strategy, 10% more than even partitioning
        batch_size = int((limit / len(cells)) * 1.10)
    else:
        # The config option's choices= constrain us to the two values
        # above, but guard explicitly so an unrecognized strategy fails
        # with a clear error instead of an UnboundLocalError on the
        # return below if a new strategy is added without handling here.
        raise ValueError('Unknown batch strategy: %s' % strategy)

    # We never query a larger batch than the total requested, and never
    # smaller than the lower limit of 100.
    return max(min(batch_size, limit), 100)
def get_instance_objects_sorted(ctx, filters, limit, marker, expected_attrs,
sort_keys, sort_dirs):
"""Same as above, but return an InstanceList."""
@ -115,11 +146,14 @@ def get_instance_objects_sorted(ctx, filters, limit, marker, expected_attrs,
context.load_cells()
cell_mappings = context.CELLS
batch_size = get_instance_list_cells_batch_size(limit, cell_mappings)
columns_to_join = instance_obj._expected_cols(expected_attrs)
instance_generator = get_instances_sorted(ctx, filters, limit, marker,
columns_to_join, sort_keys,
sort_dirs,
cell_mappings=cell_mappings)
cell_mappings=cell_mappings,
batch_size=batch_size)
if 'fault' in expected_attrs:
# We join fault above, so we need to make sure we don't ask

View File

@ -265,6 +265,61 @@ to provide the result. If you have a small number of cells, or tenants
are likely to have instances in all cells, then this should be
False. If you have many cells, especially if you confine tenants to a
small subset of those cells, this should be True.
"""),
cfg.StrOpt("instance_list_cells_batch_strategy",
choices=("fixed", "distributed"),
default="distributed",
help="""
This controls the method by which the API queries cell databases in
smaller batches during large instance list operations. If batching is
performed, a large instance list operation will request some fraction
of the overall API limit from each cell database initially, and will
re-request that same batch size as records are consumed (returned)
from each cell as necessary. Larger batches mean less chattiness
between the API and the database, but potentially more wasted effort
processing the results from the database which will not be returned to
the user. Any strategy will yield a batch size of at least 100 records,
to avoid a user causing many tiny database queries in their request.
``distributed`` (the default) will attempt to divide the limit
requested by the user by the number of cells in the system. This
requires counting the cells in the system initially, which will not be
refreshed until service restart or SIGHUP. The actual batch size will
be increased by 10% over the result of ($limit / $num_cells).
``fixed`` will simply request fixed-size batches from each cell, as
defined by ``instance_list_cells_batch_fixed_size``. If the limit is
smaller than the batch size, the limit will be used instead. If you do
not wish batching to be used at all, setting the fixed size equal to
the ``max_limit`` value will cause only one request per cell database
to be issued.
Possible values:
* distributed (default)
* fixed
Related options:
* instance_list_cells_batch_fixed_size
* max_limit
"""),
cfg.IntOpt("instance_list_cells_batch_fixed_size",
min=100,
default=100,
help="""
This controls the batch size of instances requested from each cell
database if ``instance_list_cells_batch_strategy``` is set to ``fixed``.
This integral value will define the limit issued to each cell every time
a batch of instances is requested, regardless of the number of cells in
the system or any other factors. Per the general logic called out in
the documentation for ``instance_list_cells_batch_strategy``, the
minimum value for this is 100 records per batch.
Related options:
* instance_list_cells_batch_strategy
* max_limit
"""),
]

View File

@ -46,6 +46,8 @@ class TestInstanceList(test.NoDBTestCase):
self.context = nova_context.RequestContext()
self.useFixture(fixtures.SpawnIsSynchronousFixture())
self.flags(instance_list_cells_batch_strategy='fixed', group='api')
def test_compare_simple_instance_quirks(self):
# Ensure uuid,asc is added
ctx = instance_list.InstanceSortContext(['key0'], ['asc'])
@ -94,7 +96,8 @@ class TestInstanceList(test.NoDBTestCase):
user_context, {}, None, None, [], None, None)
mock_gi.assert_called_once_with(user_context, {}, None, None, [],
None, None,
cell_mappings=mock_cm.return_value)
cell_mappings=mock_cm.return_value,
batch_size=1000)
@mock.patch('nova.context.CELLS', new=FAKE_CELLS)
@mock.patch('nova.context.load_cells')
@ -110,7 +113,8 @@ class TestInstanceList(test.NoDBTestCase):
admin_context, {}, None, None, [], None, None)
mock_gi.assert_called_once_with(admin_context, {}, None, None, [],
None, None,
cell_mappings=FAKE_CELLS)
cell_mappings=FAKE_CELLS,
batch_size=100)
mock_cm.assert_not_called()
mock_lc.assert_called_once_with()
@ -128,7 +132,8 @@ class TestInstanceList(test.NoDBTestCase):
user_context, {}, None, None, [], None, None)
mock_gi.assert_called_once_with(user_context, {}, None, None, [],
None, None,
cell_mappings=FAKE_CELLS)
cell_mappings=FAKE_CELLS,
batch_size=100)
mock_lc.assert_called_once_with()
@mock.patch('nova.context.CELLS', new=FAKE_CELLS)
@ -147,7 +152,8 @@ class TestInstanceList(test.NoDBTestCase):
admin_context, {}, None, None, [], None, None)
mock_gi.assert_called_once_with(admin_context, {}, None, None, [],
None, None,
cell_mappings=FAKE_CELLS)
cell_mappings=FAKE_CELLS,
batch_size=100)
mock_cm.assert_not_called()
mock_lc.assert_called_once_with()
@ -177,3 +183,121 @@ class TestInstanceList(test.NoDBTestCase):
# return the results from the up cell, ignoring the down cell.
self.assertEqual(uuid_initial, uuid_final)
def test_batch_size_fixed(self):
    """Verify batch sizing under the 'fixed' strategy."""
    fixed_size = 200
    self.flags(instance_list_cells_batch_strategy='fixed', group='api')
    self.flags(instance_list_cells_batch_fixed_size=fixed_size,
               group='api')

    # The batch size calculator only counts the cells it is given, so
    # sentinels are sufficient stand-ins for CellMapping objects.
    def batch(limit, num_cells):
        cells = [getattr(mock.sentinel, 'cell%i' % i)
                 for i in range(num_cells)]
        return instance_list.get_instance_list_cells_batch_size(limit,
                                                                cells)

    # A single cell gets no batching, so the full $limit comes back
    self.assertEqual(1000, batch(1000, 1))
    # With two or more cells, the static fixed size applies
    self.assertEqual(fixed_size, batch(1000, 2))
    self.assertEqual(fixed_size, batch(1000, 4))
    # A tiny limit is clamped up to the floor of 100
    self.assertEqual(100, batch(10, 3))
    # A limit above the floor but below the fixed size wins
    self.assertEqual(110, batch(110, 3))
def test_batch_size_distributed(self):
    """Verify batch sizing under the 'distributed' strategy."""
    self.flags(instance_list_cells_batch_strategy='distributed',
               group='api')

    def batch(limit, cells):
        return instance_list.get_instance_list_cells_batch_size(limit,
                                                                cells)

    # A single cell gets no batching, so the full $limit comes back
    self.assertEqual(1000, batch(1000, [1]))
    # Two cells: ($limit / 2) plus ten percent
    self.assertEqual(550, batch(1000, [1, 2]))
    # Four cells: ($limit / 4) plus ten percent
    self.assertEqual(275, batch(1000, [1, 2, 3, 4]))
    # A tiny limit is clamped up to the floor of 100
    self.assertEqual(100, batch(10, [1, 2, 3]))
    # A small limit whose per-cell share falls under the floor is
    # also clamped up to 100
    self.assertEqual(100, batch(110, [1, 2, 3]))
    # With no cells at all, batching is skipped and $limit is used
    self.assertEqual(1000, batch(1000, []))
class TestInstanceListBig(test.NoDBTestCase):
    def setUp(self):
        super(TestInstanceListBig, self).setUp()

        # Three fake cells and a pool of 100 uniform instance records
        # that the fake query function below will dole out on demand.
        self.cells = [
            objects.CellMapping(uuid=getattr(uuids, 'cell%i' % i),
                                name='cell%i' % i,
                                transport_url='fake:///',
                                database_connection='fake://')
            for i in range(0, 3)]
        self.insts = [dict(uuid=getattr(uuids, 'inst%i' % i),
                           hostname='inst%i' % i)
                      for i in range(0, 100)]
        self.context = nova_context.RequestContext()
        self.useFixture(fixtures.SpawnIsSynchronousFixture())

    @mock.patch('nova.db.api.instance_get_all_by_filters_sort')
    @mock.patch('nova.objects.CellMappingList.get_all')
    def test_get_instances_batched(self, mock_cells, mock_inst):
        mock_cells.return_value = self.cells

        def fake_get_insts(ctx, filters, limit, *a, **k):
            # Serve exactly $limit records per call, drawn from the
            # shared pool built in setUp.
            for _ in range(limit):
                yield self.insts.pop()

        mock_inst.side_effect = fake_get_insts
        inst_gen = instance_list.get_instances_sorted(self.context, {},
                                                      50, None,
                                                      [], ['hostname'],
                                                      ['desc'],
                                                      batch_size=10)
        results = list(inst_gen)
        # Make sure we returned exactly how many were requested
        self.assertEqual(50, len(results))
        # Since the instances are all uniform, we should have a
        # predictable number of queries to the database. 5 queries
        # would get us 50 results, plus one more gets triggered by the
        # sort to fill the buffer for the first cell feeder that runs
        # dry.
        self.assertEqual(6, mock_inst.call_count)

View File

@ -0,0 +1,11 @@
---
features:
- |
Instance list operations across cells are now made more efficient by batching queries
as a fraction of the total limit for a request. Before this, an instance list with a
limit of 1000 records (the default) would generate queries to each cell with that
limit, and potentially process/sort/merge $num_cells*$limit records, despite only
returning $limit to the user. The strategy can now be controlled via
``[api]/instance_list_cells_batch_strategy`` and related options to either use
fixed batch sizes, or a fractional value that scales with the number of configured
cells.