Always put 'uuid' into sort_keys for stable instance lists

If we're listing by sort keys that yield many ambiguous (equal-valued)
results, we can exacerbate client pagination problems because we're not
even bound by insertion order, given that multiple cell databases are
queried in parallel. So, even if the client didn't ask for it, append
'uuid' to the end of sort_keys to give us a stable ordering. The default
case was already covered by always including 'id' in the default set of
sort_keys, but a user could still hit this by requesting their own keys.
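
To make the failure mode concrete, here is a minimal sketch in plain
Python (not Nova code; the records, cell names, and helpers are invented
for illustration): when every row shares the same created_at, merging
per-cell results on that key alone yields an arbitrary interleaving,
while appending 'uuid' as a final key makes the order, and therefore any
pagination marker, repeatable.

import heapq
import uuid

def make_cell(name, count, created_at='1985-10-25T01:21:00'):
    # Every record shares the same created_at, as instances created in a
    # burst often do, so created_at alone cannot break ties.
    return [{'created_at': created_at, 'uuid': str(uuid.uuid4()), 'cell': name}
            for _ in range(count)]

cell1 = make_cell('cell1', 3)
cell2 = make_cell('cell2', 3)

def tiebreak(rec):
    # Sort by created_at first, then by the globally unique uuid.
    return (rec['created_at'], rec['uuid'])

# Merging on created_at alone: every record ties, so the result is whatever
# interleaving the merge happens to produce; a marker taken from this list
# does not identify a unique resume point for the next page.
ambiguous = list(heapq.merge(cell1, cell2, key=lambda r: r['created_at']))
print([r['cell'] for r in ambiguous])

# With 'uuid' appended as a final sort key, ties are broken the same way on
# every request, so the merged order (and any marker within it) is stable.
stable = list(heapq.merge(sorted(cell1, key=tiebreak),
                          sorted(cell2, key=tiebreak),
                          key=tiebreak))
assert stable == sorted(cell1 + cell2, key=tiebreak)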

Note this also removes the recently-added explicit sort in the
test_bug_1689692 case, since we're enforcing a strict ordering with
this patch. Also, mriedem is awesome.

Change-Id: Ida446acb1286a8b215451a5d8d7d23882643ef13
Closes-Bug: #1721791
Author: Dan Smith
Date:   2017-10-06 07:40:42 -07:00
Parent: 00bbc2ffd3
Commit: add69c0507
3 changed files with 51 additions and 6 deletions


@@ -114,6 +114,17 @@ def get_instances_sorted(ctx, filters, limit, marker, columns_to_join,
sort_keys = ['created_at', 'id']
sort_dirs = ['asc', 'asc']
if 'uuid' not in sort_keys:
# Historically the default sort includes 'id' (see above), which
# should give us a stable ordering. Since we're striping across
# cell databases here, many sort_keys arrangements will yield
# nothing unique across all the databases to give us a stable
# ordering, which can mess up expected client pagination behavior.
# So, throw uuid into the sort_keys at the end if it's not already
# there to keep us repeatable.
sort_keys = copy.copy(sort_keys) + ['uuid']
sort_dirs = copy.copy(sort_dirs) + ['asc']
sort_ctx = InstanceSortContext(sort_keys, sort_dirs)
if marker:
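
The hunk above builds new sort_keys/sort_dirs lists instead of appending
in place. A hedged illustration of that pattern (the helper name here is
invented for this sketch and is not a Nova API): callers who passed in
their own lists, or shared defaults, never see them mutated behind their
backs.

def ensure_stable_sort(sort_keys, sort_dirs, tiebreaker='uuid'):
    """Return sort_keys/sort_dirs that end with a unique tiebreaker key."""
    if tiebreaker in sort_keys:
        return sort_keys, sort_dirs
    # Build new lists rather than calling .append(), so the lists the
    # caller passed in are left untouched.
    return sort_keys + [tiebreaker], sort_dirs + ['asc']

keys, dirs = ensure_stable_sort(['created_at'], ['asc'])
assert keys == ['created_at', 'uuid'] and dirs == ['asc', 'asc']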


@@ -31,7 +31,8 @@ class InstanceListTestCase(test.TestCase):
self.num_instances = 3
self.instances = []
- dt = datetime.datetime(1985, 10, 25, 1, 21, 0)
+ start = datetime.datetime(1985, 10, 25, 1, 21, 0)
+ dt = start
spread = datetime.timedelta(minutes=10)
self.cells = objects.CellMappingList.get_all(self.context)
@@ -45,6 +46,7 @@ class InstanceListTestCase(test.TestCase):
context=cctx,
project_id=self.context.project_id,
user_id=self.context.user_id,
created_at=start,
launched_at=dt,
instance_type_id=i,
hostname='%s-inst%i' % (cell.name, i))
@@ -164,6 +166,11 @@ class InstanceListTestCase(test.TestCase):
break
insts.extend(batch)
page += 1
if page > len(self.instances) * 2:
# Do this sanity check in case we introduce (or find) another
# repeating page bug like #1721791. Without this we loop
# until timeout, which is less obvious.
raise Exception('Infinite paging loop')
# We should have requested exactly the expected number of pages (or one
# more in the unlimited case)
self.assertIn(page, (pages, pages + 1))
@@ -172,11 +179,12 @@ class InstanceListTestCase(test.TestCase):
found = [x[sort_by] for x in insts]
had = [x[sort_by] for x in self.instances]
- if sort_by == 'launched_at':
+ if sort_by in ('launched_at', 'created_at'):
# We're comparing objects and database entries, so we need to
# squash the tzinfo of the object ones so we can compare
had = [x.replace(tzinfo=None) for x in had]
self.assertEqual(len(had), len(found))
self.assertEqual(sorted(had), found)
def test_get_sorted_with_limit_marker_stable(self):
@@ -219,6 +227,14 @@ class InstanceListTestCase(test.TestCase):
"""
self._test_get_sorted_with_limit_marker(sort_by='launched_at')
def test_get_sorted_with_limit_marker_datetime_same(self):
"""Test sorted by created_at.
This tests that we can do all of this, but with datetime
fields that are identical.
"""
self._test_get_sorted_with_limit_marker(sort_by='created_at')
def test_get_sorted_with_deleted_marker(self):
marker = self.instances[1]['uuid']
@@ -376,7 +392,8 @@ class TestInstanceListObjects(test.TestCase):
self.num_instances = 3
self.instances = []
- dt = datetime.datetime(1985, 10, 25, 1, 21, 0)
+ start = datetime.datetime(1985, 10, 25, 1, 21, 0)
+ dt = start
spread = datetime.timedelta(minutes=10)
cells = objects.CellMappingList.get_all(self.context)
@@ -390,6 +407,7 @@ class TestInstanceListObjects(test.TestCase):
context=cctx,
project_id=self.context.project_id,
user_id=self.context.user_id,
created_at=start,
launched_at=dt,
instance_type_id=i,
hostname='%s-inst%i' % (cell.name, i))
@@ -451,3 +469,20 @@ class TestInstanceListObjects(test.TestCase):
# actual faults
self.assertEqual(2, len([inst for inst in insts
if inst.fault]))
def test_get_instance_objects_sorted_paged(self):
"""Query a full first page and ensure an empty second one.
This uses created_at which is enforced to be the same across
each instance by setUp(). This will help make sure we still
have a stable ordering, even when we only claim to care about
created_at.
"""
instp1 = instance_list.get_instance_objects_sorted(
self.context, {}, None, None, [],
['created_at'], ['asc'])
self.assertEqual(len(self.instances), len(instp1))
instp2 = instance_list.get_instance_objects_sorted(
self.context, {}, None, instp1[-1]['uuid'], [],
['created_at'], ['asc'])
self.assertEqual(0, len(instp2))
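
The functional tests above page through the instance list with a marker
until they get an empty page, guarding against the repeating-page symptom
of bug #1721791. A self-contained sketch of that pattern in plain Python
(fetch_page and fake_fetch are invented stand-ins, not Nova code):

def page_all(fetch_page, limit, max_pages):
    # Collect every record by walking pages via a marker, bailing out if
    # paging never terminates (e.g. because a page keeps repeating).
    results = []
    marker = None
    pages = 0
    while True:
        batch = fetch_page(limit=limit, marker=marker)
        if not batch:
            break
        results.extend(batch)
        marker = batch[-1]['uuid']
        pages += 1
        if pages > max_pages:
            raise Exception('Infinite paging loop')
    return results

data = [{'uuid': str(i)} for i in range(5)]

def fake_fetch(limit, marker):
    # Serve slices of 'data', resuming just after the marker record.
    start = 0 if marker is None else [d['uuid'] for d in data].index(marker) + 1
    return data[start:start + limit]

assert page_all(fake_fetch, limit=2, max_pages=10) == data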


@@ -71,14 +71,13 @@ class ServerListLimitMarkerCell0Test(test.TestCase,
self.addCleanup(self.api.delete_server, server['id'])
self._wait_for_state_change(self.api, server, 'ERROR')
- servers = self.api.get_servers(search_opts=dict(sort_key='uuid'))
+ servers = self.api.get_servers()
self.assertEqual(3, len(servers))
# Take the first server and use that as our marker.
marker = servers[0]['id']
# Since we're paging after the first server as our marker, there are
# only two left, so specifying three should just return two.
- servers = self.api.get_servers(search_opts=dict(sort_key='uuid',
- marker=marker,
+ servers = self.api.get_servers(search_opts=dict(marker=marker,
limit=3))
self.assertEqual(2, len(servers))