Only return the requested fields from the DB

A node object has many fields, and a huge opportunity
for performance improvement is to reduce the amount of work
performed at the lower layers when it is not necessary.
In particular, the easiest case to identify and handle is
when a list of fields to be fulfilled is explicitly supplied.

This is particularlly noteworthy when we collecting a list of
nodes for synconization with Nova, where only 9 columns are
really needed to supply Nova with the information required,
and thus numerous fields are discarded.

This is all particularlly compounded when traits are used,
which presently uses a joined load pattern from SQL.

With this patch, we explicitly load and convert only the fields
requested at the lowest level, and then perform a different style
of loading node traits which requires less overhead by SQLAlchemy
to de-duplicate the returned result set to the application.

This turns out to be far more efficent as we're reducing the amount
of data/object conversion work by 360%, even before we consider
populating traits, which is performed as a separate query when
traits are requested.

Story: 2008885
Task: 42434
Change-Id: Iac703d2a9b7b240a47477be8a64c7c33e28f692f
This commit is contained in:
Julia Kreger 2021-05-19 12:24:16 -07:00
parent 416a0951c8
commit 644ba5d4bc
6 changed files with 250 additions and 8 deletions

View File

@ -74,7 +74,7 @@ class Connection(object, metaclass=abc.ABCMeta):
@abc.abstractmethod
def get_node_list(self, filters=None, limit=None, marker=None,
sort_key=None, sort_dir=None):
sort_key=None, sort_dir=None, fields=None):
"""Return a list of nodes.
:param filters: Filters to apply. Defaults to None.
@ -94,6 +94,10 @@ class Connection(object, metaclass=abc.ABCMeta):
:param sort_key: Attribute by which results should be sorted.
:param sort_dir: direction in which results should be sorted.
(asc, desc)
:param fields: Comma separated field list to return, to allow for
only specific fields to be returned to have maximum
API performance calls where not all columns are
needed from the database.
"""
@abc.abstractmethod

View File

@ -32,6 +32,8 @@ from osprofiler import sqlalchemy as osp_sqlalchemy
import sqlalchemy as sa
from sqlalchemy.orm.exc import NoResultFound, MultipleResultsFound
from sqlalchemy.orm import joinedload
from sqlalchemy.orm import Load
from sqlalchemy.orm import selectinload
from sqlalchemy import sql
from ironic.common import exception
@ -433,8 +435,63 @@ class Connection(api.Connection):
sort_key, sort_dir, query)
def get_node_list(self, filters=None, limit=None, marker=None,
sort_key=None, sort_dir=None):
query = _get_node_query_with_all()
sort_key=None, sort_dir=None, fields=None):
if not fields:
query = _get_node_query_with_all()
query = self._add_nodes_filters(query, filters)
return _paginate_query(models.Node, limit, marker,
sort_key, sort_dir, query)
else:
# Shunt to the proper method to return the limited list.
return self.get_node_list_columns(columns=fields, filters=filters,
limit=limit, marker=marker,
sort_key=sort_key,
sort_dir=sort_dir)
def get_node_list_columns(self, columns=None, filters=None, limit=None,
marker=None, sort_key=None, sort_dir=None):
"""Get a node list with specific fields/columns.
:param columns: A list of columns to retrieve from the database
and populate into the object.
:param filters: The requested database field filters in the form of
a dictionary with the applicable key, and filter
value.
:param limit: Limit the number of returned nodes, default None.
:param marker: Starting marker to generate a paginated result
set for the consumer.
:param sort_key: Sort key to apply to the result set.
:param sort_dir: Sort direction to apply to the result set.
:returns: A list of Node objects based on the data model from
a SQLAlchemy result set, which the object layer can
use to convert the node into an Node object list.
"""
traits_found = False
use_columns = columns[:]
if 'traits' in columns:
# Traits is synthetic in the data model and not a direct
# table column. As such, a different query pattern is used
# with SQLAlchemy.
traits_found = True
use_columns.remove('traits')
# Generate the column object list so SQLAlchemy only fulfills
# the requested columns.
use_columns = [getattr(models.Node, c) for c in use_columns]
# In essence, traits (and anything else needed to generate the
# composite objects) need to be reconciled without using a join
# as multiple rows can be generated in the result set being returned
# from the database server. In this case, with traits, we use
# a selectinload pattern.
if traits_found:
query = model_query(models.Node).options(
Load(models.Node).load_only(*use_columns),
selectinload(models.Node.traits))
else:
query = model_query(models.Node).options(
Load(models.Node).load_only(*use_columns))
query = self._add_nodes_filters(query, filters)
return _paginate_query(models.Node, limit, marker,
sort_key, sort_dir, query)

View File

@ -299,7 +299,7 @@ class IronicObject(object_base.VersionedObject):
return obj
@classmethod
def _from_db_object_list(cls, context, db_objects):
def _from_db_object_list(cls, context, db_objects, fields=None):
"""Returns objects corresponding to database entities.
Returns a list of formal objects of this class that correspond to
@ -308,9 +308,13 @@ class IronicObject(object_base.VersionedObject):
:param cls: the VersionedObject class of the desired object
:param context: security context
:param db_objects: A list of DB models of the object
:param fields: A list of field names to comprise lower level
objects.
:returns: A list of objects corresponding to the database entities
"""
return [cls._from_db_object(context, cls(), db_obj)
# NOTE(TheJulia): Fields is used in a later patch in this series
# and tests are landed in an intermediate change.
return [cls._from_db_object(context, cls(), db_obj, fields=None)
for db_obj in db_objects]
def do_version_changes_for_db(self):

View File

@ -313,7 +313,7 @@ class Node(base.IronicObject, object_base.VersionedObjectDictCompat):
# @object_base.remotable_classmethod
@classmethod
def list(cls, context, limit=None, marker=None, sort_key=None,
sort_dir=None, filters=None):
sort_dir=None, filters=None, fields=None):
"""Return a list of Node objects.
:param cls: the :class:`Node`
@ -323,13 +323,30 @@ class Node(base.IronicObject, object_base.VersionedObjectDictCompat):
:param sort_key: column to sort results by.
:param sort_dir: direction to sort. "asc" or "desc".
:param filters: Filters to apply.
:param fields: Requested fields to be returned. Please note, some
fields are mandatory for the data model and are
automatically included. These are: id, version,
updated_at, created_at, owner, and lessee.
:returns: a list of :class:`Node` object.
"""
if fields:
# All requests must include version, updated_at, created_at
# owner, and lessee to support access controls and database
# version model updates. Driver and conductor_group are required
# for conductor mapping.
target_fields = ['id'] + fields[:] + ['version', 'updated_at',
'created_at', 'owner',
'lessee', 'driver',
'conductor_group']
else:
target_fields = None
db_nodes = cls.dbapi.get_node_list(filters=filters, limit=limit,
marker=marker, sort_key=sort_key,
sort_dir=sort_dir)
return cls._from_db_object_list(context, db_nodes)
sort_dir=sort_dir,
fields=target_fields)
return cls._from_db_object_list(context, db_nodes, target_fields)
# NOTE(xek): We don't want to enable RPC on this call just yet. Remotable
# methods can be used in the future to replace current explicit RPC calls.

View File

@ -20,6 +20,7 @@ from unittest import mock
from oslo_utils import timeutils
from oslo_utils import uuidutils
from sqlalchemy.orm import exc as sa_exc
from ironic.common import exception
from ironic.common import states
@ -302,6 +303,20 @@ class DbNodeTestCase(base.DbTestCase):
self.assertEqual([], r.tags)
self.assertEqual([], r.traits)
def test_get_node_list_includes_traits(self):
uuids = []
for i in range(1, 6):
node = utils.create_test_node(uuid=uuidutils.generate_uuid())
uuids.append(str(node['uuid']))
self.dbapi.set_node_traits(node.id, ['trait1', 'trait2'], '1.35')
res = self.dbapi.get_node_list()
res_uuids = [r.uuid for r in res]
self.assertCountEqual(uuids, res_uuids)
for r in res:
self.assertEqual([], r.tags)
self.assertEqual(2, len(r.traits))
def test_get_node_list_with_filters(self):
ch1 = utils.create_test_chassis(uuid=uuidutils.generate_uuid())
ch2 = utils.create_test_chassis(uuid=uuidutils.generate_uuid())
@ -446,6 +461,140 @@ class DbNodeTestCase(base.DbTestCase):
self.dbapi.get_node_list,
{'chassis_uuid': uuidutils.generate_uuid()})
def test_get_node_list_requested_fields_with_traits(self):
# Checks to to ensure we're not returning a node object with all
# fields populated as this is a high overhead for SQLAlchemy to do
# all of the object conversions, when we have fields which were not
# requested nor required.
# Modeled after the nova query which is used to collect node state
uuids = []
for i in range(1, 6):
node = utils.create_test_node(uuid=uuidutils.generate_uuid(),
provision_state=states.AVAILABLE,
power_state=states.POWER_OFF,
target_power_state=None,
target_provision_state=None,
last_error=None,
maintenance=False,
properties={'cpu': 'x86_64'},
instance_uuid=None,
resource_class='CUSTOM_BAREMETAL',
# Code requires the fields below
owner='fred',
lessee='marsha',
# Fields that should not be
# present in the obejct.
driver_internal_info={
'cat': 'meow'},
internal_info={'corgi': 'rocks'},
deploy_interface='purring_machine')
# Add some traits for good measure
self.dbapi.set_node_traits(node.id, ['trait1', 'trait2'], '1.35')
uuids.append(str(node['uuid']))
req_fields = ['uuid',
'power_state',
'target_power_state',
'provision_state',
'target_provision_state',
'last_error',
'maintenance',
'properties',
'instance_uuid',
'resource_class',
'traits',
'version',
'updated_at',
'created_at']
res = self.dbapi.get_node_list(fields=req_fields)
res_uuids = [r.uuid for r in res]
self.assertCountEqual(uuids, res_uuids)
for r in res:
self.assertIsNotNone(r.traits)
self.assertIsNotNone(r.version)
self.assertEqual(states.AVAILABLE, r.provision_state)
self.assertEqual(states.POWER_OFF, r.power_state)
self.assertIsNone(r.target_power_state)
self.assertIsNone(r.target_provision_state)
self.assertIsNone(r.last_error)
self.assertFalse(r.maintenance)
self.assertIsNone(r.instance_uuid)
self.assertEqual('CUSTOM_BAREMETAL', r.resource_class)
self.assertEqual('trait1', r.traits[0]['trait'])
self.assertEqual('trait2', r.traits[1]['trait'])
# These always need to be returned, even if not requested.
# These should always be empty values as they are not populated
# due to the object not returning a value in the field to save on
# excess un-necessary data conversions.
def _attempt_field_access(obj, field):
return obj[field]
for field in ['driver_internal_info', 'internal_info',
'deploy_interface', 'boot_interface',
'driver', 'extra']:
try:
self.assertRaises(sa_exc.DetachedInstanceError,
_attempt_field_access, r, field)
except AttributeError:
pass
def test_get_node_list_requested_fields_no_traits(self):
# The join for traits handling requires some special handling
# so in this case we execute without traits being joined in.
uuids = []
for i in range(1, 3):
node = utils.create_test_node(uuid=uuidutils.generate_uuid(),
provision_state=states.AVAILABLE,
last_error=None,
maintenance=False,
resource_class='CUSTOM_BAREMETAL',
# Code requires the fields below
owner='fred',
lessee='marsha',
# Fields that should not be
# present in the object.
driver_internal_info={
'cat': 'meow'},
internal_info={'corgi': 'rocks'},
deploy_interface='purring_machine')
uuids.append(str(node['uuid']))
req_fields = ['uuid',
'provision_state',
'last_error',
'owner',
'lessee',
'version']
res = self.dbapi.get_node_list(fields=req_fields)
res_uuids = [r.uuid for r in res]
self.assertCountEqual(uuids, res_uuids)
for r in res:
self.assertIsNotNone(r.version)
self.assertEqual(states.AVAILABLE, r.provision_state)
self.assertIsNone(r.last_error)
# These always need to be returned, even if not requested.
self.assertEqual('fred', r.owner)
self.assertEqual('marsha', r.lessee)
# These should always be empty values as they are not populated
# due to the object not returning a value in the field to save on
# excess un-necessary data conversions.
def _attempt_field_access(obj, field):
return obj[field]
for field in ['driver_internal_info', 'internal_info',
'deploy_interface', 'boot_interface',
'driver', 'extra', 'power_state',
'traits']:
try:
self.assertRaises(sa_exc.DetachedInstanceError,
_attempt_field_access, r, field)
except AttributeError:
# We expect an AttributeError, in addition to
# SQLAlchemy raising an exception.
pass
def test_get_node_by_instance(self):
node = utils.create_test_node(
instance_uuid='12345678-9999-0000-aaaa-123456789012')

View File

@ -397,6 +397,17 @@ class TestNodeObject(db_base.DbTestCase, obj_utils.SchemasTestMixIn):
self.assertIsInstance(nodes[0], objects.Node)
self.assertEqual(self.context, nodes[0]._context)
def test_list_with_fields(self):
with mock.patch.object(self.dbapi, 'get_node_list',
autospec=True) as mock_get_list:
mock_get_list.return_value = [self.fake_node]
objects.Node.list(self.context, fields=['name'])
mock_get_list.assert_called_with(
filters=None, limit=None, marker=None, sort_key=None,
sort_dir=None,
fields=['id', 'name', 'version', 'updated_at', 'created_at',
'owner', 'lessee', 'driver', 'conductor_group'])
def test_reserve(self):
with mock.patch.object(self.dbapi, 'reserve_node',
autospec=True) as mock_reserve: