Handle cell failures in get_compute_nodes_by_host_or_node

get_compute_nodes_by_host_or_node uses the scatter_gather_cells
function but was not handling the case that a failure result
was returned, which could be the called function raising some
exception or the cell timing out. This causes issues when the
caller of get_compute_nodes_by_host_or_node expects to get a
ComputeNodeList back and can do something like len(nodes) on it
which fails when the result is not iterable.

To be clear, if a cell is down there are going to be problems
which likely result in a NoValidHost error during scheduling, but
this avoids an ugly TypeError traceback in the scheduler logs.

Change-Id: Ia54b5adf0a125ae1f9b86887a07dd1d79821dd54
Closes-Bug: #1857139
This commit is contained in:
Matt Riedemann 2019-12-20 10:03:23 -05:00
parent 65aae518f8
commit 0d9622f581
2 changed files with 21 additions and 2 deletions

View File

@ -707,9 +707,10 @@ class HostManager(object):
# Only one cell should have values for the compute nodes
# so we get them here, or return an empty list if no cell
# has a value
# has a value; be sure to filter out cell failures.
nodes = next(
(nodes for nodes in nodes_by_cell.values() if nodes),
(nodes for nodes in nodes_by_cell.values()
if nodes and not context_module.is_cell_failure_sentinel(nodes)),
objects.ComputeNodeList())
return nodes

View File

@ -1195,6 +1195,24 @@ class HostManagerTestCase(test.NoDBTestCase):
self.assertEqual(0, len(result))
@mock.patch('nova.context.scatter_gather_cells',
side_effect=( # called twice, different return values
{uuids.cell1: test.TestingException('conn fail')},
{uuids.cell1: nova_context.did_not_respond_sentinel}))
def test_get_compute_nodes_by_host_or_node_filter_errors(self, mock_sgc):
"""Make sure get_compute_nodes_by_host_or_node filters out exception
and cell timeout results from scatter_gather_cells.
"""
ctxt = nova_context.get_context()
cell1 = objects.CellMapping(uuid=uuids.cell1)
for x in range(2): # run twice because we have two side effects
nodes = self.host_manager.get_compute_nodes_by_host_or_node(
ctxt, 'host1', None, cell=cell1)
self.assertEqual(0, len(nodes), nodes)
self.assertEqual(2, mock_sgc.call_count, mock_sgc.mock_calls)
mock_sgc.assert_has_calls([mock.call(
ctxt, [cell1], nova_context.CELL_TIMEOUT, mock.ANY)] * 2)
class HostManagerChangedNodesTestCase(test.NoDBTestCase):
"""Test case for HostManager class."""