Handle cell failures in get_compute_nodes_by_host_or_node
get_compute_nodes_by_host_or_node uses the scatter_gather_cells function but was not handling the case that a failure result was returned, which could be the called function raising some exception or the cell timing out. This causes issues when the caller of get_compute_nodes_by_host_or_node expects to get a ComputeNodeList back and can do something like len(nodes) on it which fails when the result is not iterable. To be clear, if a cell is down there are going to be problems which likely result in a NoValidHost error during scheduling, but this avoids an ugly TypeError traceback in the scheduler logs. Change-Id: Ia54b5adf0a125ae1f9b86887a07dd1d79821dd54 Closes-Bug: #1857139
This commit is contained in:
parent
65aae518f8
commit
0d9622f581
|
@ -707,9 +707,10 @@ class HostManager(object):
|
|||
|
||||
# Only one cell should have values for the compute nodes
|
||||
# so we get them here, or return an empty list if no cell
|
||||
# has a value
|
||||
# has a value; be sure to filter out cell failures.
|
||||
nodes = next(
|
||||
(nodes for nodes in nodes_by_cell.values() if nodes),
|
||||
(nodes for nodes in nodes_by_cell.values()
|
||||
if nodes and not context_module.is_cell_failure_sentinel(nodes)),
|
||||
objects.ComputeNodeList())
|
||||
|
||||
return nodes
|
||||
|
|
|
@ -1195,6 +1195,24 @@ class HostManagerTestCase(test.NoDBTestCase):
|
|||
|
||||
self.assertEqual(0, len(result))
|
||||
|
||||
@mock.patch('nova.context.scatter_gather_cells',
|
||||
side_effect=( # called twice, different return values
|
||||
{uuids.cell1: test.TestingException('conn fail')},
|
||||
{uuids.cell1: nova_context.did_not_respond_sentinel}))
|
||||
def test_get_compute_nodes_by_host_or_node_filter_errors(self, mock_sgc):
|
||||
"""Make sure get_compute_nodes_by_host_or_node filters out exception
|
||||
and cell timeout results from scatter_gather_cells.
|
||||
"""
|
||||
ctxt = nova_context.get_context()
|
||||
cell1 = objects.CellMapping(uuid=uuids.cell1)
|
||||
for x in range(2): # run twice because we have two side effects
|
||||
nodes = self.host_manager.get_compute_nodes_by_host_or_node(
|
||||
ctxt, 'host1', None, cell=cell1)
|
||||
self.assertEqual(0, len(nodes), nodes)
|
||||
self.assertEqual(2, mock_sgc.call_count, mock_sgc.mock_calls)
|
||||
mock_sgc.assert_has_calls([mock.call(
|
||||
ctxt, [cell1], nova_context.CELL_TIMEOUT, mock.ANY)] * 2)
|
||||
|
||||
|
||||
class HostManagerChangedNodesTestCase(test.NoDBTestCase):
|
||||
"""Test case for HostManager class."""
|
||||
|
|
Loading…
Reference in New Issue