Log a warning and add nova-status check for old API service versions

Change Ib984c30543acb3ca9cb95fb53d44d9ded0f5a5c8, which was added
in Newton when cells v2 was optional, added some transitional code
to the API for looking up an instance, which didn't rely on instance
mappings in a cell to find the instance if the minimum nova-osapi_compute
service version was from before Ocata.

People have reported this being a source of confusion when upgrading
from before Ocata, when cells v2 wasn't required, to Ocata+ where cells
v2 along with the mapping setup is required. That's because they might
have older nova-osapi_compute service version records in their 'nova'
(cell) database which makes the API think the code is older than it
actually is, and results in an InstanceNotFound error.

This change does two things:

1. Adds a warning to the compute API code in this scenario to serve
   as a breadcrumb if a deployment hits this issue.

2. Adds a nova-status check to look for minimum nova-osapi_compute service
   versions across all cells and report the issue as a warning. It's
   not an upgrade failure since we don't know how the nova-api service
   is configured, so that investigation is left up to the deployer.

This is also written in such a way that we should be able to backport
this through to stable/ocata.

Change-Id: Ie2bc4616439352850cf29a9de7d33a06c8f7c2b8
Closes-Bug: #1759316
This commit is contained in:
Matt Riedemann 2018-03-28 16:26:48 -04:00
parent e2d5dc4e2c
commit eaf6340847
5 changed files with 209 additions and 1 deletions

View File

@ -109,6 +109,10 @@ Upgrade
* Checks for the Placement API are modified to require version 1.21.
* Checks that ironic instances have had their embedded flavors migrated to
use custom resource classes.
* Checks for ``nova-osapi_compute`` service versions that are less than 15
across all cell mappings which might cause issues when querying instances
depending on how the **nova-api** service is configured.
See https://bugs.launchpad.net/nova/+bug/1759316 for details.
See Also
========

View File

@ -33,6 +33,7 @@ import pkg_resources
import prettytable
from sqlalchemy import func as sqlfunc
from sqlalchemy import MetaData, Table, and_, select
from sqlalchemy.sql import false
from nova.cmd import common as cmd_common
import nova.conf
@ -440,6 +441,74 @@ class UpgradeCommands(object):
# those nodes are already migrated, so there is nothing to do.
return UpgradeCheckResult(UpgradeCheckCode.SUCCESS)
def _get_min_service_version(self, context, binary):
    """Return the lowest service version recorded for ``binary``.

    The ``services`` table is reflected from the database engine that
    belongs to ``context`` and only records which are neither deleted
    nor forced down are considered.

    :param context: request context used to pick the database engine
    :param binary: name of the service binary to filter on, e.g.
        'nova-osapi_compute'
    :returns: the scalar result of the MIN(version) query; the caller
        treats a None result as "no matching records"
    """
    engine = db_session.get_engine(context=context)
    metadata = MetaData(bind=engine)
    services = Table('services', metadata, autoload=True)
    # Only live records count: matching binary, not soft-deleted and
    # not forced down.
    live_records = and_(
        services.c.binary == binary,
        services.c.deleted == 0,
        services.c.forced_down == false())
    min_version_query = select([sqlfunc.min(services.c.version)])
    min_version_query = min_version_query.select_from(services)
    min_version_query = min_version_query.where(live_records)
    return min_version_query.scalar()
def _check_api_service_version(self):
    """Look for pre-Ocata nova-osapi_compute service records in any cell.

    When cells v1 is not in use, a minimum nova-osapi_compute service
    version below 15 makes the API look instances up directly in the
    database configured for the nova-api service instead of going through
    the instance mappings. Stale service records left behind in an older
    database after upgrading to Ocata can therefore cause problems even
    though newer API services exist in cell1.

    Every cell mapping is scanned for its minimum nova-osapi_compute
    service version. The outcome is:

    * SUCCESS if cells v1 is enabled (the check does not apply) or no
      cell has a minimum version below 15.
    * WARNING if there are no cell mappings at all, since nothing can be
      determined in that case.
    * WARNING listing the affected cell uuids if any cell has a minimum
      version below 15; those records likely need to be cleaned up.
    """
    # Cells v1 deployments are not affected by this, nothing to check.
    if CONF.cells.enable:
        return UpgradeCheckResult(UpgradeCheckCode.SUCCESS)

    api_meta = MetaData(bind=db_session.get_api_engine())
    cell_mappings_table = Table('cell_mappings', api_meta, autoload=True)
    all_mappings = cell_mappings_table.select().execute().fetchall()

    if not all_mappings:
        # Without cell mappings there is nothing to scan, so the best we
        # can do is warn. The cellsv2 check would have already failed on
        # this.
        msg = (_('Unable to determine API service versions without '
                 'cell mappings.'))
        return UpgradeCheckResult(UpgradeCheckCode.WARNING, msg)

    admin_ctxt = nova_context.get_admin_context()
    stale_cell_uuids = []
    for cell_mapping in all_mappings:
        with nova_context.target_cell(admin_ctxt, cell_mapping) as cctxt:
            # Lowest nova-osapi_compute service version within this cell.
            lowest = self._get_min_service_version(
                cctxt, 'nova-osapi_compute')
            # No API service records, or all of them are new enough.
            if lowest is None or not lowest < 15:
                continue
            stale_cell_uuids.append(cell_mapping['uuid'])

    if not stale_cell_uuids:
        return UpgradeCheckResult(UpgradeCheckCode.SUCCESS)

    # This is only a warning because we cannot tell how the actual
    # nova-api service is configured, but the operator still needs a
    # hint that there is something to investigate and clean up.
    msg = (_("The following cells have 'nova-osapi_compute' services "
             "with version < 15 which may cause issues when querying "
             "instances from the API: %s. Depending on how nova-api "
             "is configured, this may not be a problem, but is worth "
             "investigating and potentially cleaning up those older "
             "records. See "
             "https://bugs.launchpad.net/nova/+bug/1759316 for "
             "details.") % ', '.join(stale_cell_uuids))
    return UpgradeCheckResult(UpgradeCheckCode.WARNING, msg)
# The format of the check functions is to return an UpgradeCheckResult
# object with the appropriate UpgradeCheckCode and details set. If the
# check hits warnings or failures then those should be stored in the
@ -455,7 +524,9 @@ class UpgradeCommands(object):
# Added in Ocata
(_('Resource Providers'), _check_resource_providers),
# Added in Rocky (but also useful going back to Pike)
(_('Ironic Flavor Migration'), _check_ironic_flavor_migration)
(_('Ironic Flavor Migration'), _check_ironic_flavor_migration),
# Added in Rocky (but is backportable to Ocata)
(_('API Service Version'), _check_api_service_version)
)
def _get_details(self, upgrade_check_result):

View File

@ -2269,6 +2269,22 @@ class API(base.Base):
# merged replica instead of the cell directly, so fall through
# here in that case as well.
if service_version < 15 or CONF.cells.enable:
# If not using cells v1, we need to log a warning about the API
# service version being less than 15 (that check was added in
# newton), which indicates there is some lingering data during the
# transition to cells v2 which could cause an InstanceNotFound
# here. The warning message is a sort of breadcrumb.
# This can all go away once we drop cells v1 and assert that all
# deployments have upgraded from a base cells v2 setup with
# mappings.
if not CONF.cells.enable:
LOG.warning('The nova-osapi_compute service version is from '
'before Ocata and may cause problems looking up '
'instances in a cells v2 setup. Check your '
'nova-api service configuration and cell '
'mappings. You may need to remove stale '
'nova-osapi_compute service records from the cell '
'database.')
return objects.Instance.get_by_uuid(context, instance_uuid,
expected_attrs=expected_attrs)
inst_map = self._get_instance_map_or_none(context, instance_uuid)

View File

@ -813,3 +813,107 @@ class TestUpgradeCheckIronicFlavorMigration(test.NoDBTestCase):
for cell_id in
sorted(unmigrated_instance_count_by_cell.keys())),
result.details)
class TestUpgradeCheckAPIServiceVersion(test.NoDBTestCase):
    """Covers the API service version check of nova-status upgrade check."""

    # The database setup is done by this test class itself because the
    # cells fixtures are needed to get multiple cell mappings.
    USES_DB_SELF = True

    # This will create three cell mappings: cell0, cell1 (default) and cell2
    NUMBER_OF_CELLS = 2

    def setUp(self):
        super(TestUpgradeCheckAPIServiceVersion, self).setUp()
        # Redirect sys.stdout into an in-memory buffer.
        self.output = StringIO()
        stdout_patch = fixtures.MonkeyPatch('sys.stdout', self.output)
        self.useFixture(stdout_patch)
        api_database = nova_fixtures.Database(database='api')
        self.useFixture(api_database)
        self.cmd = status.UpgradeCommands()

    @staticmethod
    def _create_service(ctxt, host, binary, version):
        """Create and return a Service record with the given version."""
        service = objects.Service(ctxt, host=host, binary=binary)
        service.version = version
        service.create()
        return service

    def test_check_cells_v1_enabled(self):
        """With cells v1 enabled the check is skipped and reports success."""
        self.flags(enable=True, group='cells')
        check_result = self.cmd._check_api_service_version()
        self.assertEqual(status.UpgradeCheckCode.SUCCESS, check_result.code)

    def test_check_no_cell_mappings_warning(self):
        """Without any cell mappings the check can only emit a warning."""
        check_result = self.cmd._check_api_service_version()
        self.assertEqual(status.UpgradeCheckCode.WARNING, check_result.code)
        expected_details = ('Unable to determine API service versions '
                            'without cell mappings.')
        self.assertEqual(expected_details, check_result.details)

    def test_check_warning(self):
        """Warning scenario with three cells set up as follows:

        1. cell0 has two API services, one with version < 15 and one with
           version >= 15.
        2. cell1 has two API services, one with version < 15 which is
           deleted and therefore filtered out, and one with version >= 15.
        3. cell2 has no API services at all, only an old compute service
           which is filtered out by binary.

        Only cell0 is expected to show up in the warning details.
        """
        self._setup_cells()
        admin_ctxt = context.get_admin_context()

        cell0 = self.cell_mappings['cell0']
        with context.target_cell(admin_ctxt, cell0) as cell_ctxt:
            self._create_service(cell_ctxt, host='cell0host1',
                                 binary='nova-osapi_compute', version=14)
            self._create_service(cell_ctxt, host='cell0host2',
                                 binary='nova-osapi_compute', version=15)

        cell1 = self.cell_mappings['cell1']
        with context.target_cell(admin_ctxt, cell1) as cell_ctxt:
            stale_service = self._create_service(
                cell_ctxt, host='cell1host1', binary='nova-osapi_compute',
                version=14)
            # The old record is deleted, so the check must ignore it.
            stale_service.destroy()
            self._create_service(cell_ctxt, host='cell1host2',
                                 binary='nova-osapi_compute', version=16)

        cell2 = self.cell_mappings['cell2']
        with context.target_cell(admin_ctxt, cell2) as cell_ctxt:
            self._create_service(cell_ctxt, host='cell2host1',
                                 binary='nova-compute', version=14)

        check_result = self.cmd._check_api_service_version()
        self.assertEqual(status.UpgradeCheckCode.WARNING, check_result.code)
        # cell0 must be the only cell named in the message.
        self.assertIn(cell0.uuid, check_result.details)
        for unaffected_cell in (cell1, cell2):
            self.assertNotIn(unaffected_cell.uuid, check_result.details)

    def test_check_success(self):
        """Success scenario: cell0 has a current API service, cell1 has no
        API services (only a compute service) and cell2 is empty.
        """
        self._setup_cells()
        admin_ctxt = context.get_admin_context()

        cell0 = self.cell_mappings['cell0']
        with context.target_cell(admin_ctxt, cell0) as cell_ctxt:
            self._create_service(cell_ctxt, host='cell0host1',
                                 binary='nova-osapi_compute', version=15)

        cell1 = self.cell_mappings['cell1']
        with context.target_cell(admin_ctxt, cell1) as cell_ctxt:
            self._create_service(cell_ctxt, host='cell1host1',
                                 binary='nova-compute', version=15)

        check_result = self.cmd._check_api_service_version()
        self.assertEqual(status.UpgradeCheckCode.SUCCESS, check_result.code)

View File

@ -0,0 +1,13 @@
---
upgrade:
- |
A new check is added to ``nova-status upgrade check`` which will scan
all cells looking for ``nova-osapi_compute`` service versions which are
from before Ocata and which may cause issues with how the compute API
finds instances. This will result in a warning if:
* No cell mappings are found
* The minimum ``nova-osapi_compute`` service version is less than 15 in
any given cell
See https://bugs.launchpad.net/nova/+bug/1759316 for more details.