Add fill_virtual_interface_list online_data_migration script

In change [1] we modified the _heal_instance_info_cache periodic task
to use Neutron's point of view while rebuilding InstanceInfoCache
objects.
The crucial point was how to know the previous order of ports if
the cache was broken. We decided to use VirtualInterfaceList objects
as the source of port order.
For instances older than Newton, VirtualInterface objects don't
exist, so we need to introduce a way of creating them.
This script should be executed while upgrading to the Stein release.

[1] https://review.openstack.org/#/c/591607

Change-Id: Ic26d4ce3d071691a621d3c925dc5cd436b2005f1
Related-Bug: 1751923
This commit is contained in:
Maciej Jozefczyk 2018-10-30 09:58:30 +00:00 committed by Maciej Józefczyk
parent 8ef3d253a0
commit 3534471c57
4 changed files with 562 additions and 0 deletions

View File

@ -66,6 +66,7 @@ from nova.objects import instance as instance_obj
from nova.objects import instance_mapping as instance_mapping_obj
from nova.objects import keypair as keypair_obj
from nova.objects import quotas as quotas_obj
from nova.objects import virtual_interface as virtual_interface_obj
from nova import quota
from nova import rpc
from nova.scheduler.client import report
@ -416,6 +417,8 @@ class DbCommands(object):
instance_mapping_obj.populate_queued_for_delete,
# Added in Stein
compute_node_obj.migrate_empty_ratio,
# Added in Stein
virtual_interface_obj.fill_virtual_interface_list,
)
def __init__(self):

View File

@ -12,16 +12,22 @@
# License for the specific language governing permissions and limitations
# under the License.
from oslo_log import log as logging
from oslo_utils import versionutils
from nova import context as nova_context
from nova.db import api as db
from nova.db.sqlalchemy import api as db_api
from nova.db.sqlalchemy import models
from nova import exception
from nova import objects
from nova.objects import base
from nova.objects import fields
# Module-level logger.
LOG = logging.getLogger(__name__)
VIF_OPTIONAL_FIELDS = ['network_id']
# All-zero UUID reserved for the placeholder instance / virtual interface
# rows that store the fill_virtual_interface_list migration marker; real
# processing skips any instance carrying this UUID.
FAKE_UUID = '00000000-0000-0000-0000-000000000000'
@base.NovaObjectRegistry.register
@ -142,3 +148,177 @@ class VirtualInterfaceList(base.ObjectListBase, base.NovaObject):
context, instance_uuid, use_slave=use_slave)
return base.obj_make_list(context, cls(context),
objects.VirtualInterface, db_vifs)
@db_api.api_context_manager.writer
def fill_virtual_interface_list(context, max_count):
    """Fill missing VirtualInterface records in the Nova cell databases.

    Walks the non-deleted instances of every cell in ``created_at`` order,
    resuming after the per-cell marker, and compares the port order stored
    in each instance's network info cache with its VirtualInterface records.
    When the database has fewer records than the cache, or the same number
    in a different order, the records are rebuilt from the cache. When the
    database has more records than the cache nothing is changed, because
    the instance may be in the middle of an interface attach.

    :param context: request context used to list the cell mappings; a
        cell-targeted copy is used for all per-cell work.
    :param max_count: maximum number of instances to check in this run,
        summed over all cells.
    :returns: tuple ``(count_all, count_hit)`` where ``count_all`` is the
        number of instances whose VIF list was compared with the cache
        (instances without usable cache data are skipped and not counted)
        and ``count_hit`` is the number of instances whose VIF list was
        rebuilt.
    """
    count_hit = 0
    count_all = 0

    def _regenerate_vif_list_base_on_cache(context,
                                           instance,
                                           old_vif_list,
                                           nw_info):
        # Index the previous records by port id so that tags can be carried
        # over without rescanning the whole list for every cached vif.
        # setdefault keeps the first record if a uuid appears twice.
        old_vifs_by_uuid = {}
        for old_vif in old_vif_list:
            old_vifs_by_uuid.setdefault(old_vif.uuid, old_vif)

        # Set old VirtualInterfaces as deleted.
        for old_vif in old_vif_list:
            old_vif.destroy()

        # Generate list based on current cache:
        for vif in nw_info:
            vif_obj = objects.VirtualInterface(context)
            vif_obj.uuid = vif['id']
            vif_obj.address = "%s/%s" % (vif['address'], vif['id'])
            vif_obj.instance_uuid = instance.uuid
            # Find tag from previous VirtualInterface object if exist.
            previous = old_vifs_by_uuid.get(vif['id'])
            vif_obj.tag = previous.tag if previous is not None else None
            vif_obj.create()

    cells = objects.CellMappingList.get_all(context)
    for cell in cells:
        if count_all == max_count:
            # We reached the limit of checked instances per
            # this function run.
            # Stop, do not go to other cell.
            break

        with nova_context.target_cell(context, cell) as cctxt:
            marker = _get_marker_for_migrate_instances(cctxt)
            # Adjust the limit of migrated instances.
            # If user wants to process a total of 100 instances
            # and we did a 75 in cell1, then we only need to
            # verify 25 more in cell2, no more.
            query_kwargs = {
                'filters': {'deleted': False},
                'sort_key': 'created_at',
                'sort_dir': 'asc',
                'limit': max_count - count_all,
            }
            try:
                instances = objects.InstanceList.get_by_filters(
                    cctxt, marker=marker, **query_kwargs)
            except exception.MarkerNotFound:
                # The instance referenced by the marker may have been
                # purged from the database. Start again from the first
                # instance of this cell; instances that are already
                # consistent are left untouched, so re-checking is safe.
                LOG.info('Marker instance %s was not found. Checking '
                         'instances from the beginning of the cell.',
                         marker)
                instances = objects.InstanceList.get_by_filters(
                    cctxt, marker=None, **query_kwargs)

            for instance in instances:
                # We don't want to fill vif for FAKE instance.
                if instance.uuid == FAKE_UUID:
                    continue

                try:
                    info_cache = objects.InstanceInfoCache.\
                        get_by_instance_uuid(cctxt, instance.uuid)
                except exception.InstanceInfoCacheNotFound:
                    LOG.info('Instance %s has no InstanceInfoCache object. '
                             'Skipping build of VirtualInterfaceList for '
                             'it.', instance.uuid)
                    continue

                if not info_cache.network_info:
                    LOG.info('InstanceInfoCache object has not set '
                             'NetworkInfo field. '
                             'Skipping build of VirtualInterfaceList for '
                             'instance %s.', instance.uuid)
                    continue

                # It by design filters out deleted vifs.
                vif_list = VirtualInterfaceList.\
                    get_by_instance_uuid(cctxt, instance.uuid)

                nw_info = info_cache.network_info
                # This should be list with proper order of vifs,
                # but we're not sure about that.
                cached_vif_ids = [vif['id'] for vif in nw_info]
                # This is ordered list of vifs taken from db.
                db_vif_ids = [vif.uuid for vif in vif_list]

                count_all += 1
                if cached_vif_ids == db_vif_ids:
                    # The list of vifs and its order in cache and in
                    # virtual_interfaces is the same. So we could end here.
                    continue
                elif len(db_vif_ids) < len(cached_vif_ids):
                    # Seems to be an instance from release older than
                    # Newton and we don't have full VirtualInterfaceList for
                    # it. Rewrite whole VirtualInterfaceList using interface
                    # order from InstanceInfoCache.
                    count_hit += 1
                    LOG.info('Got an instance %s with less VIFs defined in DB '
                             'than in cache. Could be Pre-Newton instance. '
                             'Building new VirtualInterfaceList for it.',
                             instance.uuid)
                    _regenerate_vif_list_base_on_cache(cctxt,
                                                       instance,
                                                       vif_list,
                                                       nw_info)
                elif len(db_vif_ids) > len(cached_vif_ids):
                    # Seems vif list is inconsistent with cache.
                    # it could be a broken cache or interface
                    # during attach. Do nothing.
                    LOG.info('Got an unexpected number of VIF records in the '
                             'database compared to what was stored in the '
                             'instance_info_caches table for instance %s. '
                             'Perhaps it is an instance during interface '
                             'attach. Do nothing.', instance.uuid)
                    continue
                else:
                    # The order is different between lists.
                    # We need a source of truth, so rebuild order
                    # from cache.
                    count_hit += 1
                    LOG.info('Got an instance %s with different order of '
                             'VIFs between DB and cache. '
                             'We need a source of truth, so rebuild order '
                             'from cache.', instance.uuid)
                    _regenerate_vif_list_base_on_cache(cctxt,
                                                       instance,
                                                       vif_list,
                                                       nw_info)

            # Set marker to point last checked instance.
            if instances:
                marker = instances[-1].uuid
                _set_or_delete_marker_for_migrate_instances(cctxt, marker)

    return count_all, count_hit
# NOTE(mjozefcz): This is similar to the marker mechanism made for
# RequestSpecs object creation.
# Since we have a lot of instances to check, this
# adds a FAKE row that points to the last instance
# we checked.
# Please note that because of virtual_interfaces_instance_uuid_fkey
# we need to have a FAKE_UUID instance object, even a deleted one.
@db_api.pick_context_manager_writer
def _set_or_delete_marker_for_migrate_instances(context, marker=None):
    """Replace the stored migration marker, or just clear it.

    The marker is kept in the ``tag`` column of a placeholder
    VirtualInterface row whose instance_uuid is FAKE_UUID.

    :param context: database context providing the session to write with.
    :param marker: UUID of the last checked instance; when None the
        existing marker row is removed and no new one is written.
    """
    session = context.session

    # Drop whatever marker row was stored before.
    session.query(models.VirtualInterface).filter_by(
        instance_uuid=FAKE_UUID).delete()

    # The marker row references an instance through the
    # virtual_interfaces_instance_uuid_fkey constraint, so make sure a
    # FAKE_UUID instance row exists before writing a new marker.
    placeholder = session.query(models.Instance).filter_by(
        uuid=FAKE_UUID).first()
    if not placeholder:
        placeholder = objects.Instance(context)
        placeholder.uuid = FAKE_UUID
        placeholder.project_id = FAKE_UUID
        placeholder.create()
        # Only the row itself is needed to satisfy the constraint, so the
        # fake instance is destroyed right after it has been created.
        placeholder.destroy()

    if marker is None:
        return

    # Store the new marker in a placeholder VirtualInterface row.
    marker_vif = objects.VirtualInterface(context)
    marker_vif.instance_uuid = FAKE_UUID
    marker_vif.uuid = FAKE_UUID
    marker_vif.tag = marker
    marker_vif.address = 'ff:ff:ff:ff:ff:ff/%s' % FAKE_UUID
    marker_vif.create()
@db_api.pick_context_manager_reader
def _get_marker_for_migrate_instances(context):
    """Return the stored migration marker, or None when there is none.

    The marker is read from the ``tag`` of the first VirtualInterface row
    whose instance_uuid is FAKE_UUID.
    """
    marker_query = context.session.query(models.VirtualInterface).filter_by(
        instance_uuid=FAKE_UUID)
    marker_row = marker_query.first()
    if marker_row:
        return marker_row['tag']
    return None

View File

@ -0,0 +1,369 @@
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import datetime
import mock
from oslo_config import cfg
from oslo_utils import timeutils
from nova import context
from nova import exception
from nova.network import model as network_model
from nova import objects
from nova.objects import virtual_interface
from nova.tests.functional import integrated_helpers
from nova.tests.unit import fake_network
# Global oslo.config configuration object.
CONF = cfg.CONF
# All-zero UUID; used in this module as the user_id of the instances the
# tests create.
FAKE_UUID = '00000000-0000-0000-0000-000000000000'
def _delete_vif_list(context, instance_uuid):
    """Destroy every VirtualInterface record found for the instance.

    :param context: context used to look the records up.
    :param instance_uuid: UUID of the instance whose records are destroyed.
    """
    existing_vifs = objects.VirtualInterfaceList.get_by_instance_uuid(
        context, instance_uuid)
    # Mark each of the instance's VirtualInterfaces as deleted.
    for existing_vif in existing_vifs:
        existing_vif.destroy()
def _verify_list_fulfillment(context, instance_uuid):
    """Check that an instance's VIF records match its network info cache.

    :param context: context used for both lookups.
    :param instance_uuid: UUID of the instance to check.
    :returns: True when the ids of the non-deleted VirtualInterface records
        equal, in the same order, the vif ids stored in the instance's
        network info cache. A missing cache, or a cache without network
        info, is treated as an empty list of vifs.
    """
    try:
        info_cache = objects.InstanceInfoCache.\
            get_by_instance_uuid(context, instance_uuid)
    except exception.InstanceInfoCacheNotFound:
        # No cache at all: compare against an empty list instead of
        # failing on a missing network_info attribute.
        nw_info = []
    else:
        # network_info may be unset (None); treat that as "no vifs".
        nw_info = info_cache.network_info or []

    vif_list = objects.VirtualInterfaceList.\
        get_by_instance_uuid(context, instance_uuid)

    cached_vif_ids = [vif['id'] for vif in nw_info]
    db_vif_ids = [vif.uuid for vif in vif_list if not vif.deleted]
    return cached_vif_ids == db_vif_ids
class VirtualInterfaceListMigrationTestCase(
    integrated_helpers._IntegratedTestBase,
    integrated_helpers.InstanceHelperMixin):
    """Functional tests for the fill_virtual_interface_list migration.

    Every test builds instances with four attached interfaces, optionally
    removes their VirtualInterface records to imitate pre-Newton instances,
    runs the migration and checks the ``(checked, migrated)`` counters it
    returns.
    """

    ADMIN_API = True
    USE_NEUTRON = True
    api_major_version = 'v2.1'

    _image_ref_parameter = 'imageRef'
    _flavor_ref_parameter = 'flavorRef'

    def setUp(self):
        super(VirtualInterfaceListMigrationTestCase, self).setUp()

        self.context = context.get_admin_context()
        fake_network.set_stub_network_methods(self)
        self.cells = objects.CellMappingList.get_all(self.context)

        # Start a second compute service, registered in cell0.
        compute_cell0 = self.start_service(
            'compute', host='compute2', cell='cell0')
        self.computes = [compute_cell0, self.compute]
        # Every instance created by _create_instances, in creation order.
        self.instances = []

    def _create_instances(self, pre_newton=2, deleted=0, total=5,
                          target_cell=None):
        """Create instances, each with four attached interfaces.

        :param pre_newton: number of instances, counted from the start of
            the created list, whose VirtualInterface records are destroyed
            to imitate pre-Newton instances.
        :param deleted: number of instances, counted from the end of the
            created list, that are destroyed afterwards.
        :param total: number of instances to create.
        :param target_cell: cell mapping to create the instances in;
            defaults to ``self.cells[1]``.
        """
        if not target_cell:
            target_cell = self.cells[1]

        instances = []
        with context.target_cell(self.context, target_cell) as cctxt:
            flav_dict = objects.Flavor._flavor_get_from_db(cctxt, 1)
            flavor = objects.Flavor(**flav_dict)
            for i in range(0, total):
                inst = objects.Instance(
                    context=cctxt,
                    project_id=self.api.project_id,
                    user_id=FAKE_UUID,
                    vm_state='active',
                    flavor=flavor,
                    created_at=datetime.datetime(1985, 10, 25, 1, 21, 0),
                    launched_at=datetime.datetime(1985, 10, 25, 1, 22, 0),
                    host=self.computes[0].host,
                    hostname='%s-inst%i' % (target_cell.name, i))
                inst.create()

                # Start every instance with an empty network info cache;
                # it is filled by the interface attachments below.
                info_cache = objects.InstanceInfoCache(context=cctxt)
                info_cache.updated_at = timeutils.utcnow()
                info_cache.network_info = network_model.NetworkInfo()
                info_cache.instance_uuid = inst.uuid
                info_cache.save()

                instances.append(inst)

                im = objects.InstanceMapping(context=cctxt,
                    project_id=inst.project_id,
                    user_id=inst.user_id,
                    instance_uuid=inst.uuid,
                    cell_mapping=target_cell)
                im.create()

        # Attach fake interfaces to instances
        network_id = list(self.neutron._networks.keys())[0]
        for inst in instances:
            for _ in range(0, 4):
                self.api.attach_interface(inst.uuid,
                    {"interfaceAttachment": {"net_id": network_id}})

        with context.target_cell(self.context, target_cell) as cctxt:
            # Fake the pre-newton behaviour by removing the
            # VirtualInterfacesList objects.
            if pre_newton:
                for inst in instances[:pre_newton]:
                    _delete_vif_list(cctxt, inst.uuid)

        if deleted:
            # Delete from the end of active instances list
            for inst in instances[total - deleted:total]:
                inst.destroy()

        self.instances += instances

    def test_migration_nothing_to_migrate(self):
        """All instances already have a complete VirtualInterfaceList, so
        every instance is checked and none is migrated.
        """
        self._create_instances(pre_newton=0, total=5)
        match, done = virtual_interface.fill_virtual_interface_list(
            self.context, 5)

        self.assertEqual(5, match)
        self.assertEqual(0, done)

    def test_migration_verify_max_count(self):
        """max_count is respected, so no more instances are checked than
        the user asked for.
        """
        self._create_instances(pre_newton=0, total=3)
        match, done = virtual_interface.fill_virtual_interface_list(
            self.context, 2)

        self.assertEqual(2, match)
        self.assertEqual(0, done)

    def test_migration_do_not_step_to_next_cell(self):
        """The migration does not step into the next cell once max_count
        has been reached.
        """
        # Create 2 instances in cell0
        self._create_instances(
            pre_newton=0, total=2, target_cell=self.cells[0])

        # Create 2 instances in cell1
        self._create_instances(
            pre_newton=0, total=2, target_cell=self.cells[1])

        with mock.patch('nova.objects.InstanceList.get_by_filters',
                        side_effect=[self.instances[0:2],
                                     self.instances[2:]]) \
                as mock_get:
            match, done = virtual_interface.fill_virtual_interface_list(
                self.context, 2)

        self.assertEqual(2, match)
        self.assertEqual(0, done)
        mock_get.assert_called_once()

    def test_migration_pre_newton_instances(self):
        """Instances created before Newton have no VirtualInterfaceList,
        so it must be re-created from the cache.
        """
        # Lets spawn 3 pre-newton instances and 2 new ones
        self._create_instances(pre_newton=3, total=5)
        match, done = virtual_interface.fill_virtual_interface_list(
            self.context, 5)

        self.assertEqual(5, match)
        self.assertEqual(3, done)

        # Make sure we ran over all the instances - verify if marker works
        match, done = virtual_interface.fill_virtual_interface_list(
            self.context, 50)
        self.assertEqual(0, match)
        self.assertEqual(0, done)

        # The helper only reports the outcome, so assert on what it
        # returns; otherwise this check verifies nothing.
        for inst in self.instances[:5]:
            self.assertTrue(
                _verify_list_fulfillment(self.context, inst.uuid))

    def test_migration_pre_newton_instance_new_vifs(self):
        """An instance created before Newton got new interfaces attached
        in the meantime, so its VirtualInterfaceList is only partly
        populated.
        """
        self._create_instances(pre_newton=0, total=1)

        vif_list = objects.VirtualInterfaceList.get_by_instance_uuid(
            self.context, self.instances[0].uuid)
        # Drop first vif from list to pretend old instance
        vif_list[0].destroy()

        match, done = virtual_interface.fill_virtual_interface_list(
            self.context, 5)

        # The whole VirtualInterfaceList should be rewritten and base
        # on cache.
        self.assertEqual(1, match)
        self.assertEqual(1, done)

        # Assert on the helper's result; ignoring it verifies nothing.
        self.assertTrue(
            _verify_list_fulfillment(self.context, self.instances[0].uuid))

    def test_migration_attach_in_progress(self):
        """The database holds more vifs than the network cache, which may
        mean that a port attach is taking place.
        """
        self._create_instances(pre_newton=0, total=1)
        instance_info_cache = objects.InstanceInfoCache.get_by_instance_uuid(
            self.context, self.instances[0].uuid)

        # Delete last interface to pretend that's still in progress
        instance_info_cache.network_info.pop()
        instance_info_cache.updated_at = datetime.datetime(2015, 1, 1)

        instance_info_cache.save()

        match, done = virtual_interface.fill_virtual_interface_list(
            self.context, 5)

        # I don't know whats going on so instance VirtualInterfaceList
        # should stay untouched.
        self.assertEqual(1, match)
        self.assertEqual(0, done)

    def test_migration_empty_network_info(self):
        """The migration is not executed when NetworkInfo is empty, as
        for an instance without attached interfaces.
        """
        self._create_instances(pre_newton=0, total=1)
        instance_info_cache = objects.InstanceInfoCache.get_by_instance_uuid(
            self.context, self.instances[0].uuid)

        # Clean NetworkInfo. Pretend instance without interfaces.
        instance_info_cache.network_info = None
        instance_info_cache.save()

        match, done = virtual_interface.fill_virtual_interface_list(
            self.context, 5)

        self.assertEqual(0, match)
        self.assertEqual(0, done)

    def test_migration_inconsistent_data(self):
        """The vifs in the database are in a different order than in the
        network cache. This is the corner case where the cache is taken
        as the source of truth.
        """
        self._create_instances(pre_newton=0, total=1)
        instance_info_cache = objects.InstanceInfoCache.get_by_instance_uuid(
            self.context, self.instances[0].uuid)

        # Change order of interfaces in NetworkInfo to fake
        # inconsistency between cache and db.
        nwinfo = instance_info_cache.network_info
        interface = nwinfo.pop()
        nwinfo.insert(0, interface)
        instance_info_cache.updated_at = datetime.datetime(2015, 1, 1)
        instance_info_cache.network_info = nwinfo

        # Update the cache
        instance_info_cache.save()

        match, done = virtual_interface.fill_virtual_interface_list(
            self.context, 5)

        # Cache is corrupted, so must be rewritten
        self.assertEqual(1, match)
        self.assertEqual(1, done)

    def test_migration_dont_touch_deleted_objects(self):
        """Deleted instances are skipped during the migration."""
        self._create_instances(
            pre_newton=1, deleted=1, total=3)

        match, done = virtual_interface.fill_virtual_interface_list(
            self.context, 4)
        self.assertEqual(2, match)
        self.assertEqual(1, done)

    def test_migration_multiple_cells(self):
        """The marker and the max_count limit work properly in a
        multi-cell environment.
        """
        # Create 2 instances in cell0
        self._create_instances(
            pre_newton=1, total=2, target_cell=self.cells[0])
        # Create 5 instances in cell1
        self._create_instances(
            pre_newton=3, total=5, target_cell=self.cells[1])

        # Fill vif list limiting to 4 instances - it should
        # touch cell0 and cell1 instances (migrate 3 due 1 is post newton).
        match, done = virtual_interface.fill_virtual_interface_list(
            self.context, 4)
        self.assertEqual(4, match)
        self.assertEqual(3, done)

        # Try again - should fill 3 left instances from cell1
        match, done = virtual_interface.fill_virtual_interface_list(
            self.context, 4)
        self.assertEqual(3, match)
        self.assertEqual(1, done)

        # Try again - should be nothing to migrate
        match, done = virtual_interface.fill_virtual_interface_list(
            self.context, 4)
        self.assertEqual(0, match)
        self.assertEqual(0, done)

    def test_migration_multiple_cells_new_instances_in_meantime(self):
        """The marker is kept per cell, so instances added in the
        meantime are still picked up.
        """
        # Create 2 instances in cell0
        self._create_instances(
            pre_newton=1, total=2, target_cell=self.cells[0])
        # Create 2 instances in cell1
        self._create_instances(
            pre_newton=1, total=2, target_cell=self.cells[1])

        # Migrate instances in both cells.
        match, done = virtual_interface.fill_virtual_interface_list(
            self.context, 4)
        self.assertEqual(4, match)
        self.assertEqual(2, done)

        # Add new instances to cell1
        self._create_instances(
            pre_newton=0, total=2, target_cell=self.cells[1])

        # Try again, should find instances in cell1
        match, done = virtual_interface.fill_virtual_interface_list(
            self.context, 4)
        self.assertEqual(2, match)
        self.assertEqual(0, done)

        # Try again - should be nothing to migrate
        match, done = virtual_interface.fill_virtual_interface_list(
            self.context, 4)
        self.assertEqual(0, match)
        self.assertEqual(0, done)

View File

@ -0,0 +1,10 @@
---
upgrade:
- The ``nova-manage db online_data_migrations`` command
will now fill missing ``virtual_interfaces`` records for instances
created before the Newton release. This is related to a fix for
https://launchpad.net/bugs/1751923 which makes the
_heal_instance_info_cache periodic task in the ``nova-compute``
service regenerate an instance network info cache from the current
neutron port list, and the VIFs from the database are needed to
maintain the port order for the instance.