[OVN] Remove SB "Chassis"/"Chassis_Private" duplicated registers

A new OVN maintenance method is added. This method lists all existing
OVN SB Chassis registers and checks if any of them has the same
hostname. In case of having duplicated "Chassis"/"Chassis_Private"
registers, the maintenance method will remove those with older
(lower) timestamp, that is stored in
"Chassis_Private.nb_cfg_timestamp", leaving only the newer one.

Closes-Bug: #2016158
Change-Id: Ib3c6f0dc01efd31430691e720ba23ccb4ede65fa
This commit is contained in:
Rodolfo Alonso Hernandez 2023-06-09 16:18:09 +00:00 committed by Rodolfo Alonso
parent 8887cdf5d3
commit 9d9f47c20c
3 changed files with 132 additions and 0 deletions

View File

@ -1030,6 +1030,63 @@ class DBInconsistenciesPeriodics(SchemaAwarePeriodicsBase):
from_reload=True)
raise periodics.NeverAgain()
@periodics.periodic(spacing=300, run_immediately=True)
def remove_duplicated_chassis_registers(self):
"""Remove the "Chassis" and "Chassis_Private" duplicated registers.
When the ovn-controller service of a node is updated and the system-id
is changed, if the old service is not stopped gracefully, it will leave
a "Chassis" and a "Chassis_Private" registers on the OVN SB database.
These leftovers must be removed.
NOTE: this method is executed every 5 minutes. If a new chassis is
added, this method will perform again the clean-up process.
NOTE: this method can be executed only if the OVN SB has the
"Chassis_Private" table. Otherwise, is not possible to find out which
register is newer and thus must be kept in the database.
"""
if not self._sb_idl.is_table_present('Chassis_Private'):
raise periodics.NeverAgain()
if not self.has_lock:
return
# dup_chassis_port_host = {host_name: [(ch1, ch_private1),
# (ch2, ch_private2), ... ]}
dup_chassis_port_host = {}
chassis = self._sb_idl.chassis_list().execute(check_error=True)
chassis_hostnames = {ch.hostname for ch in chassis}
# Find the duplicated "Chassis" and "Chassis_Private" registers,
# comparing the hostname.
for hostname in chassis_hostnames:
ch_list = []
# Find these chassis matching the hostname and create a list.
for ch in (ch for ch in chassis if ch.hostname == hostname):
ch_private = self._sb_idl.lookup('Chassis_Private', ch.name,
default=None)
if ch_private:
ch_list.append((ch, ch_private))
# If the chassis list > 1, then we have duplicated chassis.
if len(ch_list) > 1:
# Order ch_list by Chassis_Private.nb_cfg_timestamp, from newer
# (greater value) to older.
ch_list.sort(key=lambda x: x[1].nb_cfg_timestamp, reverse=True)
dup_chassis_port_host[hostname] = ch_list
if not dup_chassis_port_host:
return
# Remove the "Chassis" and "Chassis_Private" registers with the
# older Chassis_Private.nb_cfg_timestamp.
with self._sb_idl.transaction(check_error=True) as txn:
for ch_list in dup_chassis_port_host.values():
# The first item is skipped, this is the newest element.
for ch, ch_private in ch_list[1:]:
for table in ('Chassis_Private', 'Chassis'):
txn.add(self._sb_idl.db_destroy(table, ch.name))
class HashRingHealthCheckPeriodics(object):

View File

@ -972,6 +972,64 @@ class TestMaintenance(_TestMaintenanceHelper):
lsp = self.nb_api.lookup('Logical_Switch_Port', p1['id'])
self.assertEqual(hcg_uuid, lsp.ha_chassis_group[0].uuid)
def test_remove_duplicated_chassis_registers(self):
hostnames = ['host1', 'host2']
for hostname in hostnames:
for _ in range(3):
self.add_fake_chassis(hostname)
chassis = self.sb_api.chassis_list().execute(check_error=True)
self.assertEqual(6, len(chassis))
# Make the chassis private timestamp different
for idx, ch in enumerate(chassis):
self.sb_api.db_set('Chassis_Private', ch.name,
('nb_cfg_timestamp', idx)).execute()
ch_private_dict = {} # host: [ch_private1, ch_private2, ...]
for hostname in hostnames:
ch_private_list = []
for ch in (ch for ch in chassis if ch.hostname == hostname):
ch_private = self.sb_api.lookup('Chassis_Private', ch.name,
default=None)
if ch_private:
# One of the "Chassis_Private" has been deleted on purpose
# in this test.
ch_private_list.append(ch_private)
ch_private_list.sort(key=lambda x: x.nb_cfg_timestamp,
reverse=True)
ch_private_dict[hostname] = ch_private_list
self.maint.remove_duplicated_chassis_registers()
chassis_result = self.sb_api.chassis_list().execute(check_error=True)
self.assertEqual(2, len(chassis_result))
for ch in chassis_result:
self.assertIn(ch.hostname, hostnames)
hostnames.remove(ch.hostname)
# From ch_private_dict[ch.hostname], we retrieve the first
# "Chassis_Private" register because these are ordered by
# timestamp. The newer one (bigger timestamp) should remain in the
# system.
ch_expected = ch_private_dict[ch.hostname][0].chassis[0]
self.assertEqual(ch_expected.name, ch.name)
def test_remove_duplicated_chassis_registers_no_ch_private_register(self):
for _ in range(2):
self.add_fake_chassis('host1')
chassis = self.sb_api.chassis_list().execute(check_error=True)
self.assertEqual(2, len(chassis))
# Make the chassis private timestamp different
# Delete on of the "Chassis_Private" registers.
self.sb_api.db_destroy('Chassis_Private', chassis[0].name).execute()
self.sb_api.db_set('Chassis_Private', chassis[1].name,
('nb_cfg_timestamp', 1)).execute()
self.maint.remove_duplicated_chassis_registers()
chassis_result = self.sb_api.chassis_list().execute(check_error=True)
# Both "Chassis" registers are still in the DB because one
# "Chassis_Private" register was missing.
self.assertEqual(2, len(chassis_result))
class TestLogMaintenance(_TestMaintenanceHelper,
test_log_driver.LogApiTestCaseBase):

View File

@ -0,0 +1,17 @@
---
issues:
- |
When using ML2/OVN, during an upgrade procedure, the OVS system-id stored
value can be changed. The ovn-controller service will create the "Chassis"
and "Chassis_Private" registers based on this OVS system-id. If the
ovn-controller process is not gracefully stopped, that could lead to the
existence of duplicated "Chassis" and "Chassis_Private" registers in the
OVN Southbound database.
fixes:
- |
A new OVN maintenance method ``remove_duplicated_chassis_registers`` is
added. This method will periodically check the OVN Southbound "Chassis"
and "Chassis_Private" tables looking for duplicated registers. The older
ones (based on the "Chassis_Private.nb_cfg_timestamp" value) will be
removed when more than one register has the same hostname, that should
be unique.