From 3b873932be005eb6ec2b979fe72a92ece4fe905b Mon Sep 17 00:00:00 2001 From: Corey Bryant Date: Fri, 6 Oct 2017 17:37:04 +0000 Subject: [PATCH] Add nova-manage failure verbosity and clean up Cells V2 code Update all nova-manage commands to use subprocess.check_output() and log subprocess.CalledProcessError.output on failure. This will help capture nova-manage error details on first failure. Specify cell1 uuid on discover_hosts call. This doesn't change behavior, it is just more explicit and useful if we move to multiple cells in the future. Introduce an is_cellv2_init_ready() function that uses contexts to check if cells_v2 is ready to initialize. This cleans up the corresponding TODOs. Move checks for cell v2 init readiness to update_cell_db_if_ready(), also cleaning up corresponding TODOs. Change-Id: I313edce84d3d249031e020a4fbb4baf216c01ddb Related-Bug: 1720846 --- hooks/nova_cc_context.py | 19 ++++ hooks/nova_cc_hooks.py | 29 +++--- hooks/nova_cc_utils.py | 142 +++++++++++++++++----------- unit_tests/test_nova_cc_contexts.py | 13 +++ unit_tests/test_nova_cc_hooks.py | 21 +++- unit_tests/test_nova_cc_utils.py | 60 +++++++++--- 6 files changed, 195 insertions(+), 89 deletions(-) diff --git a/hooks/nova_cc_context.py b/hooks/nova_cc_context.py index 8c416827..18240512 100644 --- a/hooks/nova_cc_context.py +++ b/hooks/nova_cc_context.py @@ -88,6 +88,25 @@ class NovaCellContext(context.OSContextGenerator): return {} +class NovaCellV2SharedDBContext(context.OSContextGenerator): + interfaces = ['shared-db'] + + def __call__(self): + log('Generating template context for cell v2 share-db') + ctxt = {} + for rid in relation_ids('shared-db'): + for unit in related_units(rid): + rdata = relation_get(rid=rid, unit=unit) + ctxt = { + 'novaapi_password': rdata.get('novaapi_password'), + 'novacell0_password': rdata.get('novacell0_password'), + 'nova_password': rdata.get('nova_password'), + } + if context.context_complete(ctxt): + return ctxt + return {} + + class CloudComputeContext(context.OSContextGenerator): "Dummy context used by service status to check relation exists" interfaces = ['nova-compute'] diff --git a/hooks/nova_cc_hooks.py b/hooks/nova_cc_hooks.py index 11783ad1..658d7c73 100755 --- a/hooks/nova_cc_hooks.py +++ b/hooks/nova_cc_hooks.py @@ -97,6 +97,7 @@ from nova_cc_utils import ( enable_services, git_install, is_api_ready, + is_cellv2_init_ready, keystone_ca_cert_b64, migrate_nova_databases, placement_api_enabled, @@ -231,6 +232,9 @@ def update_cell_db_if_ready(skip_acl_check=False, db_rid=None, unit=None): log("Database not initialised - skipping cell db update", level=DEBUG) return + if not is_cellv2_init_ready(): + return + allowed_units = relation_get('nova_allowed_units', rid=db_rid, unit=unit) if skip_acl_check or (allowed_units and local_unit() in allowed_units.split()): @@ -373,13 +377,11 @@ def amqp_changed(): log('amqp relation incomplete. Peer not ready?') return CONFIGS.write(NOVA_CONF) - # TODO: Replace the following check with a Cellsv2 context check. - if CompareOpenStackReleases(os_release('nova-common')) >= 'ocata': - # db init for cells v2 requires amqp transport_url and db connections - # to be set in nova.conf, so we attempt db init in here as well as the - # db relation-changed hooks. - leader_init_db_if_ready_allowed_units() - update_cell_db_if_ready_allowed_units() + leader_init_db_if_ready_allowed_units() + # db init for cells v2 requires amqp transport_url and db connections + # to be set in nova.conf, so we attempt db init in here as well as the + # db relation-changed hooks. + update_cell_db_if_ready_allowed_units() [nova_cell_relation_joined(rid=rid) for rid in relation_ids('cell')] @@ -472,12 +474,11 @@ def db_changed(): return CONFIGS.write_all() + leader_init_db_if_ready() # db init for cells v2 requires amqp transport_url and db connections to # be set in nova.conf, so we attempt db init in here as well as the # amqp-relation-changed hook. - leader_init_db_if_ready() - if CompareOpenStackReleases(os_release('nova-common')) >= 'ocata': - update_cell_db_if_ready() + update_cell_db_if_ready() @hooks.hook('pgsql-nova-db-relation-changed') @@ -491,8 +492,7 @@ def postgresql_nova_db_changed(): CONFIGS.write_all() leader_init_db_if_ready(skip_acl_check=True, skip_cells_restarts=True) - if CompareOpenStackReleases(os_release('nova-common')) >= 'ocata': - update_cell_db_if_ready(skip_acl_check=True) + update_cell_db_if_ready(skip_acl_check=True) for r_id in relation_ids('nova-api'): nova_api_relation_joined(rid=r_id) @@ -708,7 +708,7 @@ def compute_changed(rid=None, unit=None): if not rel_settings.get('region', None) == config('region'): relation_set(relation_id=rid, region=config('region')) - if is_db_initialised(): + if is_cellv2_init_ready() and is_db_initialised(): add_hosts_to_cell() if 'migration_auth_type' not in rel_settings: @@ -931,8 +931,7 @@ def ha_changed(): active=config('service-guard')) def db_departed(): CONFIGS.write_all() - if CompareOpenStackReleases(os_release('nova-common')) >= 'ocata': - update_cell_db_if_ready(skip_acl_check=True) + update_cell_db_if_ready(skip_acl_check=True) for r_id in relation_ids('cluster'): relation_set(relation_id=r_id, dbsync_state='incomplete') disable_services() diff --git a/hooks/nova_cc_utils.py b/hooks/nova_cc_utils.py index e737f6e2..31d14b6f 100644 --- a/hooks/nova_cc_utils.py +++ b/hooks/nova_cc_utils.py @@ -75,7 +75,6 @@ from charmhelpers.core.hookenv import ( charm_dir, config, is_leader, - is_relation_made, log, relation_get, relation_ids, @@ -599,6 +598,23 @@ def is_db_initialised(): return False +def is_cellv2_init_ready(): + """Determine if we're ready to initialize the cell v2 databases + + Cells v2 init requires transport_url and database connections to be set + in nova.conf. + """ + amqp = context.AMQPContext() + shared_db = nova_cc_context.NovaCellV2SharedDBContext() + if (CompareOpenStackReleases(os_release('nova-common')) >= 'ocata' and + amqp() and shared_db()): + return True + + log("OpenStack release, database, or rabbitmq not ready for Cells V2", + level=DEBUG) + return False + + def _do_openstack_upgrade(new_src): enable_policy_rcd() # All upgrades to Liberty are forced to step through Kilo. Liberty does @@ -695,7 +711,11 @@ def migrate_nova_flavors(): '''Runs nova-manage to migrate flavor data if needed''' log('Migrating nova flavour information in database.', level=INFO) cmd = ['nova-manage', 'db', 'migrate_flavor_data'] - subprocess.check_output(cmd) + try: + subprocess.check_output(cmd) + except subprocess.CalledProcessError as e: + log('migrate_flavor_data failed\n{}'.format(e.output), level=ERROR) + raise @retry_on_exception(5, base_delay=3, exc_type=subprocess.CalledProcessError) @@ -704,30 +724,38 @@ def online_data_migrations_if_needed(): if (is_leader() and CompareOpenStackReleases(os_release('nova-common')) >= 'mitaka'): log('Running online_data_migrations', level=INFO) - subprocess.check_output(['nova-manage', 'db', - 'online_data_migrations']) + cmd = ['nova-manage', 'db', 'online_data_migrations'] + try: + subprocess.check_output(cmd) + except subprocess.CalledProcessError as e: + log('online_data_migrations failed\n{}'.format(e.output), + level=ERROR) + raise def migrate_nova_api_database(): '''Initialize or migrate the nova_api database''' if CompareOpenStackReleases(os_release('nova-common')) >= 'mitaka': + log('Migrating the nova-api database.', level=INFO) + cmd = ['nova-manage', 'api_db', 'sync'] try: - log('Migrating the nova-api database.', level=INFO) - cmd = ['nova-manage', 'api_db', 'sync'] subprocess.check_output(cmd) - except subprocess.CalledProcessError: + except subprocess.CalledProcessError as e: # NOTE(coreycb): sync of api_db on upgrade from newton->ocata # fails but cell init is successful. log('Ignoring CalledProcessError during nova-api database ' - 'migration.', level=INFO) - return + 'migration\n{}'.format(e.output), level=INFO) def migrate_nova_database(): '''Initialize or migrate the nova database''' log('Migrating the nova database.', level=INFO) cmd = ['nova-manage', 'db', 'sync'] - subprocess.check_output(cmd) + try: + subprocess.check_output(cmd) + except subprocess.CalledProcessError as e: + log('db sync failed\n{}'.format(e.output), level=ERROR) + raise def initialize_cell_databases(): @@ -738,18 +766,25 @@ def initialize_cell_databases(): ''' log('Creating cell0 database records', level=INFO) cmd = ['nova-manage', 'cell_v2', 'map_cell0'] - subprocess.check_output(cmd) + try: + subprocess.check_output(cmd) + except subprocess.CalledProcessError as e: + log('map_cell0 failed\n{}'.format(e.output), level=ERROR) + raise log('Creating cell1 database records', level=INFO) - cmd = ['nova-manage', 'cell_v2', 'create_cell', '--name', 'cell1'] - rc = subprocess.call(cmd) - # TODO: Update to subprocess.check_call(), but note that rc == 2 is - # not a failure so only allow exception to be raised if rc == 1. - if rc == 0: + cmd = ['nova-manage', 'cell_v2', 'create_cell', '--name', 'cell1', + '--verbose'] + try: + subprocess.check_output(cmd) log('cell1 was successfully created', level=INFO) - elif rc == 1: - raise Exception("Cannot initialize cell1 because of missing " - "transport_url or database connection") + except subprocess.CalledProcessError as e: + if e.returncode == 1: + log('Cell1 create_cell failed\n{}'.format(e.output), level=ERROR) + raise + elif e.returncode == 2: + log('Cell1 create_cell failure ignored - a cell is already using ' + 'the transport_url/database combination.', level=INFO) def get_cell_uuid(cell): @@ -759,7 +794,11 @@ def get_cell_uuid(cell): ''' log("Listing cell, '{}'".format(cell), level=INFO) cmd = ['sudo', 'nova-manage', 'cell_v2', 'list_cells'] - out = subprocess.check_output(cmd) + try: + out = subprocess.check_output(cmd) + except subprocess.CalledProcessError as e: + log('list_cells failed\n{}'.format(e.output), level=ERROR) + raise cell_uuid = out.split(cell, 1)[1].split()[1] if not cell_uuid: raise Exception("Cannot find cell, '{}', in list_cells." @@ -776,45 +815,46 @@ def update_cell_database(): log('Updating cell1 properties', level=INFO) cell1_uuid = get_cell_uuid('cell1') cmd = ['nova-manage', 'cell_v2', 'update_cell', '--cell_uuid', cell1_uuid] - rc = subprocess.call(cmd) - # TODO: Update to subprocess.check_call(), but note that rc == 2 is - # not a failure so only allow exception to be raised if rc == 1. - if rc == 0: - log('cell1 properties updated successfully', level=INFO) - elif rc == 1: - raise Exception("Cannot find cell1 while attempting properties update") + try: + subprocess.check_output(cmd) + except subprocess.CalledProcessError as e: + if e.returncode == 1: + log('Cell1 update_cell failed\n{}'.format(e.output), level=ERROR) + raise + elif e.returncode == 2: + log('Cell1 update_cell failure ignored - the properties cannot ' + 'be set.', level=INFO) + else: + log('cell1 was successfully updated', level=INFO) def map_instances(): - '''Map instances + '''Map instances to cell - Updates nova_api.inatance_mappings with pre-existing instances + Updates nova_api.instance_mappings with pre-existing instances ''' log('Cell1 map_instances', level=INFO) cell1_uuid = get_cell_uuid('cell1') cmd = ['nova-manage', 'cell_v2', 'map_instances', '--cell_uuid', cell1_uuid] - rc = subprocess.call(cmd) - if rc == 0: - log('Cell1 map_instances updated successfully', level=INFO) - elif rc == 1: - raise Exception("map_instances failed") + try: + subprocess.check_output(cmd) + except subprocess.CalledProcessError as e: + log('Cell1 map_instances failed\n{}'.format(e.output), level=ERROR) + raise def add_hosts_to_cell(): - '''Add any new compute hosts to cell1''' - # TODO: Replace the following checks with a Cellsv2 context check. - if (CompareOpenStackReleases(os_release('nova-common')) >= 'ocata' and - is_relation_made('amqp', 'password') and - is_relation_made('shared-db', 'novaapi_password') and - is_relation_made('shared-db', 'novacell0_password') and - is_relation_made('shared-db', 'nova_password')): - cmd = ['nova-manage', 'cell_v2', 'list_cells'] - output = subprocess.check_output(cmd) - if 'cell1' in output: - log('Adding hosts to cell.', level=INFO) - cmd = ['nova-manage', 'cell_v2', 'discover_hosts'] - subprocess.check_output(cmd) + '''Map compute hosts to cell''' + log('Cell1 discover_hosts', level=INFO) + cell1_uuid = get_cell_uuid('cell1') + cmd = ['nova-manage', 'cell_v2', 'discover_hosts', '--cell_uuid', + cell1_uuid, '--verbose'] + try: + subprocess.check_output(cmd) + except subprocess.CalledProcessError as e: + log('Cell1 discover_hosts failed\n{}'.format(e.output), level=ERROR) + raise def finalize_migrate_nova_databases(): @@ -837,13 +877,7 @@ def migrate_nova_databases(): online_data_migrations_if_needed() finalize_migrate_nova_databases() - # TODO: Replace the following checks with a Cellsv2 context check. - elif (is_relation_made('amqp', 'password') and - is_relation_made('shared-db', 'novaapi_password') and - is_relation_made('shared-db', 'novacell0_password') and - is_relation_made('shared-db', 'nova_password')): - # Note: cells v2 init requires transport_url and database connections - # to be set in nova.conf. + elif is_cellv2_init_ready(): migrate_nova_api_database() initialize_cell_databases() migrate_nova_database() diff --git a/unit_tests/test_nova_cc_contexts.py b/unit_tests/test_nova_cc_contexts.py index 057a9549..6d23c16c 100644 --- a/unit_tests/test_nova_cc_contexts.py +++ b/unit_tests/test_nova_cc_contexts.py @@ -347,3 +347,16 @@ class NovaComputeContextTests(CharmTestCase): 'enable_serial_console': 'true'} ) mock_resolve_address.assert_called_with(endpoint_type=context.PUBLIC) + + def test_nova_cellv2_shared_db_context(self): + self.relation_ids.return_value = ['shared-db:0'] + self.related_units.return_value = ['mysql/0'] + self.test_relation.set( + {'novaapi_password': 'changeme', + 'novacell0_password': 'passw0rd', + 'nova_password': '1234'}) + self.assertEqual( + context.NovaCellV2SharedDBContext()(), + {'novaapi_password': 'changeme', + 'novacell0_password': 'passw0rd', + 'nova_password': '1234'}) diff --git a/unit_tests/test_nova_cc_hooks.py b/unit_tests/test_nova_cc_hooks.py index de03e433..75851575 100644 --- a/unit_tests/test_nova_cc_hooks.py +++ b/unit_tests/test_nova_cc_hooks.py @@ -282,17 +282,22 @@ class NovaCCHooksTests(CharmTestCase): hooks.config_changed() mock_compute_changed.assert_has_calls([call('generic_rid', 'unit/0')]) + @patch.object(hooks, 'is_cellv2_init_ready') @patch.object(hooks, 'is_db_initialised') @patch.object(hooks, 'nova_api_relation_joined') def test_compute_changed_nova_api_trigger(self, api_joined, - mock_is_db_initialised): + mock_is_db_initialised, + mock_is_cellv2_init_ready): self.relation_ids.return_value = ['nova-api/0'] mock_is_db_initialised.return_value = False + mock_is_cellv2_init_ready.return_value = False hooks.compute_changed() api_joined.assert_called_with(rid='nova-api/0') + @patch.object(hooks, 'is_cellv2_init_ready') @patch.object(hooks, 'is_db_initialised') - def test_compute_changed_ssh_migration(self, mock_is_db_initialised): + def test_compute_changed_ssh_migration(self, mock_is_db_initialised, + mock_is_cellv2_init_ready): self.test_relation.set({ 'migration_auth_type': 'ssh', 'ssh_public_key': 'fookey', 'private-address': '10.0.0.1', 'region': 'RegionOne'}) @@ -301,6 +306,7 @@ class NovaCCHooksTests(CharmTestCase): self.ssh_authorized_keys_lines.return_value = [ 'auth_0', 'auth_1', 'auth_2'] mock_is_db_initialised.return_value = False + mock_is_cellv2_init_ready.return_value = False hooks.compute_changed() self.ssh_compute_add.assert_called_with('fookey', rid=None, unit=None) expected_relations = [ @@ -321,8 +327,10 @@ class NovaCCHooksTests(CharmTestCase): self.assertEqual(sorted(self.relation_set.call_args_list), sorted(expected_relations)) + @patch.object(hooks, 'is_cellv2_init_ready') @patch.object(hooks, 'is_db_initialised') - def test_compute_changed_nova_public_key(self, mock_is_db_initialised): + def test_compute_changed_nova_public_key(self, mock_is_db_initialised, + mock_is_cellv2_init_ready): self.test_relation.set({ 'migration_auth_type': 'sasl', 'nova_ssh_public_key': 'fookey', 'private-address': '10.0.0.1', 'region': 'RegionOne'}) @@ -331,6 +339,7 @@ class NovaCCHooksTests(CharmTestCase): self.ssh_authorized_keys_lines.return_value = [ 'auth_0', 'auth_1', 'auth_2'] mock_is_db_initialised.return_value = False + mock_is_cellv2_init_ready.return_value = False hooks.compute_changed() self.ssh_compute_add.assert_called_with('fookey', user='nova', rid=None, unit=None) @@ -677,6 +686,7 @@ class NovaCCHooksTests(CharmTestCase): cell_joined.assert_called_with(rid='nova-cell-api/0') @patch.object(hooks, 'leader_init_db_if_ready_allowed_units') + @patch.object(hooks, 'update_cell_db_if_ready_allowed_units') @patch.object(hooks, 'is_db_initialised') @patch.object(hooks, 'quantum_joined') @patch.object(hooks, 'nova_api_relation_joined') @@ -684,7 +694,7 @@ class NovaCCHooksTests(CharmTestCase): @patch.object(hooks, 'CONFIGS') def test_amqp_changed_api_rel(self, configs, cell_joined, api_joined, quantum_joined, mock_is_db_initialised, - init_db_allowed): + update_db_allowed, init_db_allowed): self.relation_ids.side_effect = [ ['nova-cell-api/0'], ['nova-api/0'], @@ -705,6 +715,7 @@ class NovaCCHooksTests(CharmTestCase): remote_restart=True) @patch.object(hooks, 'leader_init_db_if_ready_allowed_units') + @patch.object(hooks, 'update_cell_db_if_ready_allowed_units') @patch.object(hooks, 'is_db_initialised') @patch.object(hooks, 'quantum_joined') @patch.object(hooks, 'nova_api_relation_joined') @@ -712,7 +723,7 @@ class NovaCCHooksTests(CharmTestCase): @patch.object(hooks, 'CONFIGS') def test_amqp_changed_noapi_rel(self, configs, cell_joined, api_joined, quantum_joined, mock_is_db_initialised, - init_db_allowed): + update_db_allowed, init_db_allowed): mock_is_db_initialised.return_value = False configs.complete_contexts = MagicMock() configs.complete_contexts.return_value = ['amqp'] diff --git a/unit_tests/test_nova_cc_utils.py b/unit_tests/test_nova_cc_utils.py index 9215f77e..ddcd757e 100644 --- a/unit_tests/test_nova_cc_utils.py +++ b/unit_tests/test_nova_cc_utils.py @@ -13,7 +13,7 @@ # limitations under the License. from collections import OrderedDict -from mock import patch, MagicMock, call, ANY +from mock import patch, MagicMock, call from test_utils import ( CharmTestCase, @@ -64,7 +64,6 @@ TO_PATCH = [ 'os_application_version_set', 'token_cache_pkgs', 'enable_memcache', - 'is_relation_made', ] SCRIPTRC_ENV_VARS = { @@ -724,24 +723,25 @@ class NovaCCUtilsTests(CharmTestCase): self.assertTrue(self.enable_services.called) self.cmd_all_services.assert_called_with('start') - @patch('subprocess.call') @patch('subprocess.check_output') - def test_migrate_nova_databases_ocata(self, check_output, mock_call): + @patch.object(utils, 'get_cell_uuid') + @patch.object(utils, 'is_cellv2_init_ready') + def test_migrate_nova_databases_ocata(self, cellv2_ready, get_cell_uuid, + check_output): "Migrate database with nova-manage in a clustered env" - self.is_relation_made.return_value = True + get_cell_uuid.return_value = 'c83121db-f1c7-464a-b657-38c28fac84c6' self.relation_ids.return_value = ['cluster:1'] self.os_release.return_value = 'ocata' utils.migrate_nova_databases() - mock_call.assert_has_calls([ - call(['nova-manage', 'cell_v2', 'create_cell', '--name', 'cell1']), - ]) check_output.assert_has_calls([ call(['nova-manage', 'api_db', 'sync']), call(['nova-manage', 'cell_v2', 'map_cell0']), + call(['nova-manage', 'cell_v2', 'create_cell', '--name', 'cell1', + '--verbose']), call(['nova-manage', 'db', 'sync']), call(['nova-manage', 'db', 'online_data_migrations']), - call(['nova-manage', 'cell_v2', 'list_cells']), - ANY, + call(['nova-manage', 'cell_v2', 'discover_hosts', '--cell_uuid', + 'c83121db-f1c7-464a-b657-38c28fac84c6', '--verbose']), ]) self.peer_store.assert_called_with('dbsync_state', 'complete') self.assertTrue(self.enable_services.called) @@ -1381,11 +1381,41 @@ class NovaCCUtilsTests(CharmTestCase): self.assertEqual(expected, utils.get_cell_uuid('cell1')) @patch.object(utils, 'get_cell_uuid') - @patch('subprocess.call') - def test_map_instances(self, mock_call, mock_get_cell_uuid): + @patch('subprocess.check_output') + def test_map_instances(self, mock_check_output, mock_get_cell_uuid): cell_uuid = 'c83121db-f1c7-464a-b657-38c28fac84c6' mock_get_cell_uuid.return_value = cell_uuid utils.map_instances() - mock_call.assert_called_with(['nova-manage', 'cell_v2', - 'map_instances', '--cell_uuid', - cell_uuid]) + mock_check_output.assert_called_with(['nova-manage', 'cell_v2', + 'map_instances', '--cell_uuid', + cell_uuid]) + + @patch.object(utils, 'get_cell_uuid') + @patch('subprocess.check_output') + def test_add_hosts_to_cell(self, mock_check_output, mock_get_cell_uuid): + cell_uuid = 'c83121db-f1c7-464a-b657-38c28fac84c6' + mock_get_cell_uuid.return_value = cell_uuid + utils.add_hosts_to_cell() + mock_check_output.assert_called_with( + ['nova-manage', 'cell_v2', 'discover_hosts', '--cell_uuid', + 'c83121db-f1c7-464a-b657-38c28fac84c6', '--verbose']) + + @patch('nova_cc_context.NovaCellV2SharedDBContext') + @patch('charmhelpers.contrib.openstack.context.AMQPContext') + def test_is_cellv2_init_ready_mitaka(self, amqp, shared_db): + self.os_release.return_value = 'mitaka' + utils.is_cellv2_init_ready() + self.os_release.assert_called_once_with('nova-common') + amqp.assert_called_once() + shared_db.assert_called_once() + self.log.assert_called_once() + + @patch('nova_cc_context.NovaCellV2SharedDBContext') + @patch('charmhelpers.contrib.openstack.context.AMQPContext') + def test_is_cellv2_init_ready_ocata(self, amqp, shared_db): + self.os_release.return_value = 'ocata' + utils.is_cellv2_init_ready() + self.os_release.assert_called_once_with('nova-common') + amqp.assert_called_once() + shared_db.assert_called_once() + self.log.assert_not_called()