diff --git a/cinder/cmd/manage.py b/cinder/cmd/manage.py
index 29df21976a3..b7a115a77ee 100644
--- a/cinder/cmd/manage.py
+++ b/cinder/cmd/manage.py
@@ -88,6 +88,8 @@ from cinder.volume import utils as vutils
 
 CONF = cfg.CONF
 
+LOG = logging.getLogger(__name__)
+
 RPC_VERSIONS = {
     'cinder-scheduler': scheduler_rpcapi.SchedulerAPI.RPC_API_VERSION,
     'cinder-volume': volume_rpcapi.VolumeAPI.RPC_API_VERSION,
@@ -339,34 +341,32 @@ class DbCommands(object):
 
     def _run_migration(self, ctxt, max_count):
         ran = 0
+        exceptions = False
         migrations = {}
         for migration_meth in self.online_migrations:
             count = max_count - ran
             try:
                 found, done = migration_meth(ctxt, count)
             except Exception:
-                print(_("Error attempting to run %(method)s") %
-                      {'method': migration_meth.__name__})
+                msg = (_("Error attempting to run %(method)s") %
+                       {'method': migration_meth.__name__})
+                print(msg)
+                LOG.exception(msg)
+                exceptions = True
                 found = done = 0
 
             name = migration_meth.__name__
-            remaining = found - done
             if found:
                 print(_('%(found)i rows matched query %(meth)s, %(done)i '
-                        'migrated, %(remaining)i remaining') % {'found': found,
-                                                                'meth': name,
-                                                                'done': done,
-                                                                'remaining':
-                                                                remaining})
-            migrations.setdefault(name, (0, 0, 0))
-            migrations[name] = (migrations[name][0] + found,
-                                migrations[name][1] + done,
-                                migrations[name][2] + remaining)
+                        'migrated') % {'found': found,
+                                       'meth': name,
+                                       'done': done})
+            migrations[name] = found, done
             if max_count is not None:
                 ran += done
                 if ran >= max_count:
                     break
-        return migrations
+        return migrations, exceptions
 
     @args('--max_count', metavar='<number>', dest='max_count', type=int,
           help='Maximum number of objects to consider.')
@@ -383,20 +383,19 @@ class DbCommands(object):
             max_count = 50
             print(_('Running batches of %i until complete.') % max_count)
 
-        # FIXME(jdg): So this is annoying and confusing,
-        # we iterate through in batches until there are no
-        # more updates, that's AWESOME!! BUT we only print
-        # out a table reporting found/done AFTER the loop
-        # here, so that means the response the user sees is
-        # always a table of "needed 0" and "completed 0".
-        # So it's an indication of "all done" but it seems like
-        # some feedback as we go would be nice to have here.
         ran = None
+        exceptions = False
         migration_info = {}
         while ran is None or ran != 0:
-            migrations = self._run_migration(ctxt, max_count)
-            migration_info.update(migrations)
-            ran = sum([done for found, done, remaining in migrations.values()])
+            migrations, exceptions = self._run_migration(ctxt, max_count)
+            ran = 0
+            for name in migrations:
+                migration_info.setdefault(name, (0, 0))
+                migration_info[name] = (
+                    max(migration_info[name][0], migrations[name][0]),
+                    migration_info[name][1] + migrations[name][1],
+                )
+                ran += migrations[name][1]
             if not unlimited:
                 break
         headers = ["{}".format(_('Migration')),
@@ -408,6 +407,18 @@ class DbCommands(object):
             t.add_row([name, info[0], info[1]])
         print(t)
 
+        # NOTE(imacdonn): In the "unlimited" case, the loop above will only
+        # terminate when all possible migrations have been effected. If we're
+        # still getting exceptions, there's a problem that requires
+        # intervention. In the max-count case, exceptions are only considered
+        # fatal if no work was done by any other migrations ("not ran"),
+        # because otherwise work may still remain to be done, and that work
+        # may resolve dependencies for the failing migrations.
+        if exceptions and (unlimited or not ran):
+            print(_("Some migrations failed unexpectedly. Check log for "
+                    "details."))
+            sys.exit(2)
+
         sys.exit(1 if ran else 0)
 
     @args('--enable-replication', action='store_true', default=False,
diff --git a/cinder/tests/unit/test_cmd.py b/cinder/tests/unit/test_cmd.py
index 5c79b17dd37..375a54e0284 100644
--- a/cinder/tests/unit/test_cmd.py
+++ b/cinder/tests/unit/test_cmd.py
@@ -450,8 +450,8 @@ class TestCinderManageCmd(test.TestCase):
                                  command.online_data_migrations, 10)
         self.assertEqual(1, exit.code)
         expected = """\
-5 rows matched query mock_mig_1, 4 migrated, 1 remaining
-6 rows matched query mock_mig_2, 6 migrated, 0 remaining
+5 rows matched query mock_mig_1, 4 migrated
+6 rows matched query mock_mig_2, 6 migrated
 +------------+--------------+-----------+
 | Migration  | Total Needed | Completed |
 +------------+--------------+-----------+
@@ -466,6 +466,78 @@ class TestCinderManageCmd(test.TestCase):
 
         self.assertEqual(expected, sys.stdout.getvalue())
 
+    @mock.patch('cinder.context.get_admin_context')
+    def test_online_migrations_no_max_count(self, mock_get_context):
+        self.useFixture(fixtures.MonkeyPatch('sys.stdout', StringIO()))
+        fake_remaining = [120]
+
+        def fake_migration(context, count):
+            self.assertEqual(mock_get_context.return_value, context)
+            found = 120
+            done = min(fake_remaining[0], count)
+            fake_remaining[0] -= done
+            return found, done
+
+        command_cls = self._fake_db_command((fake_migration,))
+        command = command_cls()
+
+        exit = self.assertRaises(SystemExit,
+                                 command.online_data_migrations, None)
+        self.assertEqual(0, exit.code)
+        expected = """\
+Running batches of 50 until complete.
+120 rows matched query fake_migration, 50 migrated
+120 rows matched query fake_migration, 50 migrated
+120 rows matched query fake_migration, 20 migrated
+120 rows matched query fake_migration, 0 migrated
++----------------+--------------+-----------+
+|   Migration    | Total Needed | Completed |
++----------------+--------------+-----------+
+| fake_migration |     120      |    120    |
++----------------+--------------+-----------+
+"""
+        self.assertEqual(expected, sys.stdout.getvalue())
+
+    @mock.patch('cinder.context.get_admin_context')
+    def test_online_migrations_error(self, mock_get_context):
+        self.useFixture(fixtures.MonkeyPatch('sys.stdout', StringIO()))
+        good_remaining = [50]
+
+        def good_migration(context, count):
+            self.assertEqual(mock_get_context.return_value, context)
+            found = 50
+            done = min(good_remaining[0], count)
+            good_remaining[0] -= done
+            return found, done
+
+        bad_migration = mock.MagicMock()
+        bad_migration.side_effect = test.TestingException
+        bad_migration.__name__ = 'bad_migration'
+
+        command_cls = self._fake_db_command((bad_migration, good_migration))
+        command = command_cls()
+
+        # bad_migration raises an exception, but it could be because
+        # good_migration had not completed yet. We should get 1 in this case,
+        # because some work was done, and the command should be reiterated.
+        exit = self.assertRaises(SystemExit,
+                                 command.online_data_migrations, max_count=50)
+        self.assertEqual(1, exit.code)
+
+        # When running this for the second time, there's no work left for
+        # good_migration to do, but bad_migration still fails - should
+        # get 2 this time.
+        exit = self.assertRaises(SystemExit,
+                                 command.online_data_migrations, max_count=50)
+        self.assertEqual(2, exit.code)
+
+        # When --max-count is not used, we should get 2 if all possible
+        # migrations completed but some raise exceptions
+        good_remaining = [50]
+        exit = self.assertRaises(SystemExit,
+                                 command.online_data_migrations, None)
+        self.assertEqual(2, exit.code)
+
     @mock.patch('cinder.cmd.manage.DbCommands.online_migrations',
                 (mock.Mock(side_effect=((2, 2), (0, 0)), __name__='foo'),))
     def test_db_commands_online_data_migrations_ignore_state_and_max(self):
diff --git a/doc/source/contributor/groups.rst b/doc/source/contributor/groups.rst
index 0cbaf8f1e9e..f03b1cd710a 100644
--- a/doc/source/contributor/groups.rst
+++ b/doc/source/contributor/groups.rst
@@ -310,10 +310,8 @@
 and cgsnapshots will be removed from the database::
 
     cinder-manage db online_data_migrations --max_count
-    --ignore_state
 
 max_count is optional. Default is 50.
-ignore_state is optional. Default is False.
 
 After running the above migration command to migrate CGs to generic volume
 groups, CG and group APIs work as follows:
diff --git a/doc/source/man/cinder-manage.rst b/doc/source/man/cinder-manage.rst
index d8c6cec2e05..ea1dcb8c0aa 100644
--- a/doc/source/man/cinder-manage.rst
+++ b/doc/source/man/cinder-manage.rst
@@ -67,16 +67,30 @@ Cinder Db
 
     Purge database entries that are marked as deleted, that are older than the number of days specified.
 
-``cinder-manage db online_data_migrations``
+``cinder-manage db online_data_migrations [--max-count <n>]``
 
-    Perform online data migrations for database upgrade between releases in batches.
+    Perform online data migrations for database upgrade between releases in
+    batches.
 
     This command interprets the following options when it is invoked:
 
-    --max_count         Maximum number of objects to consider.
-    --ignore_state      Force records to migrate even if another operation is
-                        performed on them. This may be dangerous, please refer to
-                        release notes for more information.
+    --max-count         Maximum number of objects to migrate. If not specified,
+                        all possible migrations will be completed, in batches of
+                        50 at a time.
+
+    Returns exit status 0 if no (further) updates are possible, 1 if the
+    ``--max-count`` option was used and some updates were completed
+    successfully (even if others generated errors), 2 if some updates generated
+    errors and no other migrations were able to take effect in the last batch
+    attempted, or 127 if invalid input is provided (e.g. non-numeric
+    max-count).
+
+    This command should be run after upgrading the database schema. If it exits
+    with partial updates (exit status 1) it should be called again, even if
+    some updates initially generated errors, because some updates may depend on
+    others having completed. If it exits with status 2, intervention is
+    required to resolve the issue causing remaining updates to fail. It should
+    be considered successfully completed only when the exit status is 0.
 
 Cinder Logs
 ~~~~~~~~~~~
diff --git a/doc/source/upgrade.rst b/doc/source/upgrade.rst
index 8be7c7ee9c7..1d93e5496e2 100644
--- a/doc/source/upgrade.rst
+++ b/doc/source/upgrade.rst
@@ -238,10 +238,17 @@ After maintenance window
   upgrade is performed.
 
 * Since Ocata, you also need to run ``cinder-manage db online_data_migrations``
-  command to make sure data migrations are applied. The tool let's you limit
-  the impact of the data migrations by using ``--max_number`` option to limit
-  number of migrations executed in one run. You need to complete all of the
-  migrations before starting upgrade to the next version (e.g. you need to
-  complete Ocata's data migrations before proceeding with upgrade to Pike; you
-  won't be able to execute Pike's DB schema migrations before completing
-  Ocata's data migrations).
+  command to make sure data migrations are applied. The tool lets you limit
+  the impact of the data migrations by using ``--max-count`` option to limit
+  number of migrations executed in one run. If this option is used, the
+  exit status will be 1 if any migrations were successful (even if others
+  generated errors, which could be due to dependencies between migrations).
+  The command should be rerun while the exit status is 1. If no further
+  migrations are possible, the exit status will be 2 if some migrations are
+  still generating errors, which requires intervention to resolve. The
+  command should be considered completed successfully only when the exit
+  status is 0. You need to complete all of the migrations before starting
+  upgrade to the next version (e.g. you need to complete Ocata's data
+  migrations before proceeding with upgrade to Pike; you won't be able to
+  execute Pike's DB schema migrations before completing Ocata's data
+  migrations).
diff --git a/releasenotes/notes/cinder-manage-online-migrations-exit-status-7c16edb7facc37bb.yaml b/releasenotes/notes/cinder-manage-online-migrations-exit-status-7c16edb7facc37bb.yaml
new file mode 100644
index 00000000000..0f92e8cd44e
--- /dev/null
+++ b/releasenotes/notes/cinder-manage-online-migrations-exit-status-7c16edb7facc37bb.yaml
@@ -0,0 +1,13 @@
+---
+upgrade:
+  - |
+    The ``cinder-manage db online_data_migrations`` command now returns exit
+    status 2 in the case where some migrations failed (raised exceptions) and
+    no others were completed successfully from the last batch attempted. This
+    should be considered a fatal condition that requires intervention. Exit
+    status 1 will be returned in the case where the ``--max-count`` option was
+    used and some migrations failed but others succeeded (updated at least one
+    row), because more work may remain for the non-failing migrations, and
+    their completion may be a dependency for the failing ones. The command
+    should be reiterated while it returns exit status 1, and considered
+    completed successfully only when it returns exit status 0.
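 
Usage sketch (not part of the patch above): the exit-status protocol described in the man page and upgrade guide changes is intended to be driven in a loop by the operator or by deployment tooling. Assuming only the ``cinder-manage db online_data_migrations`` invocation and the exit statuses introduced by this change (the wrapper itself is illustrative), such a loop might look like::

    #!/bin/bash
    # Illustrative wrapper; exit statuses as documented in cinder-manage.rst:
    #   0   - no further migrations are possible, all done
    #   1   - --max_count was used and at least one row was migrated; rerun
    #   2   - migrations are failing and no progress was made; investigate
    #   127 - invalid input (e.g. a non-numeric max count)
    while true; do
        cinder-manage db online_data_migrations --max_count 50
        rc=$?
        case "$rc" in
            0)
                echo "Online data migrations complete."
                exit 0
                ;;
            1)
                # Partial progress; any failures may clear up once the
                # migrations they depend on have finished, so iterate again.
                ;;
            *)
                echo "Online data migrations failed (exit status $rc);" \
                     "check the cinder-manage log." >&2
                exit "$rc"
                ;;
        esac
    done

This mirrors the guidance added to ``doc/source/upgrade.rst``: rerun while the status is 1, stop and investigate on 2.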