Change docker upgrade steps

We used to have an error-prone approach to performing Docker
upgrades: in rare cases an old container ended up in the 'running'
state instead of the new one.

Here are the steps that lead to the issue:

* One container isn't running for some reason (for instance, it
  failed), and supervisor has scheduled it to be started again soon.
* The fuel_upgrade script stops supervisor's processes. They are
  stopped and won't be started again by supervisor (even with the
  autostart=true setting).
* The container that wasn't running when the processes were stopped
  is untouched: it's still scheduled to be started soon.
* The fuel_upgrade script stops the Docker containers, but supervisor
  still has one of them scheduled, so it gets started again and again
  (the sketch after this list illustrates the loop).
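
For illustration, here is a minimal sketch (not part of this change)
that reproduces the loop via supervisord's XML-RPC API; the endpoint
URL is illustrative:

    import time
    import xmlrpclib  # Python 2; use xmlrpc.client on Python 3

    server = xmlrpclib.ServerProxy('http://127.0.0.1:9001/RPC2')

    # stopAllProcesses() only affects processes that are currently
    # running; per the scenario above, a process that crashed and is
    # scheduled for a retry is skipped
    server.supervisor.stopAllProcesses()

    time.sleep(5)
    for info in server.supervisor.getAllProcessInfo():
        # the crashed process can show up as STARTING/RUNNING again
        # here, because supervisor retried it after the stop call
        print('%s: %s' % (info['name'], info['statename']))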

To avoid this situation, we have to do the following (see the sketch
after this list):

* Point supervisor to the new config folder (which is empty at this
  point).
* Restart supervisor, so it forgets the old configs and uses none of
  them.
* Upload images and create the new containers.
* Generate new supervisor configs and restart supervisor in order to
  apply them (supervisor's processes will be attached automatically
  to the running Docker containers).
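
A minimal, self-contained sketch of the switch-and-restart part
(paths and the XML-RPC endpoint are illustrative, assuming
supervisord includes its configs from a 'current' symlink):

    import os
    import xmlrpclib  # Python 2; use xmlrpc.client on Python 3

    CURRENT_LINK = '/etc/supervisord.d/current'  # what supervisord reads
    NEW_CONFIG_DIR = '/etc/supervisord.d/new'    # still empty right now

    # re-point the symlink to the new (empty) config folder
    if os.path.lexists(CURRENT_LINK):
        os.remove(CURRENT_LINK)
    os.symlink(NEW_CONFIG_DIR, CURRENT_LINK)

    server = xmlrpclib.ServerProxy('http://127.0.0.1:9001/RPC2')
    server.supervisor.reloadConfig()  # pick up the empty config set
    server.supervisor.restart()       # nothing is scheduled after this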

Signed-off-by: Igor Kalnitsky <igor@kalnitsky.org>

Change-Id: Ibc133b4471878efeffeb232192b4540d26401fae
Closes-Bug: #1441693

@@ -107,6 +107,7 @@ class SupervisorClient(object):
         current_cfg_path = self.config.supervisor['current_configs_prefix']
         utils.symlink(self.supervisor_config_dir, current_cfg_path)
+        self.supervisor.reloadConfig()
 
     def switch_to_previous_configs(self):
         """Switch to previous version of fuel
@@ -116,6 +117,7 @@ class SupervisorClient(object):
         utils.symlink(
             self.previous_supervisor_config_path,
             current_cfg_path)
+        self.supervisor.reloadConfig()
 
     def stop_all_services(self):
         """Stops all processes
@@ -130,7 +132,7 @@ class SupervisorClient(object):
         self.supervisor.restart()
 
         all_processes = utils.wait_for_true(
-            self.get_all_processes_safely,
+            lambda: self.get_all_processes_safely() is not None,
             timeout=self.config.supervisor['restart_timeout'])
         logger.debug(u'List of supervisor processes %s', all_processes)
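
The wait_for_true change above passes a lambda instead of the bound
method, so that a successful XML-RPC call returning an empty process
list still counts as success (get_all_processes_safely presumably
returns None on failure). A sketch of how a helper of this shape
typically behaves, not the project's actual implementation:

    import time

    def wait_for_true(check, timeout=60, interval=0.5):
        # poll `check` until it returns a truthy value or time runs out
        deadline = time.time() + timeout
        while time.time() < deadline:
            result = check()
            if result:
                return result
            time.sleep(interval)
        raise Exception('Timeout while waiting for condition')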

@@ -68,31 +68,25 @@ class DockerUpgrader(UpgradeEngine):
         self.save_db()
         self.save_cobbler_configs()
 
-        # NOTE(akislitsky): fix for bug
-        # https://bugs.launchpad.net/fuel/+bug/1354465
-        # supervisord can restart old container even if it already stopped
-        # and new container start will be failed. We switch configs
-        # before upgrade, so if supervisord will try to start container
-        # it will be new container.
-        self.switch_to_new_configs()
-
-        # Stop all of the services
-        self.supervisor.stop_all_services()
-        self.stop_fuel_containers()
-        # Upload docker images
-        self.upload_images()
-
-        # Generate config with autostart is false
-        # not to run new services on supervisor restart
-        self.generate_configs(autostart=False)
+        # Point to new configs (version.yaml and supervisor configs) and
+        # restart supervisor in order to apply them
         self.version_file.switch_to_new()
-        # Restart supervisor to read new configs
+        self.switch_to_new_configs()
         self.supervisor.restart_and_wait()
 
-        # Create container and start it under supervisor
+        # Stop docker containers (it's safe, since at this time supervisor's
+        # configs are empty).
+        self.stop_fuel_containers()
+
+        # Upload new docker images and create containers
+        self.upload_images()
         self.create_and_start_new_containers()
 
-        # Update configs in order to start all
-        # services on supervisor restart
+        # Generate supervisor configs for new containers and restart
+        # supervisor in order to apply them. Note, supervisor's processes
+        # will be attached to running docker containers automatically.
         self.generate_configs(autostart=True)
         self.supervisor.restart_and_wait()
 
         # Verify that all services up and running
         self.upgrade_verifier.verify()
@@ -310,10 +304,6 @@ class DockerUpgrader(UpgradeEngine):
             if container.get('after_container_creation_command'):
                 self.run_after_container_creation_command(container)
 
-            if container.get('supervisor_config'):
-                self.start_service_under_supervisor(
-                    self.make_service_name(container['id']))
-
     def run_after_container_creation_command(self, container):
         """Runs command in container with retries in
         case of error
@@ -352,13 +342,6 @@ class DockerUpgrader(UpgradeEngine):
         return [port if not isinstance(port, list) else tuple(port)
                 for port in ports]
 
-    def start_service_under_supervisor(self, service_name):
-        """Start service under supervisor
-
-        :param str service_name: name of the service
-        """
-        self.supervisor.start(service_name)
-
     def exec_with_retries(
             self, func, exceptions, message, retries=0, interval=0):
         # TODO(eli): refactor it and make retries

@@ -74,12 +74,10 @@ class TestDockerUpgrader(BaseTestCase):
         self.assertEqual(
             self.upgrader.generate_configs.call_args_list,
-            [mock.call(autostart=False),
-             mock.call(autostart=True)])
+            [mock.call(autostart=True)])
         self.called_once(self.upgrader.stop_fuel_containers)
-        self.called_once(self.supervisor_mock.stop_all_services)
-        self.called_once(self.supervisor_mock.restart_and_wait)
+        self.assertEqual(self.supervisor_mock.restart_and_wait.call_count, 2)
         self.called_once(self.upgrader.upgrade_verifier.verify)
         self.called_once(self.version_mock.save_current)
         self.called_once(self.version_mock.switch_to_new)
@@ -157,7 +155,6 @@ class TestDockerUpgrader(BaseTestCase):
             side_effect=mocked_create_container)
         self.upgrader.start_container = mock.MagicMock()
         self.upgrader.run_after_container_creation_command = mock.MagicMock()
-        self.upgrader.start_service_under_supervisor = mock.MagicMock()
 
         self.upgrader.create_and_start_new_containers()
@@ -177,8 +174,6 @@ class TestDockerUpgrader(BaseTestCase):
                 privileged=False, links=[],
                 network_mode=None)]
 
-        self.upgrader.start_service_under_supervisor.assert_called_once_with(
-            'docker-id2')
         self.assertEqual(
             self.upgrader.create_container.call_args_list,
             create_container_calls)

@@ -43,23 +43,33 @@ class TestSupervisorClient(BaseTestCase):
         self.utils_mock.symlink.assert_called_once_with(
             self.new_version_supervisor_path,
             self.fake_config.supervisor['current_configs_prefix'])
+        self.supervisor.supervisor.reloadConfig.assert_called_once_with()
 
     def test_switch_to_previous_configs(self, os_mock):
         self.supervisor.switch_to_previous_configs()
         self.utils_mock.symlink.assert_called_once_with(
             self.previous_version_supervisor_path,
             self.fake_config.supervisor['current_configs_prefix'])
+        self.supervisor.supervisor.reloadConfig.assert_called_once_with()
 
     def test_stop_all_services(self, _):
         self.supervisor.stop_all_services()
         self.supervisor.supervisor.stopAllProcesses.assert_called_once_with()
 
-    def test_restart_and_wait(self, _):
+    @mock.patch('fuel_upgrade.clients.supervisor_client.SupervisorClient.'
+                'get_all_processes_safely')
+    def test_restart_and_wait(self, _, __):
         self.supervisor.restart_and_wait()
         self.supervisor.supervisor.restart.assert_called_once_with()
-        self.utils_mock.wait_for_true.assert_called_once_with(
-            self.supervisor.get_all_processes_safely,
-            timeout=600)
+
+        timeout = self.utils_mock.wait_for_true.call_args[1]['timeout']
+        self.assertEqual(timeout, 600)
+
+        # since wait_for_true is mocked in all tests, let's check that
+        # callback really calls get_all_processes_safely function
+        callback = self.utils_mock.wait_for_true.call_args[0][0]
+        callback()
+        self.supervisor.get_all_processes_safely.assert_called_once_with()
 
     def test_get_all_processes_safely(self, _):
         self.supervisor.get_all_processes_safely()