From b5a14c6cd0be76156852afdc8c664e37e88bc7b3 Mon Sep 17 00:00:00 2001 From: Bogdan Dobrelya Date: Wed, 9 Jan 2019 12:51:45 +0200 Subject: [PATCH] Always attempt a graceful container stop/remove When paunch removes a container, like applying a container's config changes, it does that not gracefully. That poses an issue for some cases, like stateful services. To mitigate that, attempt stopping the container prior to removing it. And use the defined stop signal and timeout settings for such a container, when stopping it. If there is no signal/timeout in the config, fallback to deaults of SIGTERM and a 10s. Related-bug: #1810690 Change-Id: I7c72ea055984bb5ff3ea7cea019693d61245212f Signed-off-by: Bogdan Dobrelya --- paunch/builder/base.py | 15 ++++++--- paunch/runner.py | 54 +++++++++++++++++++++++++++---- paunch/tests/test_builder_base.py | 17 +++++++++- paunch/tests/test_runner.py | 38 +++++++++++++++++++--- 4 files changed, 108 insertions(+), 16 deletions(-) diff --git a/paunch/builder/base.py b/paunch/builder/base.py index 6f47d37..f125114 100644 --- a/paunch/builder/base.py +++ b/paunch/builder/base.py @@ -63,9 +63,11 @@ class BaseBuilder(object): # When upgrading from Docker to Podman, we want to stop the # container that runs under Docker first before starting it with # Podman. The container will be removed later in THT during - # upgrade_tasks. + # upgrade_tasks. And attempt to do that gracefully as well. if self.runner.cont_cmd == 'podman' and self.which('docker'): - self.runner.stop_container(container, 'docker', quiet=True) + self.runner.stop_container(container, 'docker', quiet=True, + conf_id=self.config_id, + cconfig=cconfig) if action == 'run': if container in desired_names: @@ -125,12 +127,15 @@ class BaseBuilder(object): self.runner.remove_container(container) continue + cconfig = self.config.get(cn[-1]) ex_data_str = self.runner.inspect( container, '{{index .Config.Labels "config_data"}}') if not ex_data_str: self.log.debug("Deleting container (no config_data): %s" % container) - self.runner.remove_container(container) + self.runner.remove_container(container, + conf_id=self.config_id, + cconfig=cconfig) continue try: @@ -142,7 +147,9 @@ class BaseBuilder(object): if new_data != ex_data: self.log.debug("Deleting container (changed config_data): %s" % container) - self.runner.remove_container(container) + self.runner.remove_container(container, + conf_id=self.config_id, + cconfig=cconfig) # deleting containers is an opportunity for renames to their # preferred name diff --git a/paunch/runner.py b/paunch/runner.py index 1597fdb..3782670 100644 --- a/paunch/runner.py +++ b/paunch/runner.py @@ -18,6 +18,7 @@ import random import string import subprocess +from paunch.builder.base import BaseBuilder from paunch.builder import podman from paunch.utils import common from paunch.utils import systemd @@ -215,25 +216,66 @@ class BaseRunner(object): yield line.split() def remove_containers(self, conf_id): + configs = self.list_configs() for container in self.containers_in_config(conf_id): - self.remove_container(container) + # Find the container's configh in the known configs + # (the name should be matching its config name) + cid, cconfig = self.discover_container_config( + configs, container, container) + if cid != conf_id: + self.log.debug('Ignoring container %s from config' + 'ID %s' % (container, cid)) + continue + self.remove_container(container, conf_id, cconfig) - def remove_container(self, container): + def remove_container(self, container, conf_id=None, cconfig=None): if self.cont_cmd == 'podman': systemd.service_delete(container=container, log=self.log) + # NOTE(bogdando): that's also used when applying changes on + # containers and must be a graceful operation + self.stop_container(container=container, cont_cmd=self.cont_cmd, + quiet=True, conf_id=conf_id, cconfig=cconfig) cmd = [self.cont_cmd, 'rm', '-f', container] cmd_stdout, cmd_stderr, returncode = self.execute(cmd, self.log) if returncode != 0: self.log.error('Error removing container: %s' % container) self.log.error(cmd_stderr) - def stop_container(self, container, cont_cmd=None, quiet=False): + def stop_container(self, container, cont_cmd=None, quiet=False, + conf_id=None, cconfig=None): cont_cmd = cont_cmd or self.cont_cmd - cmd = [cont_cmd, 'stop', container] + cmd = [cont_cmd, 'stop'] + # Pick pre-configured timeout and signal or use defaults for graceful + # stopping of the container, which is important to have for applying + # the container config updates + config = {} + if cconfig and 'stop_signal' in cconfig.keys(): + config[container] = cconfig + else: + config[container] = {'stop_signal': 'SIGTERM'} + if cconfig and 'stop_grace_period' in cconfig.keys(): + config[container].update(cconfig) + else: + config[container].update({'stop_grace_period': '10s'}) + + builder = BaseBuilder(config_id=conf_id, + config=config, + runner=self, + labels=None, + log=self.log) + builder.string_arg(config[container], cmd, 'stop_signal', + '--stop-signal') + builder.string_arg(config[container], cmd, + 'stop_grace_period', '--stop-timeout', + builder.duration) + cmd.append(container) + # Log the stop command events disregard of the quiet setting + self.log.debug('Stopping container %s: %s' % (container, cmd)) cmd_stdout, cmd_stderr, returncode = self.execute(cmd, quiet=quiet) - if returncode != 0 and not quiet: + if returncode != 0: self.log.error('Error stopping container: %s' % container) - self.log.error(cmd_stderr) + if not quiet: + self.log.error(cmd_stderr) def rename_containers(self): current_containers = [] diff --git a/paunch/tests/test_builder_base.py b/paunch/tests/test_builder_base.py index 30f9b4c..8c24438 100644 --- a/paunch/tests/test_builder_base.py +++ b/paunch/tests/test_builder_base.py @@ -185,6 +185,8 @@ class TestBaseBuilder(base.TestCase): 'two': { 'start_order': 1, 'image': 'centos:7', + 'stop_signal': 'SIGKILL', + 'stop_grace_period': '1m2s', }, # running with the same config 'three': { @@ -213,12 +215,18 @@ class TestBaseBuilder(base.TestCase): six six two-12345678 two three-12345678 three''', '', 0), + # stop five + ('', '', 0), # rm five ('', '', 0), + # stop six + ('', '', 0), # rm six ('', '', 0), # inspect two ('{"start_order": 1, "image": "centos:6"}', '', 0), + # stop two, changed config data + ('', '', 0), # rm two, changed config data ('', '', 0), # inspect three @@ -262,12 +270,18 @@ three-12345678 three''', '', 0), mock.ANY ), # rm containers not in config + mock.call(['docker', 'stop', '--stop-signal=SIGTERM', + '--stop-timeout=10.0', 'five'], quiet=True), mock.call(['docker', 'rm', '-f', 'five'], mock.ANY), + mock.call(['docker', 'stop', '--stop-signal=SIGTERM', + '--stop-timeout=10.0', 'six'], quiet=True), mock.call(['docker', 'rm', '-f', 'six'], mock.ANY), # rm two, changed config mock.call(['docker', 'inspect', '--type', 'container', '--format', '{{index .Config.Labels "config_data"}}', 'two-12345678'], mock.ANY, False), + mock.call(['docker', 'stop', '--stop-signal=SIGKILL', + '--stop-timeout=62.0', 'two-12345678'], quiet=True), mock.call(['docker', 'rm', '-f', 'two-12345678'], mock.ANY), # check three, config hasn't changed mock.call(['docker', 'inspect', '--type', 'container', @@ -304,7 +318,8 @@ three-12345678 three''', '', 0), '--label', 'container_name=two', '--label', 'managed_by=tester', '--label', 'config_data=%s' % json.dumps(config['two']), - '--detach=true', 'centos:7'], mock.ANY + '--detach=true', '--stop-signal=SIGKILL', + '--stop-timeout=62.0', 'centos:7'], mock.ANY ), # don't run three, its already running # run four diff --git a/paunch/tests/test_runner.py b/paunch/tests/test_runner.py index fe042fc..f9433dc 100644 --- a/paunch/tests/test_runner.py +++ b/paunch/tests/test_runner.py @@ -248,7 +248,10 @@ class TestBaseRunner(base.TestCase): }, result) @mock.patch('subprocess.Popen') - def test_remove_containers(self, popen): + @mock.patch('paunch.runner.BaseRunner.discover_container_config') + @mock.patch('paunch.runner.BaseRunner.list_configs', return_value=['foo']) + def test_remove_containers(self, configs, disco, popen): + disco.return_value = ('foo', None) self.mock_execute(popen, 'one\ntwo\nthree', '', 0) self.runner.remove_container = mock.Mock() @@ -260,7 +263,27 @@ class TestBaseRunner(base.TestCase): '--filter', 'label=config_id=foo'] ) self.runner.remove_container.assert_has_calls([ - mock.call('one'), mock.call('two'), mock.call('three') + mock.call('one', 'foo', None), mock.call('two', 'foo', None), + mock.call('three', 'foo', None) + ]) + + @mock.patch('subprocess.Popen') + @mock.patch('paunch.runner.BaseRunner.discover_container_config') + @mock.patch('paunch.runner.BaseRunner.list_configs', return_value=['foo']) + def test_remove_containers_omitting(self, configs, disco, popen): + disco.side_effect = [('foo', None), ('bar', None), ('foo', None)] + self.mock_execute(popen, 'one\ntwo\nthree', '', 0) + self.runner.remove_container = mock.Mock() + + self.runner.remove_containers('foo') + + self.assert_execute( + popen, ['docker', 'ps', '-q', '-a', + '--filter', 'label=managed_by=tester', + '--filter', 'label=config_id=foo'] + ) + self.runner.remove_container.assert_has_calls([ + mock.call('one', 'foo', None), mock.call('three', 'foo', None) ]) @mock.patch('subprocess.Popen') @@ -278,16 +301,21 @@ class TestBaseRunner(base.TestCase): self.runner.stop_container('one') self.assert_execute( - popen, ['docker', 'stop', 'one'] + popen, ['docker', 'stop', '--stop-signal=SIGTERM', + '--stop-timeout=10.0', 'one'] ) @mock.patch('subprocess.Popen') def test_stop_container_override(self, popen): self.mock_execute(popen, '', '', 0) - self.runner.stop_container('one', 'podman') + config = {'one': {'stop_grace_period': '42m', + 'stop_signal': 'SIGINT'}} + self.runner.stop_container('one', 'podman', conf_id='foo', + cconfig=config['one']) self.assert_execute( - popen, ['podman', 'stop', 'one'] + popen, ['podman', 'stop', '--stop-signal=SIGINT', + '--stop-timeout=2520.0', 'one'] ) @mock.patch('subprocess.Popen')