Create cells before starting nova services

Nova services may reasonably expect cell databases to exist when they
start. The current cell setup tasks in kolla run after the nova
containers have started, meaning that cells may or may not exist in the
database when they start, depending on timing. In particular, we are
seeing issues in kolla CI currently with jobs timing out waiting for
nova compute services to start. The following error is seen in the nova
logs of these jobs, which may or may not be relevant:

No cells are configured, unable to continue

This change creates the cell0 and cell1 databases prior to starting nova
services.

In order to do this, we must create new containers in which to run the
nova-manage commands, because the nova-api container may not yet exist.
This requires adding support to the kolla_docker module for specifying a
command for the container to run that overrides the image's command.

We also add the standard output and error to the module's result when a
non-detached container is run. A secondary benefit of this is that the
output of bootstrap containers is now displayed in the Ansible output if
the bootstrapping command fails, which will help with debugging.

Change-Id: I2c1e991064f9f588f398ccbabda94f69dc285e61
Closes-Bug: #1808575
This commit is contained in:
Mark Goddard 2018-12-14 12:01:05 +00:00
parent 404d0b1c12
commit 365bb5177d
5 changed files with 123 additions and 39 deletions

View File

@ -72,6 +72,11 @@ options:
- The username used to authenticate
required: False
type: str
command:
description:
- The command to execute in the container
required: False
type: str
detach:
description:
- Detach from the container after it is created
@ -214,6 +219,7 @@ EXAMPLES = '''
import json
import os
import shlex
import traceback
import docker
@ -229,6 +235,8 @@ class DockerWorker(object):
self.module = module
self.params = self.module.params
self.changed = False
# Use this to store arguments to pass to exit_json().
self.result = {}
# TLS not fully implemented
# tls_config = self.generate_tls()
@ -315,7 +323,8 @@ class DockerWorker(object):
self.compare_volumes_from(container_info) or
self.compare_environment(container_info) or
self.compare_container_state(container_info) or
self.compare_dimensions(container_info)
self.compare_dimensions(container_info) or
self.compare_command(container_info)
)
def compare_ipc_mode(self, container_info):
@ -482,6 +491,16 @@ class DockerWorker(object):
# '' or 0 - both falsey.
return True
def compare_command(self, container_info):
    """Compare the requested command with the running container's command.

    Docker inspect reports a container's command split into ``Path``
    (the executable) and ``Args`` (its argument list), so split the
    requested command string the same way with shlex, which honours
    shell-style quoting.

    :param container_info: container inspection dict containing the
        ``Path`` and ``Args`` keys.
    :returns: True if a command override was requested and differs from
        the container's current command, False otherwise.
    """
    new_command = self.params.get('command')
    # Guard with truthiness rather than "is not None": an empty string
    # would make shlex.split() return [] and indexing [0] would raise
    # IndexError. No (or empty) override means nothing to compare.
    if not new_command:
        return False
    new_path, *new_args = shlex.split(new_command)
    return (new_path != container_info['Path'] or
            new_args != container_info['Args'])
def parse_image(self):
full_image = self.params.get('image')
@ -638,6 +657,7 @@ class DockerWorker(object):
def build_container_options(self):
volumes, binds = self.generate_volumes()
return {
'command': self.params.get('command'),
'detach': self.params.get('detach'),
'environment': self._format_env_vars(),
'host_config': self.build_host_config(binds),
@ -697,15 +717,22 @@ class DockerWorker(object):
# dict all the time.
if isinstance(rc, dict):
rc = rc['StatusCode']
if rc != 0:
self.module.fail_json(
failed=True,
changed=True,
msg="Container exited with non-zero return code %s" % rc
)
# Include container's return code, standard output and error in the
# result.
self.result['rc'] = rc
self.result['stdout'] = self.dc.logs(self.params.get('name'),
stdout=True, stderr=False)
self.result['stderr'] = self.dc.logs(self.params.get('name'),
stdout=False, stderr=True)
if self.params.get('remove_on_exit'):
self.stop_container()
self.remove_container()
if rc != 0:
self.module.fail_json(
changed=True,
msg="Container exited with non-zero return code %s" % rc,
**self.result
)
def get_container_env(self):
name = self.params.get('name')
@ -817,6 +844,7 @@ def generate_module():
auth_password=dict(required=False, type='str', no_log=True),
auth_registry=dict(required=False, type='str'),
auth_username=dict(required=False, type='str'),
command=dict(required=False, type='str'),
detach=dict(required=False, type='bool', default=True),
labels=dict(required=False, type='dict', default=dict()),
name=dict(required=False, type='str'),
@ -905,10 +933,10 @@ def main():
# types. If we ever add method that will have to return some
# meaningful data, we need to refactor all methods to return dicts.
result = bool(getattr(dw, module.params.get('action'))())
module.exit_json(changed=dw.changed, result=result)
module.exit_json(changed=dw.changed, result=result, **dw.result)
except Exception:
module.exit_json(failed=True, changed=True,
msg=repr(traceback.format_exc()))
module.fail_json(changed=True, msg=repr(traceback.format_exc()),
**dw.result)
# import module snippets
from ansible.module_utils.basic import * # noqa

View File

@ -0,0 +1,51 @@
---
# Create the nova cell mappings before the nova services are started:
# nova services expect the cell databases to exist at startup
# (Closes-Bug: #1808575).
- name: Create cell0 mappings
  vars:
    nova_api: "{{ nova_services['nova-api'] }}"
  become: true
  kolla_docker:
    action: "start_container"
    # Run nova-manage in a throwaway container because the nova_api
    # container may not exist yet at this point in the deploy.
    command: bash -c 'sudo -E kolla_set_configs && nova-manage cell_v2 map_cell0'
    common_options: "{{ docker_common_options }}"
    # Run attached so rc/stdout/stderr are captured in the result.
    detach: False
    image: "{{ nova_api.image }}"
    labels:
      BOOTSTRAP:
    name: "create_cell0_nova"
    restart_policy: "never"
    volumes: "{{ nova_api.volumes|reject('equalto', '')|list }}"
  register: map_cell0
  changed_when:
    - map_cell0 is success
    # map_cell0 is idempotent - treat "already setup" as unchanged.
    - '"Cell0 is already setup" not in map_cell0.stdout'
  failed_when:
    - map_cell0.rc != 0
  run_once: True
  delegate_to: "{{ groups[nova_api.group][0] }}"

# Re-run the service bootstrap only when a new cell0 mapping was created.
- include_tasks: bootstrap_service.yml
  when: map_cell0.changed

- name: Create base cell for legacy instances
  vars:
    nova_api: "{{ nova_services['nova-api'] }}"
  become: true
  kolla_docker:
    action: "start_container"
    command: bash -c 'sudo -E kolla_set_configs && nova-manage cell_v2 create_cell'
    common_options: "{{ docker_common_options }}"
    detach: False
    image: "{{ nova_api.image }}"
    labels:
      BOOTSTRAP:
    name: "create_cell_nova"
    restart_policy: "never"
    volumes: "{{ nova_api.volumes|reject('equalto', '')|list }}"
  register: base_cell
  changed_when:
    - base_cell is success
  failed_when:
    # A non-zero rc is only a failure if the cell does not already exist.
    - base_cell.rc != 0
    - '"already exists" not in base_cell.stdout'
  run_once: True
  delegate_to: "{{ groups[nova_api.group][0] }}"

View File

@ -21,7 +21,9 @@
when: inventory_hostname in groups['nova-api'] or
inventory_hostname in groups['compute']
- include_tasks: create_cells.yml
- name: Flush handlers
meta: flush_handlers
- include_tasks: simple_cell_setup.yml
- include_tasks: discover_computes.yml

View File

@ -1,31 +1,4 @@
---
- name: Create cell0 mappings
command: >
docker exec nova_api nova-manage cell_v2 map_cell0
register: map_cell0
changed_when:
- map_cell0 is success
- '"Cell0 is already setup" not in map_cell0.stdout'
failed_when:
- map_cell0.rc != 0
run_once: True
delegate_to: "{{ groups['nova-api'][0] }}"
- include_tasks: bootstrap_service.yml
when: map_cell0.changed
- name: Create base cell for legacy instances
command: >
docker exec nova_api nova-manage cell_v2 create_cell
register: base_cell
changed_when:
- base_cell is success
failed_when:
- base_cell.rc != 0
- '"already exists" not in base_cell.stdout'
run_once: True
delegate_to: "{{ groups['nova-api'][0] }}"
- name: Waiting for nova-compute service up
command: >
docker exec kolla_toolbox openstack

View File

@ -52,6 +52,7 @@ class ModuleArgsTest(base.BaseTestCase):
auth_password=dict(required=False, type='str', no_log=True),
auth_registry=dict(required=False, type='str'),
auth_username=dict(required=False, type='str'),
command=dict(required=False, type='str'),
detach=dict(required=False, type='bool', default=True),
labels=dict(required=False, type='dict', default=dict()),
name=dict(required=False, type='str'),
@ -114,6 +115,7 @@ class ModuleArgsTest(base.BaseTestCase):
FAKE_DATA = {
'params': {
'command': None,
'detach': True,
'environment': {},
'host_config': {
@ -133,6 +135,7 @@ FAKE_DATA = {
'vendor': 'ubuntuOS'},
'image': 'myregistrydomain.com:5000/ubuntu:16.04',
'name': 'test_container',
'remove_on_exit': True,
'volumes': None,
'tty': False,
},
@ -210,8 +213,10 @@ class TestContainer(base.BaseTestCase):
self.assertTrue(self.dw.changed)
self.fake_data['params'].pop('dimensions')
self.fake_data['params']['host_config']['blkio_weight'] = '10'
expected_args = {'command', 'detach', 'environment', 'host_config',
'image', 'labels', 'name', 'tty', 'volumes'}
self.dw.dc.create_container.assert_called_once_with(
**self.fake_data['params'])
**{k: self.fake_data['params'][k] for k in expected_args})
self.dw.dc.create_host_config.assert_called_with(
cap_add=None, network_mode='host', ipc_mode=None,
pid_mode=None, volumes_from=None, blkio_weight=10,
@ -295,6 +300,31 @@ class TestContainer(base.BaseTestCase):
self.dw.dc.start.assert_called_once_with(
container=self.fake_data['params'].get('name'))
def test_start_container_no_detach(self):
    """Start a non-detached container and verify result capture.

    With detach=False the worker must wait for the container to exit,
    record its return code, stdout and stderr in self.result, and then
    stop and remove the container (remove_on_exit is True in FAKE_DATA).
    """
    self.fake_data['params'].update({'name': 'my_container',
                                     'detach': False})
    self.dw = get_DockerWorker(self.fake_data['params'])
    self.dw.dc.images = mock.MagicMock(
        return_value=self.fake_data['images'])
    # containers() is consulted repeatedly: the first call (before
    # creation) finds nothing, subsequent calls see the fake container.
    self.dw.dc.containers = mock.MagicMock(side_effect=[
        [], self.fake_data['containers'], self.fake_data['containers'],
        self.fake_data['containers']])
    self.dw.dc.wait = mock.MagicMock(return_value={'StatusCode': 0})
    # logs() is expected to be called twice: stdout first, then stderr.
    self.dw.dc.logs = mock.MagicMock(
        side_effect=['fake stdout', 'fake stderr'])
    self.dw.start_container()
    self.assertTrue(self.dw.changed)
    name = self.fake_data['params'].get('name')
    self.dw.dc.wait.assert_called_once_with(name)
    self.dw.dc.logs.assert_has_calls([
        mock.call(name, stdout=True, stderr=False),
        mock.call(name, stdout=False, stderr=True)])
    # remove_on_exit triggers a stop followed by a forced removal.
    self.dw.dc.stop.assert_called_once_with(name, timeout=10)
    self.dw.dc.remove_container.assert_called_once_with(
        container=name, force=True)
    expected = {'rc': 0, 'stdout': 'fake stdout', 'stderr': 'fake stderr'}
    self.assertEqual(expected, self.dw.result)
def test_stop_container(self):
self.dw = get_DockerWorker({'name': 'my_container',
'action': 'stop_container'})