Ensure nova-api is running before starting nova-compute containers

If nova-api is delayed starting, then nova_wait_for_compute_service
can time out. A deployment using a slow/busy remote container repository is
particularly susceptible to this issue. To resolve this, nova_compute and
nova_wait_for_compute_service have been postponed to step_5, and a task
has been added to step_4 to ensure nova_api is active before proceeding.

Conflicts:
  deployment/nova/nova-compute-container-puppet.yaml
  deployment/placement/placement-api-container-puppet.yaml

Note: This is not a direct cherry-pick because of the placement
extraction in the Train release, so this backport also includes the
needed changes from https://review.opendev.org/688399.

Change-Id: I6fcbc5cb5d4f3cbb618d9661d2a36c868e18b3d6
Closes-bug: #1842948
(cherry picked from commit 8a87cbcc34)
This commit is contained in:
Oliver Walsh 2019-09-09 15:48:23 +01:00 committed by Martin Schuppert
parent d4e5170e3f
commit 20b485fe8e
8 changed files with 242 additions and 83 deletions

View File

@ -0,0 +1,109 @@
#!/usr/bin/env python
#
# Copyright 2018 Red Hat Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import logging
from optparse import OptionParser
import os
import six
import sys
import time
from keystoneauth1 import loading
from keystoneauth1 import session
from novaclient import client
from novaclient.exceptions import ClientException
# Python 3 renamed SafeConfigParser to ConfigParser and made strict parsing
# the default, which rejects duplicate options. nova.conf may legitimately
# repeat an option line (e.g. passthrough_whitelist), so strict parsing is
# disabled on Python 3 to keep the file readable.
# https://bugs.launchpad.net/tripleo/+bug/1827775
if six.PY3:
    from six.moves.configparser import ConfigParser
    config = ConfigParser(strict=False)
else:
    from six.moves.configparser import SafeConfigParser
    config = SafeConfigParser()
# Debug logging is opted into through the __OS_DEBUG environment variable;
# any value other than "true" (case-insensitive) selects INFO.
debug = os.getenv('__OS_DEBUG', 'false')
if debug.lower() == 'true':
    loglevel = logging.DEBUG
else:
    loglevel = logging.INFO

# All output goes to stdout.
logging.basicConfig(stream=sys.stdout, level=loglevel)
LOG = logging.getLogger('nova_wait_for_api_service')

# Polling budget used by the main loop: at most `iterations` attempts with a
# pause of `timeout` seconds after each failed attempt.
iterations = 60
timeout = 10

# Configuration file the credentials are read from.
nova_cfg = '/etc/nova/nova.conf'
# Entry point: poll the nova API until it answers, then exit 0. Exits 1 when
# nova.conf is missing or unreadable, or when every attempt has failed.
if __name__ == '__main__':
    parser = OptionParser(usage="usage: %prog [options]")
    # Passing -k/--insecure clears this flag, so the stored value is the TLS
    # "verify" setting handed to the keystone session (True unless -k given).
    parser.add_option('-k', '--insecure',
                      action="store_false",
                      dest='verify',
                      default=True,
                      help='Allow insecure connection when using SSL')
    (options, args) = parser.parse_args()
    LOG.debug('Running with parameter insecure = %s',
              not options.verify)

    if not os.path.isfile(nova_cfg):
        LOG.error('Nova configuration file %s does not exist', nova_cfg)
        sys.exit(1)

    try:
        config.read(nova_cfg)
    except Exception:
        # Without a parsed config the credential lookups below cannot
        # succeed, so stop here instead of failing later with a traceback.
        LOG.exception('Error while reading nova.conf:')
        sys.exit(1)

    # Credentials are taken from the [neutron] section of nova.conf.
    loader = loading.get_plugin_loader('password')
    auth = loader.load_from_options(
        auth_url=config.get('neutron',
                            'auth_url'),
        username=config.get('neutron',
                            'username'),
        password=config.get('neutron',
                            'password'),
        project_name=config.get('neutron',
                                'project_name'),
        project_domain_name=config.get('neutron',
                                       'project_domain_name'),
        user_domain_name=config.get('neutron',
                                    'user_domain_name'))
    sess = session.Session(auth=auth, verify=options.verify)

    # Poll until the nova API answers a version listing request.
    for _attempt in range(iterations):
        try:
            nova = client.Client('2.11', session=sess,
                                 endpoint_type='internal')
            nova.versions.list()
            LOG.info('Nova-api service active')
            # SystemExit is not an Exception subclass, so the handlers below
            # do not intercept this exit.
            sys.exit(0)
        except ClientException:
            LOG.info('Waiting for nova-api service')
        except Exception:
            LOG.exception(
                'Error while waiting for nova-api service')
        time.sleep(timeout)

    LOG.error('Nova-api service did not become active after %d attempts',
              iterations)
    sys.exit(1)
# vim: set et ts=4 sw=4 :

View File

@ -194,6 +194,12 @@ outputs:
- path: /var/log/nova
owner: nova:nova
recurse: true
# Merges the container_config_scripts attribute of ContainersCommon with an
# entry for placement_wait_for_service.py (mode 0755, body loaded via
# get_file from the container_config_scripts directory).
container_config_scripts:
map_merge:
- {get_attr: [ContainersCommon, container_config_scripts]}
- placement_wait_for_service.py:
mode: "0755"
content: { get_file: ../../../container_config_scripts/placement_wait_for_service.py }
docker_config:
step_2:
get_attr: [NovaPlacementLogging, docker_config, step_2]
@ -201,7 +207,7 @@ outputs:
step_4:
nova_placement:
start_order: 1
image: {get_param: DockerNovaPlacementImage}
image: &nova_placement_api_image {get_param: DockerNovaPlacementImage}
net: host
user: root
restart: always
@ -226,6 +232,29 @@ outputs:
- ''
environment:
- KOLLA_CONFIG_STRATEGY=COPY_ALWAYS
# Container definition that runs placement_wait_for_service.py using the same
# image as nova_placement (via the nova_placement_api_image alias).
nova_placement_wait_for_service:
# nova_placement itself uses start_order 1 in this step.
start_order: 2
image: *nova_placement_api_image
user: root
net: host
privileged: false
# NOTE(review): presumably not detaching makes the step wait for the command
# to finish -- confirm against the container runner's documentation.
detach: false
volumes:
list_concat:
- {get_attr: [ContainersCommon, volumes]}
- get_attr: [NovaPlacementLogging, volumes]
-
# Script directory plus a read-only mount of the placement configuration.
- /var/lib/container-config-scripts/:/container-config-scripts/:z
- /var/lib/config-data/nova_placement/etc/placement/:/etc/placement/:ro
# Invoked through bootstrap_host_exec, then as the nova user via su; the
# script is launched through pyshim.sh.
command: "/usr/bin/bootstrap_host_exec nova_placement su nova -s /bin/bash -c '/container-config-scripts/pyshim.sh /container-config-scripts/placement_wait_for_service.py'"
environment:
# Builds "__OS_DEBUG=<value>" from the nova::logging::debug config setting.
- list_join:
- ''
- - '__OS_DEBUG='
- yaql:
expression: str($.data.debug)
data:
debug: {get_attr: [NovaBase, role_data, config_settings, 'nova::logging::debug']}
host_prep_tasks:
- {get_attr: [NovaPlacementLogging, host_prep_tasks]}
- name: create persistent directory

View File

@ -310,54 +310,59 @@ outputs:
owner: nova:nova
recurse: true
container_config_scripts:
nova_api_ensure_default_cell.sh:
mode: "0700"
content:
str_replace:
template: |
#!/bin/bash
DEFID=$(su nova -s /bin/bash -c "nova-manage cell_v2 list_cells" | sed -e '1,3d' -e '$d' | awk -F ' *| *' '$2 == "default" {print $4}')
if [ "$DEFID" ]; then
echo "(cellv2) Updating default cell_v2 cell $DEFID"
su nova -s /bin/bash -c "/usr/bin/nova-manage cell_v2 update_cell --cell_uuid $DEFID --name=default --database_connection='CELLDB' --transport-url='TRANSPORTURL'"
else
echo "(cellv2) Creating default cell_v2 cell"
su nova -s /bin/bash -c "/usr/bin/nova-manage cell_v2 create_cell --name=default --database_connection='CELLDB' --transport-url='TRANSPORTURL'"
fi
params:
CELLDB:
list_join:
- ''
- - '{scheme}'
- '://'
- '{username}'
- ':'
- '{password}'
- '@'
-
if:
- mysql_ipv6_use_ip_address
- '[{hostname}]'
map_merge:
- {get_attr: [ContainersCommon, container_config_scripts]}
- nova_wait_for_api_service.py:
mode: "0755"
content: { get_file: ../../container_config_scripts/nova_wait_for_api_service.py }
nova_api_ensure_default_cell.sh:
mode: "0700"
content:
str_replace:
template: |
#!/bin/bash
DEFID=$(su nova -s /bin/bash -c "nova-manage cell_v2 list_cells" | sed -e '1,3d' -e '$d' | awk -F ' *| *' '$2 == "default" {print $4}')
if [ "$DEFID" ]; then
echo "(cellv2) Updating default cell_v2 cell $DEFID"
su nova -s /bin/bash -c "/usr/bin/nova-manage cell_v2 update_cell --cell_uuid $DEFID --name=default --database_connection='CELLDB' --transport-url='TRANSPORTURL'"
else
echo "(cellv2) Creating default cell_v2 cell"
su nova -s /bin/bash -c "/usr/bin/nova-manage cell_v2 create_cell --name=default --database_connection='CELLDB' --transport-url='TRANSPORTURL'"
fi
params:
CELLDB:
list_join:
- ''
- - '{scheme}'
- '://'
- '{username}'
- ':'
- '{password}'
- '@'
-
if:
- mysql_ipv6_use_ip_address
- '[{hostname}]'
- '{hostname}'
- '/'
- 'nova'
- '?'
- '{query}'
TRANSPORTURL:
list_join:
- ''
- - '{scheme}'
- '://'
- '{username}'
- ':'
- '{password}'
- '@'
- '{hostname}'
- '/'
- 'nova'
- '?'
- '{query}'
TRANSPORTURL:
list_join:
- ''
- - '{scheme}'
- '://'
- '{username}'
- ':'
- '{password}'
- '@'
- '{hostname}'
- ':'
- '{port}'
- '/'
- '?'
- '{query}'
- ':'
- '{port}'
- '/'
- '?'
- '{query}'
docker_config:
step_2:
get_attr: [NovaApiLogging, docker_config, step_2]
@ -447,7 +452,32 @@ outputs:
- ''
environment:
- KOLLA_CONFIG_STRATEGY=COPY_ALWAYS
# Container definition that runs nova_wait_for_api_service.py using the
# nova_api image (via the nova_api_image alias).
nova_wait_for_api_service:
start_order: 3
image: *nova_api_image
user: root
net: host
privileged: false
# NOTE(review): presumably not detaching makes the step wait for the command
# to finish -- confirm against the container runner's documentation.
detach: false
volumes:
list_concat:
- {get_attr: [ContainersCommon, volumes]}
-
# Read-only nova and MySQL client configuration, the nova log directory, and
# the directory holding the wait script.
- /var/lib/config-data/nova/etc/my.cnf.d/:/etc/my.cnf.d/:ro
- /var/lib/config-data/nova/etc/nova/:/etc/nova/:ro
- /var/log/containers/nova:/var/log/nova
- /var/lib/container-config-scripts/:/container-config-scripts/:z
# Invoked through bootstrap_host_exec, then as the nova user via su; the
# script is launched through pyshim.sh.
command: "/usr/bin/bootstrap_host_exec nova_api su nova -s /bin/bash -c '/container-config-scripts/pyshim.sh /container-config-scripts/nova_wait_for_api_service.py'"
environment:
# Builds "__OS_DEBUG=<value>" from the nova::logging::debug config setting;
# the script reads __OS_DEBUG to choose its log level.
- list_join:
- ''
- - '__OS_DEBUG='
- yaql:
expression: str($.data.debug)
data:
debug: {get_attr: [NovaBase, role_data, config_settings, 'nova::logging::debug']}
nova_api_cron:
start_order: 4
image: *nova_api_image
net: host
user: root

View File

@ -44,40 +44,41 @@ outputs:
nova_statedir_ownership.py:
mode: "0700"
content: { get_file: ../../container_config_scripts/nova_statedir_ownership.py }
nova_wait_for_placement_service.py:
mode: "0755"
content: { get_file: ../../container_config_scripts/nova_wait_for_placement_service.py }
nova_wait_for_compute_service.py:
mode: "0755"
content: { get_file: ../../container_config_scripts/nova_wait_for_compute_service.py }
nova_compute_common_deploy_steps_tasks:
description: Common host prep tasks for nova-compute services (compute + ironic)
# Runs as external_post_deploy_tasks
value: &nova_compute_common_deploy_steps_tasks
- when: step|int == 5
block:
- block:
- name: is additonal Cell?
set_fact:
nova_additional_cell: {get_param: NovaAdditionalCell}
- name: discover nodes if it is not an additional cell
- name: check if discover hosts is required
when:
- not nova_additional_cell|bool
- nova_cellv2_discovery_done is not defined
block:
- name: discover via nova_compute?
set_fact:
delegate_host: "{{ groups['nova_compute'][0] }}"
nova_cellv2_discovery_delegate_host: "{{ groups['nova_compute'][0] }}"
when:
- groups['nova_compute'] is defined and (groups['nova_compute']|length>0)
- name: discover via nova_ironic?
set_fact:
delegate_host: "{{ groups['nova_ironic'][0] }}"
nova_cellv2_discovery_delegate_host: "{{ groups['nova_ironic'][0] }}"
when:
- delegate_host is not defined
- nova_cellv2_discovery_delegate_host is not defined
- groups['nova_ironic'] is defined and (groups['nova_ironic']|length>0)
- name: Discovering nova hosts
command: "{{ container_cli }} exec nova_compute nova-manage cell_v2 discover_hosts --by-service"
become: true
changed_when: False
changed_when: false
delegate_to: '{{ nova_cellv2_discovery_delegate_host }}'
when:
- delegate_host is defined
- inventory_hostname == delegate_host
- nova_cellv2_discovery_delegate_host is defined
# Could be included multiple times so ensure it only runs once
- set_fact:
nova_cellv2_discovery_done: true

View File

@ -612,27 +612,8 @@ outputs:
expression: str($.data.debug)
data:
debug: {get_attr: [NovaBase, role_data, config_settings, 'nova::logging::debug']}
step_4:
step_5:
map_merge:
- nova_wait_for_placement_service:
start_order: 2
image: *nova_compute_image
user: nova
net: host
privileged: false
detach: false
volumes:
- /var/lib/container-config-scripts/:/container-config-scripts/:z
- /var/lib/config-data/puppet-generated/nova_libvirt/etc/nova:/etc/nova:ro
command: "/container-config-scripts/pyshim.sh /container-config-scripts/nova_wait_for_placement_service.py"
environment:
- list_join:
- ''
- - '__OS_DEBUG='
- yaql:
expression: str($.data.debug)
data:
debug: {get_attr: [NovaBase, role_data, config_settings, 'nova::logging::debug']}
- nova_compute:
start_order: 3
image: *nova_compute_image
@ -890,7 +871,7 @@ outputs:
with_items:
- ksm.service
- ksmtuned.service
deploy_steps_tasks: {get_attr: [NovaComputeCommon, nova_compute_common_deploy_steps_tasks]}
external_post_deploy_tasks: {get_attr: [NovaComputeCommon, nova_compute_common_deploy_steps_tasks]}
upgrade_tasks:
- name: Remove openstack-nova-compute and python-nova package during upgrade
package:

View File

@ -150,7 +150,7 @@ outputs:
-
- /var/lib/container-config-scripts/:/container-config-scripts/
command: "/container-config-scripts/pyshim.sh /container-config-scripts/nova_statedir_ownership.py"
step_4:
step_5:
nova_compute:
start_order: 100 # After the ironic services
image: *nova_ironic_image
@ -216,7 +216,7 @@ outputs:
name: virt_sandbox_use_netlink
persistent: yes
state: yes
deploy_steps_tasks: {get_attr: [NovaComputeCommon, nova_compute_common_deploy_steps_tasks]}
external_post_deploy_tasks: {get_attr: [NovaComputeCommon, nova_compute_common_deploy_steps_tasks]}
post_upgrade_tasks:
- when: step|int == 1
import_role:

View File

@ -0,0 +1,9 @@
---
fixes:
- |
If nova-api is delayed starting, then nova_wait_for_compute_service
can time out. A deployment using a slow/busy remote container repository is
particularly susceptible to this issue. To resolve this, nova_compute and
nova_wait_for_compute_service have been postponed to step_5, and a task
has been added to step_4 to ensure nova_api is active before proceeding.
Resolves Bug `1842948 <https://bugs.launchpad.net/tripleo/+bug/1842948>`_.