Ensure nova-api is running before starting nova-compute containers
If nova-api is delayed starting then the nova_wait_for_compute_service can time out. A deployment using a slow/busy remote container repository is particularly susceptible to this issue. To resolve this, nova_compute and nova_wait_for_compute_service have been postponed to step_5 and a task has been added to step_4 to ensure nova_api is active before proceeding. Change-Id: I6fcbc5cb5d4f3cbb618d9661d2a36c868e18b3d6 Closes-bug: #1842948
This commit is contained in:
parent
bf055342a2
commit
8a87cbcc34
|
@ -0,0 +1,108 @@
|
|||
#!/usr/bin/env python
|
||||
#
|
||||
# Copyright 2018 Red Hat Inc.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
||||
# not use this file except in compliance with the License. You may obtain
|
||||
# a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
# License for the specific language governing permissions and limitations
|
||||
# under the License.
|
||||
import logging
|
||||
from optparse import OptionParser
|
||||
import os
|
||||
import six
|
||||
import sys
|
||||
import time
|
||||
|
||||
|
||||
from keystoneauth1 import loading
|
||||
from keystoneauth1 import session
|
||||
|
||||
from novaclient import client
|
||||
from novaclient.exceptions import ClientException
|
||||
|
||||
# Python 3 renamed SafeConfigParser to ConfigParser and made it strict by
# default, i.e. it refuses duplicate sections/options within a single file.
# nova.conf may legitimately repeat an option line (e.g.
# passthrough_whitelist), so on Python 3 the parser is created with
# strict=False to keep such files readable.
# https://bugs.launchpad.net/tripleo/+bug/1827775
if not six.PY3:
    from six.moves.configparser import SafeConfigParser
    config = SafeConfigParser()
else:
    from six.moves.configparser import ConfigParser
    config = ConfigParser(strict=False)

# Debug logging is switched on through the __OS_DEBUG environment variable;
# the value is compared case-insensitively against 'true'.
debug = os.getenv('__OS_DEBUG', 'false')
loglevel = logging.DEBUG if debug.lower() == 'true' else logging.INFO

# All output goes to stdout.
logging.basicConfig(stream=sys.stdout, level=loglevel)
LOG = logging.getLogger('nova_wait_for_api_service')

# Polling budget: at most `iterations` attempts, pausing `timeout` seconds
# after a failed attempt.
iterations = 60
timeout = 10
# Configuration file the keystone credentials are read from.
nova_cfg = '/etc/nova/nova.conf'
|
||||
|
||||
if __name__ == '__main__':
    # Entry point: poll the nova API until it answers, then exit 0.
    # Exits 1 when nova.conf is missing or unreadable, or when the API did
    # not answer within `iterations` attempts.
    parser = OptionParser(usage="usage: %prog [options]")
    # NOTE: despite its name, ``options.insecure`` holds the TLS *verify*
    # flag. It defaults to True and passing -k/--insecure stores False, so
    # the value can be handed to Session(verify=...) unchanged.
    parser.add_option('-k', '--insecure',
                      action="store_false",
                      dest='insecure',
                      default=True,
                      help='Allow insecure connection when using SSL')

    (options, args) = parser.parse_args()
    LOG.debug('Running with parameter insecure = %s',
              options.insecure)

    if not os.path.isfile(nova_cfg):
        LOG.error('Nova configuration file %s does not exist', nova_cfg)
        sys.exit(1)

    try:
        config.read(nova_cfg)
    except Exception:
        # Without a parsed configuration the credential lookups below cannot
        # succeed, so stop here instead of failing later with an unrelated
        # traceback.
        LOG.exception('Error while reading nova.conf:')
        sys.exit(1)

    # The keystone credentials are taken from the [neutron] section of
    # nova.conf.
    auth_options = ('auth_url',
                    'username',
                    'password',
                    'project_name',
                    'project_domain_name',
                    'user_domain_name')
    loader = loading.get_plugin_loader('password')
    auth = loader.load_from_options(
        **{option: config.get('neutron', option) for option in auth_options})
    sess = session.Session(auth=auth, verify=options.insecure)

    # Wait until the nova API answers a version listing request
    for attempt in range(iterations):
        try:
            nova = client.Client('2.11', session=sess,
                                 endpoint_type='internal')
            nova.versions.list()
            LOG.info('Nova-api service active')
            sys.exit(0)
        except ClientException:
            LOG.info('Waiting for nova-api service')
        except Exception:
            LOG.exception(
                'Error while waiting for nova-api service')
        # No point in sleeping once the last attempt has failed.
        if attempt < iterations - 1:
            time.sleep(timeout)

    LOG.error('Timed out waiting for nova-api service')
    sys.exit(1)
|
||||
|
||||
# vim: set et ts=4 sw=4 :
|
|
@ -272,54 +272,59 @@ outputs:
|
|||
owner: nova:nova
|
||||
recurse: true
|
||||
container_config_scripts:
|
||||
nova_api_ensure_default_cell.sh:
|
||||
mode: "0700"
|
||||
content:
|
||||
str_replace:
|
||||
template: |
|
||||
#!/bin/bash
|
||||
DEFID=$(su nova -s /bin/bash -c "nova-manage cell_v2 list_cells" | sed -e '1,3d' -e '$d' | awk -F ' *| *' '$2 == "default" {print $4}')
|
||||
if [ "$DEFID" ]; then
|
||||
echo "(cellv2) Updating default cell_v2 cell $DEFID"
|
||||
su nova -s /bin/bash -c "/usr/bin/nova-manage cell_v2 update_cell --cell_uuid $DEFID --name=default --database_connection='CELLDB' --transport-url='TRANSPORTURL'"
|
||||
else
|
||||
echo "(cellv2) Creating default cell_v2 cell"
|
||||
su nova -s /bin/bash -c "/usr/bin/nova-manage cell_v2 create_cell --name=default --database_connection='CELLDB' --transport-url='TRANSPORTURL'"
|
||||
fi
|
||||
params:
|
||||
CELLDB:
|
||||
list_join:
|
||||
- ''
|
||||
- - '{scheme}'
|
||||
- '://'
|
||||
- '{username}'
|
||||
- ':'
|
||||
- '{password}'
|
||||
- '@'
|
||||
-
|
||||
if:
|
||||
- mysql_ipv6_use_ip_address
|
||||
- '[{hostname}]'
|
||||
map_merge:
|
||||
- {get_attr: [ContainersCommon, container_config_scripts]}
|
||||
- nova_wait_for_api_service.py:
|
||||
mode: "0755"
|
||||
content: { get_file: ../../container_config_scripts/nova_wait_for_api_service.py }
|
||||
nova_api_ensure_default_cell.sh:
|
||||
mode: "0700"
|
||||
content:
|
||||
str_replace:
|
||||
template: |
|
||||
#!/bin/bash
|
||||
DEFID=$(su nova -s /bin/bash -c "nova-manage cell_v2 list_cells" | sed -e '1,3d' -e '$d' | awk -F ' *| *' '$2 == "default" {print $4}')
|
||||
if [ "$DEFID" ]; then
|
||||
echo "(cellv2) Updating default cell_v2 cell $DEFID"
|
||||
su nova -s /bin/bash -c "/usr/bin/nova-manage cell_v2 update_cell --cell_uuid $DEFID --name=default --database_connection='CELLDB' --transport-url='TRANSPORTURL'"
|
||||
else
|
||||
echo "(cellv2) Creating default cell_v2 cell"
|
||||
su nova -s /bin/bash -c "/usr/bin/nova-manage cell_v2 create_cell --name=default --database_connection='CELLDB' --transport-url='TRANSPORTURL'"
|
||||
fi
|
||||
params:
|
||||
CELLDB:
|
||||
list_join:
|
||||
- ''
|
||||
- - '{scheme}'
|
||||
- '://'
|
||||
- '{username}'
|
||||
- ':'
|
||||
- '{password}'
|
||||
- '@'
|
||||
-
|
||||
if:
|
||||
- mysql_ipv6_use_ip_address
|
||||
- '[{hostname}]'
|
||||
- '{hostname}'
|
||||
- '/'
|
||||
- 'nova'
|
||||
- '?'
|
||||
- '{query}'
|
||||
TRANSPORTURL:
|
||||
list_join:
|
||||
- ''
|
||||
- - '{scheme}'
|
||||
- '://'
|
||||
- '{username}'
|
||||
- ':'
|
||||
- '{password}'
|
||||
- '@'
|
||||
- '{hostname}'
|
||||
- '/'
|
||||
- 'nova'
|
||||
- '?'
|
||||
- '{query}'
|
||||
TRANSPORTURL:
|
||||
list_join:
|
||||
- ''
|
||||
- - '{scheme}'
|
||||
- '://'
|
||||
- '{username}'
|
||||
- ':'
|
||||
- '{password}'
|
||||
- '@'
|
||||
- '{hostname}'
|
||||
- ':'
|
||||
- '{port}'
|
||||
- '/'
|
||||
- '?'
|
||||
- '{query}'
|
||||
- ':'
|
||||
- '{port}'
|
||||
- '/'
|
||||
- '?'
|
||||
- '{query}'
|
||||
docker_config:
|
||||
step_2:
|
||||
get_attr: [NovaApiLogging, docker_config, step_2]
|
||||
|
@ -409,7 +414,32 @@ outputs:
|
|||
- ''
|
||||
environment:
|
||||
- KOLLA_CONFIG_STRATEGY=COPY_ALWAYS
|
||||
nova_wait_for_api_service:
|
||||
start_order: 3
|
||||
image: *nova_api_image
|
||||
user: root
|
||||
net: host
|
||||
privileged: false
|
||||
detach: false
|
||||
volumes:
|
||||
list_concat:
|
||||
- {get_attr: [ContainersCommon, volumes]}
|
||||
-
|
||||
- /var/lib/config-data/nova/etc/my.cnf.d/:/etc/my.cnf.d/:ro
|
||||
- /var/lib/config-data/nova/etc/nova/:/etc/nova/:ro
|
||||
- /var/log/containers/nova:/var/log/nova
|
||||
- /var/lib/container-config-scripts/:/container-config-scripts/:z
|
||||
command: "/usr/bin/bootstrap_host_exec nova_api su nova -s /bin/bash -c '/container-config-scripts/pyshim.sh /container-config-scripts/nova_wait_for_api_service.py'"
|
||||
environment:
|
||||
- list_join:
|
||||
- ''
|
||||
- - '__OS_DEBUG='
|
||||
- yaql:
|
||||
expression: str($.data.debug)
|
||||
data:
|
||||
debug: {get_attr: [NovaBase, role_data, config_settings, 'nova::logging::debug']}
|
||||
nova_api_cron:
|
||||
start_order: 4
|
||||
image: *nova_api_image
|
||||
net: host
|
||||
user: root
|
||||
|
|
|
@ -44,40 +44,41 @@ outputs:
|
|||
nova_statedir_ownership.py:
|
||||
mode: "0700"
|
||||
content: { get_file: ../../container_config_scripts/nova_statedir_ownership.py }
|
||||
nova_wait_for_placement_service.py:
|
||||
mode: "0755"
|
||||
content: { get_file: ../../container_config_scripts/nova_wait_for_placement_service.py }
|
||||
nova_wait_for_compute_service.py:
|
||||
mode: "0755"
|
||||
content: { get_file: ../../container_config_scripts/nova_wait_for_compute_service.py }
|
||||
|
||||
nova_compute_common_deploy_steps_tasks:
|
||||
description: Common host prep tasks for nova-compute services (compute + ironic)
|
||||
# Runs as external_post_deploy_tasks
|
||||
value: &nova_compute_common_deploy_steps_tasks
|
||||
- when: step|int == 5
|
||||
block:
|
||||
- block:
|
||||
- name: is additonal Cell?
|
||||
set_fact:
|
||||
nova_additional_cell: {get_param: NovaAdditionalCell}
|
||||
- name: discover nodes if it is not an additional cell
|
||||
- name: check if discover hosts is required
|
||||
when:
|
||||
- not nova_additional_cell|bool
|
||||
- nova_cellv2_discovery_done is not defined
|
||||
block:
|
||||
- name: discover via nova_compute?
|
||||
set_fact:
|
||||
delegate_host: "{{ groups['nova_compute'][0] }}"
|
||||
nova_cellv2_discovery_delegate_host: "{{ groups['nova_compute'][0] }}"
|
||||
when:
|
||||
- groups['nova_compute'] is defined and (groups['nova_compute']|length>0)
|
||||
- name: discover via nova_ironic?
|
||||
set_fact:
|
||||
delegate_host: "{{ groups['nova_ironic'][0] }}"
|
||||
nova_cellv2_discovery_delegate_host: "{{ groups['nova_ironic'][0] }}"
|
||||
when:
|
||||
- delegate_host is not defined
|
||||
- nova_cellv2_discovery_delegate_host is not defined
|
||||
- groups['nova_ironic'] is defined and (groups['nova_ironic']|length>0)
|
||||
- name: Discovering nova hosts
|
||||
command: "{{ container_cli }} exec nova_compute nova-manage cell_v2 discover_hosts --by-service"
|
||||
become: true
|
||||
changed_when: False
|
||||
changed_when: false
|
||||
delegate_to: '{{ nova_cellv2_discovery_delegate_host }}'
|
||||
when:
|
||||
- delegate_host is defined
|
||||
- inventory_hostname == delegate_host
|
||||
- nova_cellv2_discovery_delegate_host is defined
|
||||
# Could be included multiple times so ensure it only runs once
|
||||
- set_fact:
|
||||
nova_cellv2_discovery_done: true
|
||||
|
|
|
@ -654,27 +654,8 @@ outputs:
|
|||
expression: str($.data.debug)
|
||||
data:
|
||||
debug: {get_attr: [NovaBase, role_data, config_settings, 'nova::logging::debug']}
|
||||
step_4:
|
||||
step_5:
|
||||
map_merge:
|
||||
- nova_wait_for_placement_service:
|
||||
start_order: 2
|
||||
image: *nova_compute_image
|
||||
user: nova
|
||||
net: host
|
||||
privileged: false
|
||||
detach: false
|
||||
volumes:
|
||||
- /var/lib/container-config-scripts/:/container-config-scripts/:z
|
||||
- /var/lib/config-data/puppet-generated/nova_libvirt/etc/nova:/etc/nova:ro
|
||||
command: "/container-config-scripts/pyshim.sh /container-config-scripts/nova_wait_for_placement_service.py"
|
||||
environment:
|
||||
- list_join:
|
||||
- ''
|
||||
- - '__OS_DEBUG='
|
||||
- yaql:
|
||||
expression: str($.data.debug)
|
||||
data:
|
||||
debug: {get_attr: [NovaBase, role_data, config_settings, 'nova::logging::debug']}
|
||||
- nova_compute:
|
||||
start_order: 3
|
||||
image: *nova_compute_image
|
||||
|
@ -939,31 +920,30 @@ outputs:
|
|||
- ksm.service
|
||||
- ksmtuned.service
|
||||
deploy_steps_tasks:
|
||||
list_concat:
|
||||
- {get_attr: [NovaComputeCommon, nova_compute_common_deploy_steps_tasks]}
|
||||
- - name: validate nova compute container state
|
||||
when:
|
||||
- container_cli == 'podman'
|
||||
- not container_healthcheck_disabled
|
||||
- step|int == 5
|
||||
- false
|
||||
tags:
|
||||
- opendev-validation
|
||||
- opendev-validation-nova
|
||||
block:
|
||||
- name: Get nova-compute healthcheck status
|
||||
register: nova_compute_healthcheck_state
|
||||
systemd:
|
||||
name: tripleo_nova_compute_healthcheck
|
||||
retries: 10
|
||||
delay: 30
|
||||
until: nova_compute_healthcheck_state.status.ExecMainPID != '0' and
|
||||
nova_compute_healthcheck_state.status.ActiveState in ['inactive', 'failed']
|
||||
ignore_errors: yes
|
||||
- name: Fail if nova-compute healthcheck report failed status
|
||||
fail:
|
||||
msg: nova-compute isn't working (healthcheck failed)
|
||||
when: nova_compute_healthcheck_state.status.ExecMainStatus != '0'
|
||||
- name: validate nova compute container state
|
||||
when:
|
||||
- container_cli == 'podman'
|
||||
- not container_healthcheck_disabled
|
||||
- step|int == 6 #FIXME: there is no step6
|
||||
- false
|
||||
tags:
|
||||
- opendev-validation
|
||||
- opendev-validation-nova
|
||||
block:
|
||||
- name: Get nova-compute healthcheck status
|
||||
register: nova_compute_healthcheck_state
|
||||
systemd:
|
||||
name: tripleo_nova_compute_healthcheck
|
||||
retries: 10
|
||||
delay: 30
|
||||
until: nova_compute_healthcheck_state.status.ExecMainPID != '0' and
|
||||
nova_compute_healthcheck_state.status.ActiveState in ['inactive', 'failed']
|
||||
ignore_errors: yes
|
||||
- name: Fail if nova-compute healthcheck report failed status
|
||||
fail:
|
||||
msg: nova-compute isn't working (healthcheck failed)
|
||||
when: nova_compute_healthcheck_state.status.ExecMainStatus != '0'
|
||||
external_post_deploy_tasks: {get_attr: [NovaComputeCommon, nova_compute_common_deploy_steps_tasks]}
|
||||
upgrade_tasks:
|
||||
- name: Remove openstack-nova-compute and python-nova package during upgrade
|
||||
package:
|
||||
|
|
|
@ -153,7 +153,7 @@ outputs:
|
|||
-
|
||||
- /var/lib/container-config-scripts/:/container-config-scripts/
|
||||
command: "/container-config-scripts/pyshim.sh /container-config-scripts/nova_statedir_ownership.py"
|
||||
step_4:
|
||||
step_5:
|
||||
nova_compute:
|
||||
start_order: 100 # After the ironic services
|
||||
image: *nova_ironic_image
|
||||
|
@ -225,7 +225,7 @@ outputs:
|
|||
name: virt_sandbox_use_netlink
|
||||
persistent: yes
|
||||
state: yes
|
||||
deploy_steps_tasks: {get_attr: [NovaComputeCommon, nova_compute_common_deploy_steps_tasks]}
|
||||
external_post_deploy_tasks: {get_attr: [NovaComputeCommon, nova_compute_common_deploy_steps_tasks]}
|
||||
post_upgrade_tasks:
|
||||
- when: step|int == 1
|
||||
import_role:
|
||||
|
|
|
@ -68,6 +68,10 @@ parameters:
|
|||
description: The password for the nova service and db account
|
||||
type: string
|
||||
hidden: true
|
||||
Debug:
|
||||
type: boolean
|
||||
default: false
|
||||
description: Set to True to enable debugging on all services.
|
||||
|
||||
conditions:
|
||||
placement_workers_zero: {equals : [{get_param: PlacementWorkers}, 0]}
|
||||
|
@ -190,6 +194,12 @@ outputs:
|
|||
- path: /var/log/placement
|
||||
owner: placement:placement
|
||||
recurse: true
|
||||
container_config_scripts:
|
||||
map_merge:
|
||||
- {get_attr: [ContainersCommon, container_config_scripts]}
|
||||
- placement_wait_for_service.py:
|
||||
mode: "0755"
|
||||
content: { get_file: ../../container_config_scripts/placement_wait_for_service.py }
|
||||
docker_config:
|
||||
step_2:
|
||||
get_attr: [PlacementLogging, docker_config, step_2]
|
||||
|
@ -280,6 +290,25 @@ outputs:
|
|||
- ''
|
||||
environment:
|
||||
- KOLLA_CONFIG_STRATEGY=COPY_ALWAYS
|
||||
placement_wait_for_service:
|
||||
start_order: 2
|
||||
image: *placement_api_image
|
||||
user: root
|
||||
net: host
|
||||
privileged: false
|
||||
detach: false
|
||||
volumes:
|
||||
- /var/lib/container-config-scripts/:/container-config-scripts/:z
|
||||
- /var/lib/config-data/puppet-generated/placement/:/var/lib/kolla/config_files/src:ro
|
||||
command: "/usr/bin/bootstrap_host_exec placement su placement -s /bin/bash -c '/container-config-scripts/pyshim.sh /container-config-scripts/placement_wait_for_service.py'"
|
||||
environment:
|
||||
- list_join:
|
||||
- ''
|
||||
- - '__OS_DEBUG='
|
||||
- yaql:
|
||||
expression: str($.data.debug)
|
||||
data:
|
||||
debug: {get_param: Debug}
|
||||
host_prep_tasks: {get_attr: [PlacementLogging, host_prep_tasks]}
|
||||
upgrade_tasks: []
|
||||
post_upgrade_tasks:
|
||||
|
|
|
@ -0,0 +1,9 @@
|
|||
---
|
||||
fixes:
|
||||
- |
|
||||
If nova-api is delayed starting then the nova_wait_for_compute_service
|
||||
can time out. A deployment using a slow/busy remote container repository is
|
||||
particularly susceptible to this issue. To resolve this, nova_compute and
|
||||
nova_wait_for_compute_service have been postponed to step_5 and a task
|
||||
has been added to step_4 to ensure nova_api is active before proceeding.
|
||||
Resolves Bug `1842948 <https://bugs.launchpad.net/tripleo/+bug/1842948>`_.
|
Loading…
Reference in New Issue