From d7704bbf56cc155884e233a816422aa6e721cda3 Mon Sep 17 00:00:00 2001 From: Oliver Walsh Date: Tue, 19 Jun 2018 11:56:16 +0100 Subject: [PATCH] Move cellv2 discovery from control plane services to compute services If compute nodes are deployed without deploying/updating the controllers then the computes will not have cellv2 mappings, because cell_v2 host discovery is run in the controller deploy steps (nova-api). This can happen if the controller nodes are blacklisted during a compute scale out. It's also likely to be an issue going forward if the deployment is staged (e.g. split control plane). This change moves the cell_v2 discovery logic to the nova-compute/nova-ironic deploy step. Conflicts: common/services.yaml docker/services/nova-api.yaml Closes-bug: 1786961 Change-Id: I12a02f636f31985bc1b71bff5b744d346286a95f (cherry picked from commit e0e885b8ca3332e0815c537a32c564cac81f7f7e) --- common/services.yaml | 13 --- docker/services/nova-api.yaml | 75 ---------------- docker/services/nova-compute-common.yaml | 16 ++++ docker/services/nova-compute.yaml | 17 +++- docker/services/nova-ironic.yaml | 16 +++- .../nova_cell_v2_discover_host.py | 89 +++++++++++++++++++ .../nova_statedir_ownership.py | 0 overcloud.j2.yaml | 15 ---- puppet/all-nodes-config.j2.yaml | 6 -- 9 files changed, 136 insertions(+), 111 deletions(-) create mode 100644 docker_config_scripts/nova_cell_v2_discover_host.py mode change 100755 => 100644 docker_config_scripts/nova_statedir_ownership.py diff --git a/common/services.yaml b/common/services.yaml index a9eca5ea96..c904ad4042 100644 --- a/common/services.yaml +++ b/common/services.yaml @@ -108,18 +108,6 @@ resources: service_names: {get_attr: [ServiceChain, role_data, service_names]} docker_config_scripts: {get_attr: [ServiceChain, role_data, docker_config_scripts]} - CellV2Discovery: - type: OS::Heat::Value - properties: - type: boolean - value: - yaql: - expression: - # If any service in this role requires cellv2_discovery then this value is true - 
coalesce($.data.cellv2_discovery, []).contains(true) - data: - cellv2_discovery: {get_attr: [ServiceChain, role_data, cellv2_discovery]} - LoggingSourcesConfig: type: OS::Heat::Value properties: @@ -418,4 +406,3 @@ outputs: docker_config_scripts: {get_attr: [DockerConfigScripts, value]} docker_puppet_tasks: {get_attr: [DockerPuppetTasks, value]} host_prep_tasks: {get_attr: [HostPrepTasks, value]} - cellv2_discovery: {get_attr: [CellV2Discovery, value]} diff --git a/docker/services/nova-api.yaml b/docker/services/nova-api.yaml index 0ca4092952..66a4fbbc93 100644 --- a/docker/services/nova-api.yaml +++ b/docker/services/nova-api.yaml @@ -143,58 +143,6 @@ outputs: owner: nova:nova recurse: true docker_config_scripts: - nova_api_discover_hosts.sh: - mode: "0700" - content: | - #!/bin/bash - export OS_PROJECT_DOMAIN_NAME=$(crudini --get /etc/nova/nova.conf keystone_authtoken project_domain_name) - export OS_USER_DOMAIN_NAME=$(crudini --get /etc/nova/nova.conf keystone_authtoken user_domain_name) - export OS_PROJECT_NAME=$(crudini --get /etc/nova/nova.conf keystone_authtoken project_name) - export OS_USERNAME=$(crudini --get /etc/nova/nova.conf keystone_authtoken username) - export OS_PASSWORD=$(crudini --get /etc/nova/nova.conf keystone_authtoken password) - export OS_AUTH_URL=$(crudini --get /etc/nova/nova.conf keystone_authtoken auth_url) - export OS_AUTH_TYPE=password - export OS_IDENTITY_API_VERSION=3 - - echo "(cellv2) Running cell_v2 host discovery" - timeout=600 - loop_wait=30 - declare -A discoverable_hosts - for host in $(hiera -c /etc/puppet/hiera.yaml cellv2_discovery_hosts | sed -e '/^nil$/d' | tr "," " "); do discoverable_hosts[$host]=1; done - timeout_at=$(( $(date +"%s") + ${timeout} )) - echo "(cellv2) Waiting ${timeout} seconds for hosts to register" - finished=0 - while : ; do - for host in $(openstack -q compute service list -c 'Host' -c 'Zone' -f value | awk '$2 != "internal" { print $1 }'); do - if (( discoverable_hosts[$host] == 1 )); then - echo 
"(cellv2) compute node $host has registered" - unset discoverable_hosts[$host] - fi - done - finished=1 - for host in "${!discoverable_hosts[@]}"; do - if (( ${discoverable_hosts[$host]} == 1 )); then - echo "(cellv2) compute node $host has not registered" - finished=0 - fi - done - remaining=$(( $timeout_at - $(date +"%s") )) - if (( $finished == 1 )); then - echo "(cellv2) All nodes registered" - break - elif (( $remaining <= 0 )); then - echo "(cellv2) WARNING: timeout waiting for nodes to register, running host discovery regardless" - echo "(cellv2) Expected host list:" $(hiera -c /etc/puppet/hiera.yaml cellv2_discovery_hosts | sed -e '/^nil$/d' | sort -u | tr ',' ' ') - echo "(cellv2) Detected host list:" $(openstack -q compute service list -c 'Host' -c 'Zone' -f value | awk '$2 != "internal" { print $1 }' | sort -u | tr '\n', ' ') - break - else - echo "(cellv2) Waiting ${remaining} seconds for hosts to register" - sleep $loop_wait - fi - done - echo "(cellv2) Running host discovery..." - su nova -s /bin/bash -c "/usr/bin/nova-manage cell_v2 discover_hosts --by-service --verbose" - nova_api_ensure_default_cell.sh: mode: "0700" content: | @@ -299,29 +247,6 @@ outputs: - /var/lib/config-data/puppet-generated/nova/:/var/lib/kolla/config_files/src:ro environment: - KOLLA_CONFIG_STRATEGY=COPY_ALWAYS - step_5: - nova_api_discover_hosts: - start_order: 1 - image: *nova_api_image - net: host - detach: false - volumes: - list_concat: - - *nova_api_bootstrap_volumes - - - - /var/lib/config-data/nova/etc/my.cnf.d/tripleo.cnf:/etc/my.cnf.d/tripleo.cnf:ro - - /var/lib/config-data/nova/etc/nova/:/etc/nova/:ro - - /var/log/containers/nova:/var/log/nova - - /var/lib/docker-config-scripts/nova_api_discover_hosts.sh:/nova_api_discover_hosts.sh:ro - user: root - command: "/usr/bin/bootstrap_host_exec nova_api /nova_api_discover_hosts.sh" - environment: - # NOTE: this should force this container to re-run on each - # update (scale-out, etc.) 
- - list_join: - - '' - - - 'TRIPLEO_DEPLOY_IDENTIFIER=' - - {get_param: DeployIdentifier} metadata_settings: get_attr: [NovaApiBase, role_data, metadata_settings] host_prep_tasks: {get_attr: [NovaApiLogging, host_prep_tasks]} diff --git a/docker/services/nova-compute-common.yaml b/docker/services/nova-compute-common.yaml index 55ccbed696..76f41567de 100644 --- a/docker/services/nova-compute-common.yaml +++ b/docker/services/nova-compute-common.yaml @@ -32,6 +32,10 @@ parameters: default: {} description: Parameters specific to the role type: json + NovaPassword: + description: The password for the nova service and db account + type: string + hidden: true outputs: docker_config_scripts: @@ -43,3 +47,15 @@ outputs: nova_wait_for_placement_service.py: mode: "0700" content: { get_file: ../../docker_config_scripts/nova_wait_for_placement_service.py } + nova_cell_v2_discover_host.py: + mode: "0700" + content: + str_replace: + template: { get_file: ../../docker_config_scripts/nova_cell_v2_discover_host.py } + params: + __OS_PROJECT_DOMAIN_NAME: 'Default' + __OS_PROJECT_USER_NAME: 'Default' + __OS_PROJECT_NAME: 'service' + __OS_USERNAME: 'nova' + __OS_PASSWORD: {get_param: NovaPassword} + __OS_AUTH_URL: {get_param: [EndpointMap, KeystoneInternal, uri_no_suffix]} diff --git a/docker/services/nova-compute.yaml b/docker/services/nova-compute.yaml index 63e2463524..1c0a2d6daa 100644 --- a/docker/services/nova-compute.yaml +++ b/docker/services/nova-compute.yaml @@ -129,7 +129,6 @@ outputs: description: Role data for the Nova Compute service. 
value: service_name: {get_attr: [NovaComputeBase, role_data, service_name]} - cellv2_discovery: true config_settings: map_merge: - get_attr: [NovaComputeBase, role_data, config_settings] @@ -260,6 +259,22 @@ outputs: - {get_param: NovaComputeOptEnvVars} - - KOLLA_CONFIG_STRATEGY=COPY_ALWAYS + step_5: + nova_cellv2_discover_hosts: + start_order: 0 + image: *nova_compute_image + net: host + detach: false + volumes: + list_concat: + - {get_attr: [ContainersCommon, volumes]} + - + - /var/lib/config-data/nova_libvirt/etc/my.cnf.d/:/etc/my.cnf.d/:ro + - /var/lib/config-data/nova_libvirt/etc/nova/:/etc/nova/:ro + - /var/log/containers/nova:/var/log/nova + - /var/lib/docker-config-scripts/:/docker-config-scripts/ + user: root + command: "/docker-config-scripts/nova_cell_v2_discover_host.py" host_prep_tasks: list_concat: - {get_attr: [NovaLogging, host_prep_tasks]} diff --git a/docker/services/nova-ironic.yaml b/docker/services/nova-ironic.yaml index dab4320337..7b7a20cb12 100644 --- a/docker/services/nova-ironic.yaml +++ b/docker/services/nova-ironic.yaml @@ -74,7 +74,6 @@ outputs: description: Role data for the Nova Compute service. 
value: service_name: {get_attr: [NovaIronicBase, role_data, service_name]} - cellv2_discovery: true config_settings: {get_attr: [NovaIronicBase, role_data, config_settings]} logging_source: {get_attr: [NovaIronicBase, role_data, logging_source]} logging_groups: {get_attr: [NovaIronicBase, role_data, logging_groups]} @@ -145,6 +144,21 @@ outputs: - /var/log/containers/nova:/var/log/nova environment: - KOLLA_CONFIG_STRATEGY=COPY_ALWAYS + nova_cellv2_discover_hosts: + start_order: 1 + image: *nova_ironic_image + net: host + detach: false + volumes: + list_concat: + - {get_attr: [ContainersCommon, volumes]} + - + - /var/lib/config-data/nova/etc/my.cnf.d/:/etc/my.cnf.d/:ro + - /var/lib/config-data/nova/etc/nova/:/etc/nova/:ro + - /var/log/containers/nova:/var/log/nova + - /var/lib/docker-config-scripts/:/docker-config-scripts/ + user: root + command: "/docker-config-scripts/nova_cell_v2_discover_host.py" host_prep_tasks: - name: create persistent directories file: diff --git a/docker_config_scripts/nova_cell_v2_discover_host.py b/docker_config_scripts/nova_cell_v2_discover_host.py new file mode 100644 index 0000000000..74f3c2cb51 --- /dev/null +++ b/docker_config_scripts/nova_cell_v2_discover_host.py @@ -0,0 +1,89 @@ +#!/usr/bin/env python +# +# Copyright 2018 Red Hat Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+from __future__ import print_function +import os +import pwd +import socket +import subprocess +import sys +import time + +# Delete this immediately as it contains auth info +os.unlink(__file__) + +# Only need root to read this script, drop to nova user +nova_uid, nova_gid = pwd.getpwnam('nova')[2:4] +os.setgid(nova_gid) +os.setuid(nova_uid) + + +os.environ.update( + OS_PROJECT_DOMAIN_NAME='__OS_PROJECT_DOMAIN_NAME', + OS_USER_DOMAIN_NAME='__OS_PROJECT_USER_NAME', + OS_PROJECT_NAME='__OS_PROJECT_NAME', + OS_USERNAME='__OS_USERNAME', + OS_PASSWORD='__OS_PASSWORD', + OS_AUTH_URL='__OS_AUTH_URL', + OS_AUTH_TYPE='password', + OS_IDENTITY_API_VERSION='3' +) + +try: + my_host = subprocess.check_output([ + 'crudini', + '--get', + '/etc/nova/nova.conf', + 'DEFAULT', + 'host' + ]).rstrip() +except subprocess.CalledProcessError: + # If host isn't set nova defaults to this + my_host = socket.gethostname() + +# Wait until this host is listed in the service list then +# run cellv2 host discovery +retries = 10 +for i in range(retries): + try: + service_list = subprocess.check_output([ + 'openstack', + '-q', + 'compute', + 'service', + 'list', + '-c', + 'Host', + '-c', + 'Zone', + '-f', + 'value' + ]).split('\n') + for entry in service_list: + host, zone = entry.split() + if host == my_host and zone != 'internal': + print('(cellv2) Service registered, running discovery') + sys.exit(subprocess.call([ + '/usr/bin/nova-manage', + 'cell_v2', + 'discover_hosts', + '--by-service', + '--verbose' + ])) + print('(cellv2) Waiting for service to register') + except subprocess.CalledProcessError: + print('(cellv2) Retrying') + time.sleep(30) +sys.exit(1) diff --git a/docker_config_scripts/nova_statedir_ownership.py b/docker_config_scripts/nova_statedir_ownership.py old mode 100755 new mode 100644 diff --git a/overcloud.j2.yaml b/overcloud.j2.yaml index 03b387929e..56d8691195 100644 --- a/overcloud.j2.yaml +++ b/overcloud.j2.yaml @@ -713,21 +713,6 @@ resources: {% for role in roles %} - 
{get_attr: [{{role.name}}ServiceNames, value]} {% endfor %} - cellv2_discovery_hosts: - # Collects compute hostnames for all roles with a service that requires cellv2 host discovery - list_join: - - ',' - - yaql: - expression: coalesce($.data.e.zip($.data.l).where($[0]).select($[1]).flatten(), []) - data: - e: # list of true/fails for whether cellsv2 host discovery is required for the roles -{%- for role in roles %} - - {get_attr: [{{role.name}}ServiceChainRoleData, value, cellv2_discovery]} -{%- endfor %} - l: # list of list of compute hostnames for the roles -{%- for role in roles %} - - {get_attr: [{{role.name}}, hostname_map, canonical]} -{%- endfor %} controller_ips: {get_attr: [{{primary_role_name}}, ip_address]} controller_names: {get_attr: [{{primary_role_name}}, hostname]} service_ips: diff --git a/puppet/all-nodes-config.j2.yaml b/puppet/all-nodes-config.j2.yaml index adb4e78fd4..0f0bfd356b 100644 --- a/puppet/all-nodes-config.j2.yaml +++ b/puppet/all-nodes-config.j2.yaml @@ -22,8 +22,6 @@ parameters: type: json controller_names: type: comma_delimited_list - cellv2_discovery_hosts: - type: comma_delimited_list NetVipMap: type: json RedisVirtualIP: @@ -139,10 +137,6 @@ resources: list_join: - ',' - {get_param: controller_names} - - cellv2_discovery_hosts: - list_join: - - ',' - - {get_param: cellv2_discovery_hosts} deploy_identifier: {get_param: DeployIdentifier} update_identifier: {get_param: UpdateIdentifier} stack_action: {get_param: StackAction}