Add capability to wait on compute nodes

This patchset adds the ability for the Nova chart to wait for a
percentage of the compute nodes/hypervisors to become ready/available
before continuing with the deployment. The feature is disabled by default,
because it may or may not be needed in production
deployments.

Change-Id: I971151a663afc87e7d62efa4ab3723c5472a3736
This commit is contained in:
Cliff Parsons 2019-10-26 16:15:55 -05:00 committed by Tin Lam
parent 97ac0575ba
commit 58291db1a6
6 changed files with 203 additions and 4 deletions

View File

@ -30,4 +30,10 @@ openstack flavor show {{ .name }} || \
{{ end }}
{{ end }}
{{ .Values.bootstrap.script | default "echo 'Not Enabled'" }}
{{ if .Values.bootstrap.wait_for_computes.enabled }}
{{ .Values.bootstrap.wait_for_computes.scripts.wait_script }}
{{ else }}
echo 'Wait for Computes script not enabled'
{{ end }}
{{ .Values.bootstrap.script | default "echo 'No other bootstrap customizations found.'" }}

View File

@ -0,0 +1,21 @@
#!/bin/bash
{{/*
Copyright 2019 The Openstack-Helm Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/}}
# Entry point of the nova-wait-for-computes-init init container; it is
# published in the nova-bin ConfigMap as wait-for-computes-init.sh.
# -e: abort on the first failing command; -x: trace each command to the log.
set -ex
# The script body is supplied through values
# (bootstrap.wait_for_computes.scripts.init_script); when that value is unset
# the rendered script only echoes a notice.
{{ .Values.bootstrap.wait_for_computes.scripts.init_script | default "echo 'No wait-for-compute script configured'" }}

View File

@ -93,4 +93,6 @@ data:
{{ tuple "bin/_nova-service-cleaner.sh.tpl" . | include "helm-toolkit.utils.template" | indent 4 }}
rabbit-init.sh: |
{{- include "helm-toolkit.scripts.rabbit_init" . | indent 4 }}
wait-for-computes-init.sh: |
{{ tuple "bin/_wait-for-computes-init.sh.tpl" . | include "helm-toolkit.utils.template" | indent 4 }}
{{- end }}

View File

@ -14,7 +14,122 @@ See the License for the specific language governing permissions and
limitations under the License.
*/}}
{{- if and .Values.manifests.job_bootstrap .Values.bootstrap.enabled }}
{{- $bootstrapJob := dict "envAll" . "serviceName" "nova" "keystoneUser" .Values.bootstrap.ks_user "logConfigFile" .Values.conf.nova.DEFAULT.log_config_append -}}
{{ $bootstrapJob | include "helm-toolkit.manifests.job_bootstrap" }}
{{- /*
Local variables for the nova bootstrap Job rendered below.
  $configMapBin / $configMapEtc  resolve to "nova-bin" / "nova-etc".
  $configFile                    resolves to "/etc/nova/nova.conf".
  $serviceAccountName            resolves to "nova-bootstrap"; the same string is
                                 reused as the Job, ClusterRole and
                                 ClusterRoleBinding name.
NOTE(review): "index . nodeSelector" reads from the same object as $envAll (the
chart root context), so it presumably always falls through to the
labels.job default - confirm whether an override was intended.
*/ -}}
{{- $envAll := . }}
{{- if and $envAll.Values.manifests.job_bootstrap $envAll.Values.bootstrap.enabled }}
{{- $serviceName := "nova" -}}
{{- $keystoneUser := $envAll.Values.bootstrap.ks_user -}}
{{- $configMapBin := printf "%s-%s" $serviceName "bin" -}}
{{- $configMapEtc := printf "%s-%s" $serviceName "etc" -}}
{{- $configFile := printf "/etc/%s/%s.conf" $serviceName $serviceName -}}
{{- $logConfigFile := $envAll.Values.conf.nova.DEFAULT.log_config_append -}}
{{- $nodeSelector := index . "nodeSelector" | default ( dict $envAll.Values.labels.job.node_selector_key $envAll.Values.labels.job.node_selector_value ) -}}
{{- $serviceAccountName := printf "%s-%s" $serviceName "bootstrap" -}}
{{ tuple $envAll "bootstrap" $serviceAccountName | include "helm-toolkit.snippets.kubernetes_pod_rbac_serviceaccount" }}
---
# Bootstrap Job for nova. It runs /tmp/bootstrap.sh in the "bootstrap"
# container. When bootstrap.wait_for_computes.enabled is set, an extra init
# container runs /tmp/wait-for-computes-init.sh first; both containers mount
# the same pod-tmp emptyDir at /tmp, so files written by the init container
# are visible to the bootstrap container.
apiVersion: batch/v1
kind: Job
metadata:
name: {{ $serviceAccountName | quote }}
spec:
template:
metadata:
labels:
{{ tuple $envAll "nova" "bootstrap" | include "helm-toolkit.snippets.kubernetes_metadata_labels" | indent 8 }}
annotations:
{{ tuple $envAll | include "helm-toolkit.snippets.release_uuid" | indent 8 }}
spec:
serviceAccountName: {{ $serviceAccountName }}
restartPolicy: OnFailure
nodeSelector:
{{ toYaml $nodeSelector | indent 8 }}
initContainers:
{{ tuple $envAll "bootstrap" list | include "helm-toolkit.snippets.kubernetes_entrypoint_init_container" | indent 8 }}
{{- if $envAll.Values.bootstrap.wait_for_computes.enabled }}
# Optional init container; its image comes from the
# nova_wait_for_computes_init image tag.
- name: nova-wait-for-computes-init
{{ tuple $envAll "nova_wait_for_computes_init" | include "helm-toolkit.snippets.image" | indent 10 }}
command:
- /bin/bash
- -c
- /tmp/wait-for-computes-init.sh
volumeMounts:
- name: pod-tmp
mountPath: /tmp
# The script is projected from the bin ConfigMap (volume bootstrap-sh).
- name: bootstrap-sh
mountPath: /tmp/wait-for-computes-init.sh
subPath: wait-for-computes-init.sh
readOnly: true
{{- end }}
containers:
- name: bootstrap
image: {{ $envAll.Values.images.tags.bootstrap }}
imagePullPolicy: {{ $envAll.Values.images.pull_policy }}
{{ tuple $envAll $envAll.Values.pod.resources.jobs.bootstrap | include "helm-toolkit.snippets.kubernetes_resources" | indent 10 }}
env:
{{- with $env := dict "ksUserSecret" ( index $envAll.Values.secrets.identity $keystoneUser ) }}
{{- include "helm-toolkit.snippets.keystone_openrc_env_vars" $env | indent 12 }}
{{- end }}
# Thresholds read by the default bootstrap.wait_for_computes.scripts.wait_script.
# They are exported even when wait_for_computes is disabled.
- name: WAIT_PERCENTAGE
value: "{{ .Values.bootstrap.wait_for_computes.wait_percentage }}"
- name: REMAINING_WAIT
value: "{{ .Values.bootstrap.wait_for_computes.remaining_wait }}"
command:
- /bin/bash
- -c
- /tmp/bootstrap.sh
volumeMounts:
- name: pod-tmp
mountPath: /tmp
- name: bootstrap-sh
mountPath: /tmp/bootstrap.sh
subPath: bootstrap.sh
readOnly: true
- name: etc-service
mountPath: {{ dir $configFile | quote }}
# Both the service config file and the logging config file are single-file
# subPath mounts out of the same secret volume (bootstrap-conf).
- name: bootstrap-conf
mountPath: {{ $configFile | quote }}
subPath: {{ base $configFile | quote }}
readOnly: true
- name: bootstrap-conf
mountPath: {{ $logConfigFile | quote }}
subPath: {{ base $logConfigFile | quote }}
readOnly: true
volumes:
- name: pod-tmp
emptyDir: {}
# Scripts: the "<service>-bin" ConfigMap, mounted with mode 0555.
- name: bootstrap-sh
configMap:
name: {{ $configMapBin | quote }}
defaultMode: 0555
- name: etc-service
emptyDir: {}
# Configuration: the "<service>-etc" Secret, mounted with mode 0444.
- name: bootstrap-conf
secret:
secretName: {{ $configMapEtc | quote }}
defaultMode: 0444
---
# Cluster-scoped read access to Node objects for the bootstrap service
# account. The default bootstrap.wait_for_computes.scripts.init_script runs
# `kubectl get nodes`, which needs "list" on nodes.
# NOTE(review): these objects are rendered whenever the bootstrap job is
# enabled, even if bootstrap.wait_for_computes.enabled is false, and their
# name is not namespaced - confirm that is acceptable when several releases
# share one cluster.
kind: ClusterRole
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: {{ $serviceAccountName }}
rules:
  - apiGroups:
      - ''
    resources:
      - nodes
    verbs:
      - get
      - list
---
# Binds the ClusterRole above to the bootstrap service account in the release
# namespace. Uses the GA rbac.authorization.k8s.io/v1 API, matching the
# ClusterRole; the v1beta1 RBAC API is no longer served as of Kubernetes 1.22.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: {{ $serviceAccountName }}
subjects:
  - kind: ServiceAccount
    name: {{ $serviceAccountName }}
    namespace: {{ $envAll.Release.Namespace }}
roleRef:
  kind: ClusterRole
  name: {{ $serviceAccountName }}
  apiGroup: rbac.authorization.k8s.io
{{- end }}

View File

@ -89,6 +89,7 @@ images:
nova_spiceproxy_assets: 'docker.io/kolla/ubuntu-source-nova-spicehtml5proxy:ocata'
test: docker.io/xrally/xrally-openstack:1.3.0
image_repo_sync: docker.io/docker:17.07.0
nova_wait_for_computes_init: gcr.io/google_containers/hyperkube-amd64:v1.11.6
local_registry:
active: false
exclude:
@ -149,6 +150,58 @@ bootstrap:
ram: 16384
disk: 160
vcpus: 8
wait_for_computes:
  # When true, the bootstrap job gains an init container that runs
  # scripts.init_script, and bootstrap.sh runs scripts.wait_script before the
  # regular bootstrap.script.
  enabled: false
  # Wait percentage is the minimum percentage of compute hypervisors which
  # must be available before the remainder of the bootstrap script can be run.
  wait_percentage: 70
  # Once the wait_percentage above is achieved, the remaining_wait is the
  # amount of time in seconds to wait before executing the remainder of the
  # bootstrap script.
  remaining_wait: 300
  scripts:
    init_script: |
      # This runs in a bootstrap init container. It records the node names
      # returned by kubectl in /tmp/compute_nodes.txt (space separated) so the
      # main bootstrap container can count them.
      # NOTE(review): this lists every Kubernetes node, not only the nodes
      # that run nova-compute; add a label selector if those differ.
      COMPUTE_NODES=$(kubectl get nodes -o custom-columns=NAME:.metadata.name --no-headers | sort)
      /bin/echo $COMPUTE_NODES > /tmp/compute_nodes.txt
    wait_script: |
      # This script runs in the main bootstrap container just before the
      # bootstrap.script is called. It reads WAIT_PERCENTAGE and
      # REMAINING_WAIT from the environment.
      COMPUTE_HOSTS=$(cat /tmp/compute_nodes.txt | wc -w)
      if [[ $COMPUTE_HOSTS -eq 0 ]]; then
        echo "There are no compute hosts found!"
        exit 1
      fi
      # Wait until at least WAIT_PERCENTAGE of the expected hypervisors have
      # registered. After that, proceed as soon as all of them are up, or once
      # REMAINING_WAIT seconds have elapsed, whichever comes first.
      HYPERVISOR_WAIT=true
      WAIT_AFTER_READY=0
      SLEEP=5
      while [[ $HYPERVISOR_WAIT == true ]]; do
        # It's possible that the openstack command may fail due to not being
        # able to reach the compute service; a failure counts as zero
        # hypervisors instead of aborting the script.
        set +e
        HYPERVISORS=$(openstack hypervisor list -f value -c 'Hypervisor Hostname' | wc -w)
        set -e
        PERCENT_READY=$(( $HYPERVISORS * 100 / $COMPUTE_HOSTS ))
        if [[ $PERCENT_READY -ge $WAIT_PERCENTAGE ]]; then
          echo "Hypervisor ready percentage is $PERCENT_READY"
          # Numeric ">=" rather than string "==": more hypervisors than
          # counted nodes (over 100%) must also end the wait immediately.
          if [[ $PERCENT_READY -ge 100 ]]; then
            HYPERVISOR_WAIT=false
            echo "All hypervisors are ready."
          elif [[ $WAIT_AFTER_READY -ge $REMAINING_WAIT ]]; then
            HYPERVISOR_WAIT=false
            echo "Waited the configured time -- $HYPERVISORS out of $COMPUTE_HOSTS hypervisor(s) ready -- proceeding with the bootstrap."
          else
            sleep $SLEEP
            WAIT_AFTER_READY=$(( $WAIT_AFTER_READY + $SLEEP ))
          fi
        else
          echo "Waiting $SLEEP seconds for enough hypervisors to be discovered..."
          sleep $SLEEP
        fi
      done
network:
# provide what type of network wiring will be used

View File

@ -27,6 +27,7 @@ if [ "x$(systemd-detect-virt)" == "xnone" ]; then
echo 'OSH is not being deployed in virtualized environment'
helm upgrade --install nova ./nova \
--namespace=openstack \
--set bootstrap.wait_for_computes.enabled=true \
--set conf.ceph.enabled=false \
${OSH_EXTRA_HELM_ARGS:=} \
${OSH_EXTRA_HELM_ARGS_NOVA}
@ -34,6 +35,7 @@ else
echo 'OSH is being deployed in virtualized environment, using qemu for nova'
helm upgrade --install nova ./nova \
--namespace=openstack \
--set bootstrap.wait_for_computes.enabled=true \
--set conf.ceph.enabled=false \
--set conf.nova.libvirt.virt_type=qemu \
--set conf.nova.libvirt.cpu_mode=none \