Merge "[fedora-atomic][k8s] Support operating system upgrade"
This commit is contained in:
commit
94caaaa344
|
@ -1,30 +1,38 @@
|
|||
Rolling upgrade is one of most important features user want to see for a
|
||||
managed Kubernetes service. And in Magnum, we're thinking more deeper to
|
||||
provide better user experience.
|
||||
Rolling upgrade is an important feature a user may want for a managed
|
||||
Kubernetes service.
|
||||
|
||||
.. note::
|
||||
|
||||
Kubernetes version upgrade is only supported by the Fedora Atomic and
|
||||
the Fedora CoreOS drivers.
|
||||
|
||||
A user can run a command as shown below to trigger a rolling upgrade for
|
||||
Kubernetes version upgrade or node operating system version upgrade.
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
#!/bin/bash -x
|
||||
openstack coe cluster upgrade <cluster ID> <new cluster template ID>
|
||||
|
||||
IP="192.168.122.1"
|
||||
CLUSTER="797b39e1-fac2-48d3-8377-d6e6cc443d39"
|
||||
CT="e32c8cf7-394b-45e6-a17e-4fe6a30ad64b"
|
||||
The key parameter in the command is the new cluster template ID. For
|
||||
Kubernetes version upgrade, a newer version for label `kube_tag` should be
|
||||
provided. Downgrade is not supported.
|
||||
|
||||
# Upgrade curl
|
||||
req_body=$(cat << EOF
|
||||
{
|
||||
"max_batch_size": 1,
|
||||
"nodegroup": "master",
|
||||
"cluster_template": "${CT}"
|
||||
}
|
||||
EOF
|
||||
)
|
||||
USER_TOKEN=$(openstack token issue -c id -f value)
|
||||
curl -g -i -X PATCH https://${IP}:9511/v1/clusters/${CLUSTER}/actions/upgrade \
|
||||
-H "OpenStack-API-Version: container-infra latest" \
|
||||
-H "X-Auth-Token: $USER_TOKEN" \
|
||||
-H "Content-Type: application/json" \
|
||||
-H "Accept: application/json" \
|
||||
-H "User-Agent: None" \
|
||||
-d "$req_body"
|
||||
A simple operating system upgrade can be applied using a new image ID in the
|
||||
new cluster template. However, this entails a downtime for applications running
|
||||
on the cluster, because all the nodes will be rebuilt one by one.
|
||||
|
||||
The Fedora Atomic driver supports a more graceful operating system upgrade.
|
||||
Similar to the Kubernetes version upgrade, it will cordon and drain the nodes
|
||||
before upgrading the operating system with the rpm-ostree command. At least one
|
||||
of the following two labels must be provided to support this feature:
|
||||
|
||||
* `ostree_commit`: this is a commit ID of ostree the current system should be
|
||||
upgraded to. An example of a commit ID is
|
||||
`1766b4526f1a738ba1e6e0a66264139f65340bcc28e7045f10cbe6d161eb1925`.
|
||||
* `ostree_remote`: this is a remote name of ostree the current system should be
|
||||
rebased to. An example of a remote name is
|
||||
`fedora-atomic:fedora/29/x86_64/atomic-host`.
|
||||
|
||||
If both labels are present, `ostree_commit` takes precedence. To check if there
|
||||
are updates available, run `sudo rpm-ostree upgrade --check` on the Atomic host
|
||||
which will show you the latest commit ID that can be upgraded to.
|
||||
|
|
|
@ -6,8 +6,11 @@ set -x
|
|||
ssh_cmd="ssh -F /srv/magnum/.ssh/config root@localhost"
|
||||
KUBECONFIG="/etc/kubernetes/kubelet-config.yaml"
|
||||
new_kube_tag="$kube_tag_input"
|
||||
new_ostree_remote="$ostree_remote_input"
|
||||
new_ostree_commit="$ostree_commit_input"
|
||||
HOSTNAME_OVERRIDE="$(cat /etc/hostname | head -1 | sed 's/\.novalocal//')"
|
||||
|
||||
if [ ${new_kube_tag}!=${KUBE_TAG} ]; then
|
||||
function drain {
|
||||
# If there is only one master and this is the master node, skip the drain, just cordon it
|
||||
# If there is only one worker and this is the worker node, skip the drain, just cordon it
|
||||
all_masters=$(kubectl get nodes --selector=node-role.kubernetes.io/master= -o name)
|
||||
|
@ -17,6 +20,11 @@ if [ ${new_kube_tag}!=${KUBE_TAG} ]; then
|
|||
else
|
||||
kubectl cordon ${INSTANCE_NAME}
|
||||
fi
|
||||
}
|
||||
|
||||
if [ "${new_kube_tag}" != "${KUBE_TAG}" ]; then
|
||||
|
||||
drain
|
||||
|
||||
SERVICE_LIST=$($ssh_cmd podman ps -f name=kube --format {{.Names}})
|
||||
|
||||
|
@ -35,9 +43,61 @@ if [ ${new_kube_tag}!=${KUBE_TAG} ]; then
|
|||
i=0
|
||||
until kubectl uncordon ${INSTANCE_NAME}
|
||||
do
|
||||
((i++))
|
||||
i=$((i+1))
|
||||
[ $i -lt 30 ] || break;
|
||||
echo "Trying to uncordon node..."
|
||||
sleep 5s
|
||||
done
|
||||
fi
|
||||
|
||||
function setup_uncordon {
    # Create (only once) and enable a systemd unit that uncordons this node
    # after the reboot triggered by the operating system upgrade.
    if [ ! -f /etc/systemd/system/uncordon.service ]; then
        # NOTE(review): the "> file" redirection is evaluated by the shell
        # running this script, so the unit file is written on the filesystem
        # this script sees, not by the command run through ${ssh_cmd} --
        # confirm that /etc/systemd/system here is the host's directory.
        # The heredoc delimiter is unquoted, so ${kubecontrol} and
        # ${HOSTNAME_OVERRIDE} are expanded now, when the file is written.
        # NOTE(review): ${kubecontrol} is not defined in the visible part of
        # this script -- verify it is set before this function is called.
        $ssh_cmd cat > /etc/systemd/system/uncordon.service << EOF
[Unit]
Description=magnum-uncordon
After=network.target kubelet.service

[Service]
Restart=always
RemainAfterExit=yes
ExecStart=${kubecontrol} uncordon ${HOSTNAME_OVERRIDE}

[Install]
WantedBy=multi-user.target
EOF
        ${ssh_cmd} systemctl enable uncordon.service
    fi
}
|
||||
|
||||
remote_list=`${ssh_cmd} ostree remote list`
|
||||
# Fedora Atomic 29 will be the last release before migrating to Fedora CoreOS, so we're OK to add 28 and 29 remotes directly
|
||||
if [[ ! " ${remote_list[@]} " =~ "fedora-atomic-28" ]]; then
|
||||
${ssh_cmd} ostree remote add --set=gpgkeypath=/etc/pki/rpm-gpg/RPM-GPG-KEY-fedora-28-primary --contenturl=mirrorlist=https://ostree.fedoraproject.org/mirrorlist fedora-atomic-28 https://kojipkgs.fedoraproject.org/atomic/repo/
|
||||
fi
|
||||
if [[ ! " ${remote_list[@]} " =~ "fedora-atomic-29" ]]; then
|
||||
${ssh_cmd} ostree remote add --set=gpgkeypath=/etc/pki/rpm-gpg/RPM-GPG-KEY-fedora-29-primary --contenturl=mirrorlist=https://ostree.fedoraproject.org/mirrorlist fedora-atomic-29 https://kojipkgs.fedoraproject.org/atomic/repo/
|
||||
fi
|
||||
# The uri of existing Fedora Atomic 27 remote is not accessible now, so replace it with correct uri
|
||||
if [[ " ${remote_list[@]} " =~ "fedora-atomic" ]]; then
|
||||
sed -i '
|
||||
/^url=/ s|=.*|=https://kojipkgs.fedoraproject.org/atomic/repo/|
|
||||
' /etc/ostree/remotes.d/fedora-atomic.conf
|
||||
fi
|
||||
|
||||
current_ostree_commit=`${ssh_cmd} rpm-ostree status | grep Commit | awk '{print $2}'`
|
||||
current_ostree_remote=`${ssh_cmd} rpm-ostree status | awk '/* ostree/{print $0}' | awk '{match($0,"* ostree://([^ ]+)",a)}END{print a[1]}'`
|
||||
|
||||
# NOTE(flwang): 1. Either deploy or rebase for only one upgrade
|
||||
# 2. Using rpm-ostree command instead of atomic command to keep the possibility of supporting fedora coreos 30
|
||||
if [ "$new_ostree_commit" != "" ] && [ "$current_ostree_commit" != "$new_ostree_commit" ]; then
|
||||
drain
|
||||
setup_uncordon
|
||||
${ssh_cmd} rpm-ostree deploy $new_ostree_commit
|
||||
shutdown --reboot --no-wall -t 1
|
||||
elif [ "$new_ostree_remote" != "" ] && [ "$current_ostree_remote" != "$new_ostree_remote" ]; then
|
||||
drain
|
||||
setup_uncordon
|
||||
${ssh_cmd} rpm-ostree rebase $new_ostree_remote
|
||||
shutdown --reboot --no-wall -t 1
|
||||
fi
|
||||
|
|
|
@ -324,6 +324,40 @@ class KubernetesDriver(HeatDriver):
|
|||
class FedoraKubernetesDriver(KubernetesDriver):
|
||||
"""Base driver for Kubernetes clusters."""
|
||||
|
||||
def get_heat_params(self, cluster_template):
    """Collect the upgrade-related Heat parameters from a cluster template.

    Reads the ``kube_tag``, ``ostree_commit`` and ``ostree_remote`` labels
    of ``cluster_template``. A ``kube_tag`` label is fanned out to the
    ``kube_tag``, ``kube_version``, ``master_kube_tag`` and
    ``minion_kube_tag`` parameters; each ostree label is passed through
    under its own name. Missing labels are logged at debug level and
    skipped.

    :param cluster_template: object exposing ``labels`` (mapping) and
        ``name``.
    :returns: dict of Heat parameters built from the labels found.
    :raises InvalidClusterTemplateForUpgrade: if none of the supported
        upgrade labels is present in the template.
    """
    heat_params = {}

    try:
        kube_tag = cluster_template.labels["kube_tag"]
    except KeyError:
        LOG.debug(("Cluster template %s does not contain a "
                   "valid kube_tag"), cluster_template.name)
    else:
        heat_params.update({
            "kube_tag": kube_tag,
            "kube_version": kube_tag,
            "master_kube_tag": kube_tag,
            "minion_kube_tag": kube_tag,
        })

    for ostree_tag in ("ostree_commit", "ostree_remote"):
        try:
            heat_params[ostree_tag] = cluster_template.labels[ostree_tag]
        except KeyError:
            # Pass the two values as separate lazy-format arguments; a
            # single tuple argument cannot satisfy the two %s placeholders.
            LOG.debug("Cluster template %s does not define %s",
                      cluster_template.name, ostree_tag)

    upgrade_labels = ['kube_tag', 'ostree_remote', 'ostree_commit']
    if not any(label in heat_params for label in upgrade_labels):
        reason = ("Cluster template %s does not contain any supported "
                  "upgrade labels: [%s]") % (cluster_template.name,
                                             ', '.join(upgrade_labels))
        raise exception.InvalidClusterTemplateForUpgrade(reason=reason)

    return heat_params
|
||||
|
||||
def upgrade_cluster(self, context, cluster, cluster_template,
|
||||
max_batch_size, nodegroup, scale_manager=None,
|
||||
rollback=False):
|
||||
|
@ -331,7 +365,10 @@ class FedoraKubernetesDriver(KubernetesDriver):
|
|||
osc = clients.OpenStackClients(context)
|
||||
|
||||
# Use this just to check that we are not downgrading.
|
||||
heat_params = {}
|
||||
heat_params = {
|
||||
"update_max_batch_size": max_batch_size,
|
||||
}
|
||||
|
||||
if 'kube_tag' in nodegroup.labels:
|
||||
heat_params['kube_tag'] = nodegroup.labels['kube_tag']
|
||||
|
||||
|
@ -360,19 +397,7 @@ class FedoraKubernetesDriver(KubernetesDriver):
|
|||
# hardcode what we want to send to heat.
|
||||
# Rules: 1. No downgrade 2. Explicitly override 3. Merging based on set
|
||||
# Update heat_params based on the data generated above
|
||||
try:
|
||||
heat_params = {
|
||||
"kube_tag": cluster_template.labels["kube_tag"],
|
||||
"kube_version": cluster_template.labels["kube_tag"],
|
||||
"master_kube_tag": cluster_template.labels["kube_tag"],
|
||||
"minion_kube_tag": cluster_template.labels["kube_tag"],
|
||||
"update_max_batch_size": max_batch_size
|
||||
}
|
||||
except KeyError:
|
||||
# Corner case but if the user defined an invalid CT just abort
|
||||
reason = ("Cluster template %s does not contain a "
|
||||
"valid kube_tag") % cluster_template.name
|
||||
raise exception.InvalidClusterTemplateForUpgrade(reason=reason)
|
||||
heat_params.update(self.get_heat_params(cluster_template))
|
||||
|
||||
stack_id = nodegroup.stack_id
|
||||
if nodegroup is not None and not nodegroup.is_default:
|
||||
|
|
|
@ -98,7 +98,8 @@ class K8sFedoraTemplateDefinition(k8s_template_def.K8sTemplateDefinition):
|
|||
'auto_healing_enabled', 'auto_scaling_enabled',
|
||||
'auto_healing_controller', 'magnum_auto_healer_tag',
|
||||
'draino_tag', 'autoscaler_tag',
|
||||
'min_node_count', 'max_node_count', 'npd_enabled']
|
||||
'min_node_count', 'max_node_count', 'npd_enabled',
|
||||
'ostree_remote', 'ostree_commit']
|
||||
|
||||
labels = self._get_relevant_labels(cluster, kwargs)
|
||||
|
||||
|
|
|
@ -755,6 +755,16 @@ parameters:
|
|||
default:
|
||||
true
|
||||
|
||||
ostree_remote:
|
||||
type: string
|
||||
description: The ostree remote branch to upgrade
|
||||
default: ''
|
||||
|
||||
ostree_commit:
|
||||
type: string
|
||||
description: The ostree commit to deploy
|
||||
default: ''
|
||||
|
||||
resources:
|
||||
|
||||
######################################################################
|
||||
|
@ -1069,6 +1079,8 @@ resources:
|
|||
min_node_count: {get_param: min_node_count}
|
||||
max_node_count: {get_param: max_node_count}
|
||||
npd_enabled: {get_param: npd_enabled}
|
||||
ostree_remote: {get_param: ostree_remote}
|
||||
ostree_commit: {get_param: ostree_commit}
|
||||
|
||||
kube_cluster_config:
|
||||
condition: create_cluster_resources
|
||||
|
@ -1232,6 +1244,8 @@ resources:
|
|||
auto_healing_enabled: {get_param: auto_healing_enabled}
|
||||
npd_enabled: {get_param: npd_enabled}
|
||||
auto_healing_controller: {get_param: auto_healing_controller}
|
||||
ostree_remote: {get_param: ostree_remote}
|
||||
ostree_commit: {get_param: ostree_commit}
|
||||
|
||||
outputs:
|
||||
|
||||
|
|
|
@ -530,6 +530,14 @@ parameters:
|
|||
default:
|
||||
true
|
||||
|
||||
ostree_remote:
|
||||
type: string
|
||||
description: The ostree remote branch to upgrade
|
||||
|
||||
ostree_commit:
|
||||
type: string
|
||||
description: The ostree commit to deploy
|
||||
|
||||
conditions:
|
||||
|
||||
image_based: {equals: [{get_param: boot_volume_size}, 0]}
|
||||
|
@ -539,6 +547,7 @@ conditions:
|
|||
- get_param: boot_volume_size
|
||||
- 0
|
||||
|
||||
|
||||
resources:
|
||||
######################################################################
|
||||
#
|
||||
|
@ -839,6 +848,8 @@ resources:
|
|||
group: script
|
||||
inputs:
|
||||
- name: kube_tag_input
|
||||
- name: ostree_remote_input
|
||||
- name: ostree_commit_input
|
||||
config:
|
||||
get_file: ../../common/templates/kubernetes/fragments/upgrade-kubernetes.sh
|
||||
|
||||
|
@ -851,6 +862,8 @@ resources:
|
|||
actions: ['UPDATE']
|
||||
input_values:
|
||||
kube_tag_input: {get_param: kube_tag}
|
||||
ostree_remote_input: {get_param: ostree_remote}
|
||||
ostree_commit_input: {get_param: ostree_commit}
|
||||
|
||||
outputs:
|
||||
|
||||
|
|
|
@ -312,6 +312,16 @@ parameters:
|
|||
default:
|
||||
true
|
||||
|
||||
ostree_remote:
|
||||
type: string
|
||||
description: The ostree remote branch to upgrade
|
||||
default: ''
|
||||
|
||||
ostree_commit:
|
||||
type: string
|
||||
description: The ostree commit to deploy
|
||||
default: ''
|
||||
|
||||
conditions:
|
||||
|
||||
image_based: {equals: [{get_param: boot_volume_size}, 0]}
|
||||
|
@ -321,6 +331,7 @@ conditions:
|
|||
- get_param: boot_volume_size
|
||||
- 0
|
||||
|
||||
|
||||
resources:
|
||||
|
||||
agent_config:
|
||||
|
@ -526,6 +537,8 @@ resources:
|
|||
group: script
|
||||
inputs:
|
||||
- name: kube_tag_input
|
||||
- name: ostree_remote_input
|
||||
- name: ostree_commit_input
|
||||
config:
|
||||
get_file: ../../common/templates/kubernetes/fragments/upgrade-kubernetes.sh
|
||||
|
||||
|
@ -538,6 +551,8 @@ resources:
|
|||
actions: ['UPDATE']
|
||||
input_values:
|
||||
kube_tag_input: {get_param: kube_tag}
|
||||
ostree_remote_input: {get_param: ostree_remote}
|
||||
ostree_commit_input: {get_param: ostree_commit}
|
||||
|
||||
outputs:
|
||||
|
||||
|
|
|
@ -570,6 +570,8 @@ class AtomicK8sTemplateDefinitionTestCase(BaseK8sTemplateDefinitionTestCase):
|
|||
boot_volume_size = mock_cluster.labels.get('boot_volume_size')
|
||||
boot_volume_type = mock_cluster.labels.get('boot_volume_type')
|
||||
etcd_volume_type = mock_cluster.labels.get('etcd_volume_type')
|
||||
ostree_remote = mock_cluster.labels.get('ostree_remote')
|
||||
ostree_commit = mock_cluster.labels.get('ostree_commit')
|
||||
|
||||
k8s_def = k8sa_tdef.AtomicK8sTemplateDefinition()
|
||||
|
||||
|
@ -653,7 +655,9 @@ class AtomicK8sTemplateDefinitionTestCase(BaseK8sTemplateDefinitionTestCase):
|
|||
'minion_kube_tag': kube_tag,
|
||||
'boot_volume_size': boot_volume_size,
|
||||
'boot_volume_type': boot_volume_type,
|
||||
'etcd_volume_type': etcd_volume_type
|
||||
'etcd_volume_type': etcd_volume_type,
|
||||
'ostree_remote': ostree_remote,
|
||||
'ostree_commit': ostree_commit,
|
||||
}}
|
||||
mock_get_params.assert_called_once_with(mock_context,
|
||||
mock_cluster_template,
|
||||
|
@ -1006,6 +1010,8 @@ class AtomicK8sTemplateDefinitionTestCase(BaseK8sTemplateDefinitionTestCase):
|
|||
boot_volume_size = mock_cluster.labels.get('boot_volume_size')
|
||||
boot_volume_type = mock_cluster.labels.get('boot_volume_type')
|
||||
etcd_volume_type = mock_cluster.labels.get('etcd_volume_type')
|
||||
ostree_remote = mock_cluster.labels.get('ostree_remote')
|
||||
ostree_commit = mock_cluster.labels.get('ostree_commit')
|
||||
|
||||
k8s_def = k8sa_tdef.AtomicK8sTemplateDefinition()
|
||||
|
||||
|
@ -1091,7 +1097,9 @@ class AtomicK8sTemplateDefinitionTestCase(BaseK8sTemplateDefinitionTestCase):
|
|||
'minion_kube_tag': kube_tag,
|
||||
'boot_volume_size': boot_volume_size,
|
||||
'boot_volume_type': boot_volume_type,
|
||||
'etcd_volume_type': etcd_volume_type
|
||||
'etcd_volume_type': etcd_volume_type,
|
||||
'ostree_remote': ostree_remote,
|
||||
'ostree_commit': ostree_commit,
|
||||
}}
|
||||
mock_get_params.assert_called_once_with(mock_context,
|
||||
mock_cluster_template,
|
||||
|
|
|
@ -0,0 +1,7 @@
|
|||
---
|
||||
features:
|
||||
- |
|
||||
Along with the kubernetes version upgrade support we just released, we're
|
||||
adding the support to upgrade the operating system of the k8s cluster
|
||||
(including master and worker nodes). It's an in-place upgrade leveraging the
|
||||
atomic/ostree upgrade capability.
|
Loading…
Reference in New Issue