Improve tolerance for failed nodes
- Raise SSH reconnects to 20
- Disable any_errors_fatal

Depends on https://github.com/kubernetes-incubator/kargo/pull/1201/

Change-Id: I0d20c9e0e80f87561c50957fdff1e576ec89646c
This commit is contained in:
parent
1d5dd1127e
commit
05f11e6348
|
@ -142,7 +142,8 @@ function with_ansible {
|
|||
until admin_node_command \
|
||||
ANSIBLE_CONFIG=$ADMIN_WORKSPACE/utils/kargo/ansible.cfg \
|
||||
ansible-playbook \
|
||||
--ssh-extra-args "-A\ -o\ StrictHostKeyChecking=no" -u ${ADMIN_USER} -b \
|
||||
--ssh-extra-args "-A\ -o\ StrictHostKeyChecking=no\ -o\ ConnectionAttempts=20" \
|
||||
-u ${ADMIN_USER} -b \
|
||||
--become-user=root -i $ADMIN_WORKSPACE/inventory/inventory.cfg \
|
||||
--forks=$ANSIBLE_FORKS --timeout $ANSIBLE_TIMEOUT $DEFAULT_OPTS \
|
||||
-e ansible_ssh_user=${ADMIN_USER} \
|
||||
|
|
|
@ -50,6 +50,9 @@ upstream_dns_servers:
|
|||
# has some bugs when DHCP is enabled.
|
||||
resolvconf_mode: host_resolvconf
|
||||
|
||||
# Continue deploying other hosts even if one failed
|
||||
any_errors_fatal: false
|
||||
|
||||
# Tweak kubelet monitoring parameters to avoid node/endpoint flapping
|
||||
kubelet_status_update_frequency: "20s"
|
||||
kube_controller_node_monitor_grace_period: "2m"
|
||||
|
|
Loading…
Reference in New Issue