Make kube-proxy liveness probe more cautious

This update requires the list of services without endpoints detected on
the host to be unchanged between consecutive probe runs before it causes
a failure.

This avoids race conditions in large deployments, where new services
being added over several minutes would otherwise trigger probe failures.

Change-Id: Ie65c8613cb85bfdf61d41099540d3499ea1de817
This commit is contained in:
Mark Burnett 2018-10-10 10:02:45 -05:00
parent 83b65b358d
commit eaeb3ae250
1 changed files with 17 additions and 5 deletions

View File

@ -2,6 +2,8 @@
set -e
IPTS_DIR=/tmp/liveness
FAILURE=0
{{- if .Values.livenessProbe.whitelist }}
WHITELIST='({{- join "|" .Values.livenessProbe.whitelist -}})'
@ -15,12 +17,23 @@ if [[ $(echo -e "${REQUEST}" | socat - TCP4:localhost:10256 | grep -sc '200 OK')
FAILURE=1
fi
if [[ $(iptables-save {{- if .Values.livenessProbe.whitelist }} | grep -Ev "${WHITELIST}" {{- end }} | grep -sc 'has no endpoints') -gt 0 ]]; then
echo Some non-whitelisted services have no endpoints:
iptables-save | grep 'has no endpoints'
FAILURE=1
# Snapshot the (non-whitelisted) iptables rules that report a service with no
# endpoints, and compare the snapshot with the one saved by the previous probe
# run. The probe is only failed when the snapshot is non-empty AND identical
# to the previous one; a changing list is treated as churn and passes.
mkdir -p "${IPTS_DIR}"
iptables-save {{- if .Values.livenessProbe.whitelist }} | grep -Ev "${WHITELIST}" {{- end }} | grep -s 'has no endpoints' | sort > "${IPTS_DIR}/current"
# -s is true only for an existing, non-empty file. (`wc -l FILE` prints
# "N FILE", which cannot be compared numerically with -gt.)
if [[ -s "${IPTS_DIR}/current" ]]; then
    # On the first run there is no previous snapshot to compare against, so
    # nothing can be called "static" yet; -f tests that the file exists.
    if [[ -f "${IPTS_DIR}/previous" ]]; then
        # cmp -s: exit status only, no "files differ" chatter in probe output.
        if cmp -s "${IPTS_DIR}/current" "${IPTS_DIR}/previous"; then
            echo Some non-whitelisted services have no endpoints:
            cat "${IPTS_DIR}/current"
            FAILURE=1
        else
            echo Detected issues have changed. Passing check:
            # diff exits 1 when the files differ, which is the expected case
            # here; do not let that non-zero status abort the script under
            # `set -e`.
            diff "${IPTS_DIR}/previous" "${IPTS_DIR}/current" || true
        fi
    fi
fi
# Keep this run's snapshot as the baseline for the next probe run.
mv "${IPTS_DIR}/current" "${IPTS_DIR}/previous"
# Unique, sorted destination IPs taken from iptables-save lines that match
# 'KUBE-SEP.*to-destination'; the sed expression keeps only the text between
# "to-destination " and the last colon on the line (i.e. drops the port).
IPTABLES_IPS=$(iptables-save | grep -E 'KUBE-SEP.*to-destination' | sed 's/.*to-destination \(.*\):.*/\1/' | sort -u)
# Unique, sorted .ip values of every address in every subset of every
# endpoints object across all namespaces. The jq `arrays` / `objects` filters
# pass through only values of that type -- presumably so endpoints with
# missing (null) subsets or addresses are skipped rather than causing a jq
# error; confirm against real cluster output.
KUBECTL_IPS=$(kubectl get --all-namespaces -o json endpoints | jq -r '.items | arrays | .[] | objects | .subsets | arrays | .[] | objects | .addresses | arrays | .[] | objects | .ip' | sort -u)
@ -31,7 +44,6 @@ if [[ $(comm -23 <(echo "${IPTABLES_IPS}") <(echo "${KUBECTL_IPS}")) ]]; then
comm -23 <(echo "${IPTABLES_IPS}") <(echo "${KUBECTL_IPS}")
fi
if [[ "${FAILURE}" == "1" ]]; then
exit 1
fi