Merge "Make K8S proxy health check more aggressive"
This commit is contained in:
commit
018496fd18
26
charts/proxy/templates/bin/_liveness-probe.sh.tpl
Normal file
26
charts/proxy/templates/bin/_liveness-probe.sh.tpl
Normal file
@ -0,0 +1,26 @@
|
||||
#!/bin/bash
#
# Liveness probe for kube-proxy. This file is a Helm template; the
# whitelist handling below is resolved at render time.
#
# The probe fails (exit 1) when either of these holds:
#   1. The HTTP response from /healthz on localhost:10256 does not
#      contain '200 OK'.
#   2. iptables-save shows 'has no endpoints' rules for services that are
#      not filtered out by the whitelist.
#
# Values:
#   .Values.livenessProbe.whitelist - optional list; entries are joined
#     with '|' into one extended regex, and every iptables-save line that
#     matches it is ignored by check 2.
#
# Both checks always run, so a single probe failure reports every problem
# that was found.

set -e

FAILURE=0
{{- if .Values.livenessProbe.whitelist }}
WHITELIST='({{- join "|" .Values.livenessProbe.whitelist -}})'
{{- end }}

REQUEST='GET /healthz HTTP/1.0\r\nHost: localhost:10256\r\n'

# Query the health endpoint exactly once and keep the answer, so the
# diagnostics below show the very response that was evaluated. The
# trailing '\r\n' in the printf format adds the blank line that ends the
# HTTP header section. `|| true` stops `set -e` from aborting the script
# when socat cannot connect; that case is reported by the check below.
RESPONSE=$(printf '%b\r\n' "${REQUEST}" | socat - TCP4:localhost:10256 2>&1 || true)

if ! grep -q '200 OK' <<< "${RESPONSE}"; then
  echo Failed proxy built-in HTTP health check.
  echo "${RESPONSE}"
  FAILURE=1
fi

# Collect the offending rules once, with the whitelist filter applied, so
# the list printed below contains only the services that caused the
# failure. `|| true` covers grep's non-zero status when nothing matches.
NO_ENDPOINTS=$(iptables-save {{- if .Values.livenessProbe.whitelist }} | grep -Ev "${WHITELIST}" {{- end }} | grep 'has no endpoints' || true)

if [[ -n "${NO_ENDPOINTS}" ]]; then
  echo Some non-whitelisted services have no endpoints:
  echo "${NO_ENDPOINTS}"
  FAILURE=1
fi

if [[ "${FAILURE}" == "1" ]]; then
  exit 1
fi
|
5
charts/proxy/templates/bin/_readiness-probe.sh.tpl
Normal file
5
charts/proxy/templates/bin/_readiness-probe.sh.tpl
Normal file
@ -0,0 +1,5 @@
|
||||
#!/bin/bash
#
# Readiness probe for kube-proxy.
#
# Succeeds when the output of iptables-save contains a line mentioning
# 'default/kubernetes:https'. The exit status of grep is the exit status
# of the script, so a missing rule makes the probe fail.

set -e

EXPECTED_RULE='default/kubernetes:https'

iptables-save | grep "${EXPECTED_RULE}"
|
26
charts/proxy/templates/configmap-bin.yaml
Normal file
26
charts/proxy/templates/configmap-bin.yaml
Normal file
@ -0,0 +1,26 @@
|
||||
{{/*
|
||||
# Copyright (c) 2018 AT&T Intellectual Property. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License. */}}
|
||||
|
||||
---
# ConfigMap that carries the kube-proxy probe scripts. Each key under
# `data` holds one script: the script template is rendered through
# helm-toolkit.utils.template and indented by 4 spaces so that it sits
# inside the YAML block scalar.
apiVersion: v1
kind: ConfigMap
metadata:
  name: kubernetes-proxy-bin
data:
  liveness-probe.sh: |
{{ tuple "bin/_liveness-probe.sh.tpl" . | include "helm-toolkit.utils.template" | indent 4 }}
  readiness-probe.sh: |
{{ tuple "bin/_readiness-probe.sh.tpl" . | include "helm-toolkit.utils.template" | indent 4 }}
...
|
@ -63,24 +63,23 @@ spec:
|
||||
- name: KUBERNETES_SERVICE_PORT
|
||||
value: {{ .Values.kube_service.port | quote }}
|
||||
livenessProbe:
|
||||
httpGet:
|
||||
host: 127.0.0.1
|
||||
path: /healthz
|
||||
port: 10256
|
||||
failureThreshold: 3
|
||||
initialDelaySeconds: 15
|
||||
periodSeconds: 10
|
||||
successThreshold: 1
|
||||
timeoutSeconds: 5
|
||||
{{ toYaml .Values.livenessProbe.config | indent 10 }}
|
||||
exec:
|
||||
command:
|
||||
- /tmp/bin/liveness-probe.sh
|
||||
readinessProbe:
|
||||
exec:
|
||||
command:
|
||||
- sh
|
||||
- -c
|
||||
- |-
|
||||
set -ex
|
||||
iptables-save | grep 'default/kubernetes:https'
|
||||
- /tmp/bin/readiness-probe.sh
|
||||
initialDelaySeconds: 15
|
||||
periodSeconds: 15
|
||||
volumeMounts:
|
||||
- name: bin
|
||||
mountPath: /tmp/bin/
|
||||
serviceAccountName: kube-proxy
|
||||
volumes:
|
||||
- name: bin
|
||||
configMap:
|
||||
name: kubernetes-proxy-bin
|
||||
defaultMode: 0555
|
||||
{{- end }}
|
||||
|
@ -55,3 +55,17 @@ network:
|
||||
kube_service:
|
||||
host: 127.0.0.1
|
||||
port: 6553
|
||||
|
||||
livenessProbe:
|
||||
config:
|
||||
# NOTE(mark-burnett): To avoid cascading failure modes, it is
|
||||
# important that these values are configured to avoid the possibility
|
||||
# of CrashLoopBackOff for this pod. Otherwise, a small non-impacting
|
||||
# issue could disable kube-proxy for the entire site.
|
||||
failureThreshold: 10
|
||||
initialDelaySeconds: 15
|
||||
periodSeconds: 35
|
||||
successThreshold: 1
|
||||
timeoutSeconds: 10
|
||||
whitelist:
|
||||
# - postgres
|
||||
|
Loading…
x
Reference in New Issue
Block a user