From 174f6f5bd595aa149ac1ca7870570287b65311ab Mon Sep 17 00:00:00 2001 From: Vasyl Saienko Date: Mon, 11 Nov 2024 16:39:41 +0000 Subject: [PATCH] [mariadb] Refactor liveness/readiness probes * Move all probes into single script to reduce code duplication * Check free disk percent, fail when we consume 99% to avoid data corruption * Do not restart container when SST is in progress Change-Id: I6efc7596753dc988aa9edd7ade4d57107db98bdd --- mariadb/Chart.yaml | 2 +- mariadb/templates/bin/_health.sh.tpl | 139 ++++++++++++++++++++++++ mariadb/templates/bin/_liveness.sh.tpl | 68 ------------ mariadb/templates/bin/_readiness.sh.tpl | 60 ---------- mariadb/templates/configmap-bin.yaml | 6 +- mariadb/templates/statefulset.yaml | 20 ++-- mariadb/values.yaml | 1 + releasenotes/notes/mariadb.yaml | 1 + 8 files changed, 155 insertions(+), 142 deletions(-) create mode 100644 mariadb/templates/bin/_health.sh.tpl delete mode 100644 mariadb/templates/bin/_liveness.sh.tpl delete mode 100644 mariadb/templates/bin/_readiness.sh.tpl diff --git a/mariadb/Chart.yaml b/mariadb/Chart.yaml index 7e12ec9cc..7474e61d5 100644 --- a/mariadb/Chart.yaml +++ b/mariadb/Chart.yaml @@ -15,7 +15,7 @@ apiVersion: v1 appVersion: v10.6.7 description: OpenStack-Helm MariaDB name: mariadb -version: 0.2.59 +version: 0.2.60 home: https://mariadb.com/kb/en/ icon: http://badges.mariadb.org/mariadb-badge-180x60.png sources: diff --git a/mariadb/templates/bin/_health.sh.tpl b/mariadb/templates/bin/_health.sh.tpl new file mode 100644 index 000000000..fb4be0645 --- /dev/null +++ b/mariadb/templates/bin/_health.sh.tpl @@ -0,0 +1,139 @@ +#!/usr/bin/env bash + +########################################################################### +# Copyright 2017 The Openstack-Helm Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#########################################################################
+
+set -e
+
+MYSQL="mysql \
+  --defaults-file=/etc/mysql/admin_user.cnf \
+  --host=localhost \
+{{- if .Values.manifests.certificates }}
+  --ssl-verify-server-cert=false \
+  --ssl-ca=/etc/mysql/certs/ca.crt \
+  --ssl-key=/etc/mysql/certs/tls.key \
+  --ssl-cert=/etc/mysql/certs/tls.crt \
+{{- end }}
+  --connect-timeout 2"
+
+# Run `show <$1> like "<$2>"` and print the last field of the first output line matching <$2>.
+# $1 - what to show (callers pass "variables" or "status"); $2 - name to look up.
+mysql_query () {
+  local table=$1
+  local key=$2
+  $MYSQL -e "show ${table} like \"${key}\"" | \
+    awk "/${key}/ { print \$NF; exit }"
+}
+
+# Print the command line synopsis to stderr and exit with status 1.
+function usage {
+  echo "Usage: $0 -t <liveness|readiness> [-d <disk_usage_percent>]" 1>&2
+  exit 1
+}
+
+PROBE_TYPE=''
+
+# -t selects the probe, -d sets DISK_ALARM_LIMIT; a bad option or missing argument prints usage.
+while getopts ":t:d:" opt; do
+  case $opt in
+    t) PROBE_TYPE=$OPTARG ;;
+    d) DISK_ALARM_LIMIT=$OPTARG ;;
+    *) usage ;;
+  esac
+done
+shift $((OPTIND-1))
+
+# Readiness: exit 1 unless mysql answers a query, disk usage is below the alarm limit
+# and wsrep reports the node as ready, connected, Primary and Synced.
+check_readiness () {
+  if ! 
$MYSQL -e 'select 1' > /dev/null 2>&1 ; then
+    echo "Select from mysql failed"
+    exit 1
+  fi
+
+  # Alarm when the filesystem of the server's datadir or tmpdir reaches the limit (default 100%).
+  local partition
+  for partition in $(mysql_query variables datadir) $(mysql_query variables tmpdir); do
+    if [ "$(df --output=pcent "${partition}" | grep -Po '\d+')" -ge "${DISK_ALARM_LIMIT:-100}" ]; then
+      echo "[ALARM] Critical high disk space utilization of ${partition}"
+      exit 1
+    fi
+  done
+
+  if [ "x$(mysql_query status wsrep_ready)" != "xON" ]; then
+    echo "WSREP says the node can not receive queries"
+    exit 1
+  fi
+  if [ "x$(mysql_query status wsrep_connected)" != "xON" ]; then
+    echo "WSREP not connected"
+    exit 1
+  fi
+  if [ "x$(mysql_query status wsrep_cluster_status)" != "xPrimary" ]; then
+    echo "Not in primary cluster"
+    exit 1
+  fi
+  if [ "x$(mysql_query status wsrep_local_state_comment)" != "xSynced" ]; then
+    echo "WSREP not synced"
+    exit 1
+  fi
+}
+
+# Liveness: exit 1 if mysqld is gone or, outside mysql_upgrade/SST, unreachable or detached from wsrep.
+check_liveness () {
+  if pidof mysql_upgrade > /dev/null 2>&1 ; then
+    echo "The process mysql_upgrade is active. Skip rest checks"
+    exit 0
+  fi
+  if ! pidof mysqld > /dev/null 2>&1 ; then
+    echo "The mysqld pid not found"
+    exit 1
+  fi
+  # NOTE(mkarpin): SST process may take significant time in case of large databases,
+  # killing mysqld during SST may destroy all data on the node.
+  local datadir="/var/lib/mysql"
+  if [ -f "${datadir}/sst_in_progress" ]; then
+    echo "SST is still in progress, skip further checks as mysql won't respond"
+  else
+    # NOTE(vsaienko): in some cases maria might get stuck during IST, or when neighbours' IPs
+    # are changed. Here we check that we can connect to mysql socket to ensure process is alive.
+    if ! 
$MYSQL -e "show status like 'wsrep_cluster_status'" > /dev/null 2>&1 ; then + echo "Can't connect to mysql socket" + exit 1 + fi + # Detect node that is not connected to wsrep provider + if [ "x$(mysql_query status wsrep_ready)" != "xON" ]; then + echo "WSREP says the node can not receive queries" + exit 1 + fi + if [ "x$(mysql_query status wsrep_connected)" != "xON" ]; then + echo "WSREP not connected" + exit 1 + fi + fi +} + +case $PROBE_TYPE in + liveness) + check_liveness + ;; + readiness) + check_readiness + ;; + *) + echo "Unknown probe type: ${PROBE_TYPE}" + usage + ;; +esac diff --git a/mariadb/templates/bin/_liveness.sh.tpl b/mariadb/templates/bin/_liveness.sh.tpl deleted file mode 100644 index 485b61793..000000000 --- a/mariadb/templates/bin/_liveness.sh.tpl +++ /dev/null @@ -1,68 +0,0 @@ -#!/usr/bin/env bash - -{{/* -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/}} - -set -e - -MYSQL="mysql \ - --defaults-file=/etc/mysql/admin_user.cnf \ - --host=localhost \ -{{- if .Values.manifests.certificates }} - --ssl-verify-server-cert=false \ - --ssl-ca=/etc/mysql/certs/ca.crt \ - --ssl-key=/etc/mysql/certs/tls.key \ - --ssl-cert=/etc/mysql/certs/tls.crt \ -{{- end }} - --connect-timeout 2" - -mysql_status_query () { - STATUS=$1 - $MYSQL -e "show status like \"${STATUS}\"" | \ - awk "/${STATUS}/ { print \$NF; exit }" -} - -{{- if eq (int .Values.pod.replicas.server) 1 }} -if ! 
$MYSQL -e 'select 1' > /dev/null 2>&1 ; then - exit 1 -fi - -{{- else }} -if [ -f /var/lib/mysql/sst_in_progress ]; then - # SST in progress, with this node receiving a snapshot. - # MariaDB won't be up yet; avoid killing. - exit 0 -fi - -if [ "x$(mysql_status_query wsrep_ready)" != "xON" ]; then - # WSREP says the node can receive queries - exit 1 -fi - -if [ "x$(mysql_status_query wsrep_connected)" != "xON" ]; then - # WSREP connected - exit 1 -fi - -if [ "x$(mysql_status_query wsrep_cluster_status)" != "xPrimary" ]; then - # Not in primary cluster - exit 1 -fi - -wsrep_local_state_comment=$(mysql_status_query wsrep_local_state_comment) -if [ "x${wsrep_local_state_comment}" != "xSynced" ] && [ "x${wsrep_local_state_comment}" != "xDonor/Desynced" ]; then - # WSREP not synced or not sending SST - exit 1 -fi -{{- end }} diff --git a/mariadb/templates/bin/_readiness.sh.tpl b/mariadb/templates/bin/_readiness.sh.tpl deleted file mode 100644 index fd14c7783..000000000 --- a/mariadb/templates/bin/_readiness.sh.tpl +++ /dev/null @@ -1,60 +0,0 @@ -#!/usr/bin/env bash - -{{/* -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-*/}} - -set -e - -MYSQL="mysql \ - --defaults-file=/etc/mysql/admin_user.cnf \ - --host=localhost \ -{{- if .Values.manifests.certificates }} - --ssl-verify-server-cert=false \ - --ssl-ca=/etc/mysql/certs/ca.crt \ - --ssl-key=/etc/mysql/certs/tls.key \ - --ssl-cert=/etc/mysql/certs/tls.crt \ -{{- end }} - --connect-timeout 2" - -mysql_status_query () { - STATUS=$1 - $MYSQL -e "show status like \"${STATUS}\"" | \ - awk "/${STATUS}/ { print \$NF; exit }" -} - -if ! $MYSQL -e 'select 1' > /dev/null 2>&1 ; then - exit 1 -fi - -{{- if gt (int .Values.pod.replicas.server) 1 }} -if [ "x$(mysql_status_query wsrep_ready)" != "xON" ]; then - # WSREP says the node can receive queries - exit 1 -fi - -if [ "x$(mysql_status_query wsrep_connected)" != "xON" ]; then - # WSREP connected - exit 1 -fi - -if [ "x$(mysql_status_query wsrep_cluster_status)" != "xPrimary" ]; then - # Not in primary cluster - exit 1 -fi - -if [ "x$(mysql_status_query wsrep_local_state_comment)" != "xSynced" ]; then - # WSREP not synced - exit 1 -fi -{{- end }} diff --git a/mariadb/templates/configmap-bin.yaml b/mariadb/templates/configmap-bin.yaml index ed2ba827b..3e80c05cc 100644 --- a/mariadb/templates/configmap-bin.yaml +++ b/mariadb/templates/configmap-bin.yaml @@ -27,10 +27,8 @@ data: image-repo-sync.sh: | {{- include "helm-toolkit.scripts.image_repo_sync" . | indent 4 }} {{- end }} - readiness.sh: | -{{ tuple "bin/_readiness.sh.tpl" . | include "helm-toolkit.utils.template" | indent 4 }} - liveness.sh: | -{{ tuple "bin/_liveness.sh.tpl" . | include "helm-toolkit.utils.template" | indent 4 }} + health.sh: | +{{ tuple "bin/_health.sh.tpl" . | include "helm-toolkit.utils.template" | indent 4 }} start.py: | {{ tuple "bin/_start.py.tpl" . 
| include "helm-toolkit.utils.template" | indent 4 }} test.sh: | diff --git a/mariadb/templates/statefulset.yaml b/mariadb/templates/statefulset.yaml index e1cfcdfe3..467a97ef3 100644 --- a/mariadb/templates/statefulset.yaml +++ b/mariadb/templates/statefulset.yaml @@ -1,7 +1,7 @@ {{/* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. -You may obtain a copy of the License at +You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 @@ -15,12 +15,18 @@ limitations under the License. {{- define "mariadbReadinessProbe" }} exec: command: - - /tmp/readiness.sh + - /tmp/health.sh + - -t + - readiness + - -d + - {{ .Values.pod.probes.server.mariadb.readiness.disk_usage_percent | quote }} {{- end }} {{- define "mariadbLivenessProbe" }} exec: command: - - /tmp/liveness.sh + - /tmp/health.sh + - -t + - liveness {{- end }} {{- if (.Values.global).subchart_release_name }} @@ -226,12 +232,8 @@ spec: subPath: stop.sh readOnly: true - name: mariadb-bin - mountPath: /tmp/readiness.sh - subPath: readiness.sh - readOnly: true - - name: mariadb-bin - mountPath: /tmp/liveness.sh - subPath: liveness.sh + mountPath: /tmp/health.sh + subPath: health.sh readOnly: true - name: mariadb-etc mountPath: /etc/mysql/my.cnf diff --git a/mariadb/values.yaml b/mariadb/values.yaml index 7051a1125..9f6dfb138 100644 --- a/mariadb/values.yaml +++ b/mariadb/values.yaml @@ -65,6 +65,7 @@ pod: mariadb: readiness: enabled: true + disk_usage_percent: 99 params: initialDelaySeconds: 30 periodSeconds: 30 diff --git a/releasenotes/notes/mariadb.yaml b/releasenotes/notes/mariadb.yaml index fbdebcfe9..3f19599d8 100644 --- a/releasenotes/notes/mariadb.yaml +++ b/releasenotes/notes/mariadb.yaml @@ -75,4 +75,5 @@ mariadb: - 0.2.57 Remove useless retries on conflicts during cm update - 0.2.58 Prevent TypeError in get_active_endpoint function - 0.2.59 Give more time on resolving configmap update conflicts + - 0.2.60 
Refactor liveness/readiness probes ...