Michel Thebeau adc792cd48 do not rekey when vault server pods need upgrade
Changes in the upgrade procedure cause vault server pods to require
a restart in order to update to the new server version.  The work to
restart the pods is performed in another commit.

Defer a request for vault rekey until the server pods match the expected
version.  The rekey procedure will not proceed if vault pods are being
restarted, and so we should not start a rekey when it is anticipated
that vault pods will be restarted.

Test Plan:
PASS  bashate
PASS  unit test
PASS  vault sanity master branch, rekey
PASS  simplex upgrade (manual server pod restart)
PASS  duplex 2+1 (vault ha, 3 replicas) application-update

Story: 2011073
Task: 50814

Change-Id: I91334d0577148c1e3f7bc674ab2a3edfaced1d1c
Signed-off-by: Michel Thebeau <Michel.Thebeau@windriver.com>
2024-08-15 17:10:25 +00:00


apiVersion: v1
data:
init.sh: |
#!/bin/bash
# Get the CA path from environment vars
CERT=$CA_CERT
# Store the cert as a one-liner for curl purposes
CA_ONELINE=$(awk '{printf "%s\\n", $0}' $CERT)
# Template values from helm
VAULT_NS={{ .Release.Namespace }}
VAULT_NAME={{ .Values.vault.name }}
VAULT_FN={{ .Values.vault.fullname }}
HA_REPLICAS={{ .Values.server.ha.replicas }}
VAULT_VERSION={{ .Values.server.version }}
# Set the domain for resolving pod names
DOMAIN="${VAULT_NS}.pod.cluster.local"
SVCDOMAIN="${VAULT_NS}.svc.cluster.local"
# define host targets and port
POD_TARGET_BASE="$DOMAIN" # requires 'DNS NAME' of pod
ACTIVE_TARGET="${VAULT_FN}-active.${SVCDOMAIN}" # only the active
TARGET_PORT=8200
# impermanent location to store files while running
WORKDIR=/workdir
# Health subdirectory. All vault-manager health-related files
# will be placed here.
HEALTH_SUBDIR=$WORKDIR/health
mkdir -p $HEALTH_SUBDIR
# Selection of kubectl version from helm override
KUBECTL=kubectl
KUBECTL_HELM_OVERRIDE={{ .Values.manager.k8s.client_version }}
# Trap and trap notification file. When SIGTERM is sent to this pod
# we want to exit promptly and gracefully.
TRAPFILE=$WORKDIR/exit_on_trap
trap "touch $TRAPFILE" SIGTERM
# when specifying a trap for debug, remember it with this variable
# reserve trap '0' for disabling a debugging trap request
DEBUGGING_TRAP=0
# Pause notification file. An option to permit vault-manager to be
# paused at any of the exit_on_trap code points. The use cases may
# include:
# - running an external procedure that should not be permitted to
# conflict with vault-manager's operation
# - permitting time for a developer to set up conditions for
#   debug and test
PAUSEFILE=$WORKDIR/pause_on_trap
PAUSE_RATE=1 # rate at which to test for unpause
EARLY_PAUSE={{ .Values.manager.pause }}
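# A sketch of how the pause hook can be used from outside the pod
# (pod name and namespace are illustrative):
#   pause at the next trap point:
#     kubectl exec -n vault sva-vault-manager-0 -- \
#         bash -c 'touch /workdir/pause_on_trap'
#   pause only at trap point 7:
#     kubectl exec -n vault sva-vault-manager-0 -- \
#         bash -c 'echo 7 > /workdir/pause_on_trap'
#   resume:
#     kubectl exec -n vault sva-vault-manager-0 -- \
#         bash -c 'rm /workdir/pause_on_trap'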
# Healthcheck Fail file. If this file exists then we have decided to
# force vault manager to fail the health check
HEALTH_CHECK_FAIL=$HEALTH_SUBDIR/health_check_fail
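# For example, creating this file forces health_check to fail on
# its next call (unless an excuse below applies):
#   touch /workdir/health/health_check_fail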
# Healthcheck excuses.
HEALTH_CHECK_DISABLED=$HEALTH_SUBDIR/health_check_disabled
HEALTH_EXCUSE_NETWORK=$HEALTH_SUBDIR/health_excuse_network
HEALTH_EXCUSE_INIT=$HEALTH_SUBDIR/health_excuse_init
HEALTH_EXCUSE_PAUSE=$HEALTH_SUBDIR/health_excuse_pause
# Healthcheck excuse messages.
HC_MSG_DISABLED="Healthcheck is disabled."
HC_MSG_NETWORK="Vault manager has initiated a network operation."
HC_MSG_INIT="Vault manager is currently initializing."
HC_MSG_PAUSE="Vault manager is paused for external operation."
# Enable healthcheck excuses.
HC_DISABLE={{ .Values.manager.healthcheck.disableHC }}
HC_ENABLE_NETWORK={{ .Values.manager.healthcheck.enableNetwork }}
HC_ENABLE_INIT={{ .Values.manager.healthcheck.enableInit }}
HC_ENABLE_PAUSE={{ .Values.manager.healthcheck.enablePause }}
# set the default manager mode; modes include
# VAULT_MANAGER (default)
# MOUNT_HELPER
# INTERACTIVE (i.e., when this script is sourced by an author)
if [ -z "$MANAGER_MODE" ]; then
MANAGER_MODE="VAULT_MANAGER"
fi
if [[ "${BASH_SOURCE[0]}" != "${0}" ]]; then
MANAGER_MODE="INTERACTIVE"
fi
# Maximum sleep seconds for mount-helper before exiting
MOUNT_HELPER_MAX_TIME=60
# Maximum seconds to wait for mount-helper pod to start
MAX_POD_RUN_TRIES=10
# Maximum seconds to wait for the vault-manager pod to exit.
# Vault-manager does not respond to SIGTERM, so this can take 30
# seconds.
TERMINATE_TRIES_MAX={{ .Values.manager.waitTermination.maxTries }}
TERMINATE_TRIES_SLEEP={{ .Values.manager.waitTermination.sleepTime }}
# Vault key share configuration
KEY_SECRET_SHARES=5
KEY_REQUIRED_THRESHOLD=3
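# i.e., vault splits the master key into 5 shards, any 3 of which
# are sufficient to unseal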
# Enable vault rekey upon conversion of storage from PVC to k8s
# secrets
AUTO_REKEY_CONVERT={{ .Values.manager.rekey.enableOnPVCConversion }}
# Keep track of vault-manager restarting the rekey procedure; if
# this variable is not true (0) and a rekey procedure is in
# progress, then vault-manager was restarted
REKEY_STARTED=1
# Vault manager will rekey the vault at a time when the vault
# servers are stable for a period of time specified by
# REKEY_STABLE_TIME seconds
REKEY_STABLE_TIME=300
# Global variable to share rekey status
REKEY_STATUS_JSON=''
# Keep track of shards that were last successful
SHARDS_LAST_SUCCESSFUL="cluster-key"
# Records for seal status state machine:
PODREC_F="$WORKDIR/previous_pods_status.txt"
PODREC_TMP_F="$WORKDIR/new_pods_status.txt"
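# Each record has the form /<pod>/<dns-name>/<sealed>/[counter],
# e.g. (values illustrative):
#   /sva-vault-0/172-16-226-97/true/3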
# Vault server health query timeout during HA recovery scenario
QUERY_TMOUT={{ .Values.manager.api.healthQueryTimeout }}
# Default curl timeout for REST API commands to vault server.
# This value is what testing shows is the default timeout.
# Specifying it explicitly for clarity.
API_TMOUT=120
# API timeout for unseal operations
API_UNSEAL_OP_TMOUT={{ .Values.manager.api.unsealOpTimeout }}
# API timeout values for rekey operations
API_REKEY_QUERY_TMOUT={{ .Values.manager.api.rekeyStatusTimeout }}
API_REKEY_OP_TMOUT={{ .Values.manager.api.rekeyOpTimeout }}
STATEFULSET_RATE=5
INIT_CONVERGE_TIME=10
JOIN_RATE=5
JOIN_CONVERGE_TIME=1
UNSEAL_RATE=10
UNSEAL_CONVERGE_TIME=3
STATUS_RATE={{ .Values.manager.statusCheckRate }}
if [ -z "$STATUS_RATE" ] || [ -n "${STATUS_RATE//[0-9]}" ] || \
[ $STATUS_RATE -lt 1 ]; then
STATUS_RATE=5
fi
# with STATUS_RATE, the period to delay unseal
# STATUS_RATE * STATEMACH_START seconds
STATEMACH_START={{ .Values.manager.unsealWaitIntervals }}
if [ -z "$STATEMACH_START" ]; then
STATEMACH_START=3
fi
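# e.g., with STATUS_RATE=5 and STATEMACH_START=3, a newly sealed
# server is observed for about 15 seconds before unseal is attempted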
# Heartbeat file location
HB_FILE=$HEALTH_SUBDIR/heartbeat
# Maximum threshold time in seconds that is allowed between
# a heartbeat call and health_check call.
HB_THRESHOLD={{ .Values.manager.healthcheck.heartbeatThreshold }}
# Log levels
DEBUG=1
INFO=2
WARNING=3
ERROR=4
FATAL=5
# Default log level and the set log level (Initially set as default).
# If the log function detects an override file, then it will switch
# the set log level and then delete it.
DEFAULT_LOG_LEVEL=$INFO
LOG_LEVEL={{ .Values.manager.log.defaultLogLevel }}
LOG_OVERRIDE_FILE="$WORKDIR/log_level"
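# For example, to raise verbosity to DEBUG at runtime, write the
# numeric level to the override file; the log function reads and
# then deletes it:
#   echo 1 > /workdir/log_level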
# FUNCTIONS
# takes major/minor version of k8s and compares
# for example: v1.28 > v1.27 > v1.26
#
# Returns:
# 0 left is larger
# 1 equal
# 2 right is larger
function compareK8sVersion {
local left="$1"
local right="$2"
# strip leading 'v'
left="${left#v}"
right="${right#v}"
# compare the strings
if [ "$left" == "$right" ]; then
return 1
fi
# compare major
if [ "${left%.*}" -gt "${right%.*}" ]; then
return 0
elif [ "${left%.*}" -lt "${right%.*}" ]; then
return 2
fi
# compare the minor
if [ "${left#*.}" -gt "${right#*.}" ]; then
return 0
fi
return 2
}
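# Example usage (return codes per the comment above):
#   compareK8sVersion v1.28 v1.27   # returns 0 (left is larger)
#   compareK8sVersion v1.27 v1.27   # returns 1 (equal)
#   compareK8sVersion v1.26 v1.27   # returns 2 (right is larger)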
# Give kubectl an opportunity to express complaints in the log
function k8sComplain {
local result
result="$( $KUBECTL version -o json 2>&1 >/dev/null )"
if [ -n "$result" ]; then
log $WARNING "kubectl: $result"
fi
}
# Double-check that the binary exists before setting the specified
# value of KUBECTL
function switchK8sVersion {
local select="$1"
local fname="kubectl.$select"
local newbin="${KUBECTL_INSTALL_PATH}/$fname"
which "$fname" >/dev/null
if [ $? -ne 0 -o ! -f "$newbin" ]; then
log $ERROR "Missing kubectl version: $select"
k8sComplain
return 1
fi
if [ "$KUBECTL" != "$fname" ]; then
KUBECTL="$fname"
log $INFO "Switching to use kubectl version $select"
fi
k8sComplain
return 0
}
# Select the version of kubectl matching the running server
function pickK8sVersion {
local result
local serverver
local majorver
local minorver
local select=""
local majmin=""
local maxver
local minver
# omit this code if the image does not support kubectl versions
if [ -z "$KUBE_VERSIONS" ]; then
k8sComplain
return
fi
if [ -n "$KUBECTL_HELM_OVERRIDE" ]; then
# pick the binary requested, if it exists
switchK8sVersion "$KUBECTL_HELM_OVERRIDE"
if [ $? -eq 0 ]; then
return
fi
log $ERROR "kubectl version from helm-override not" \
"available: $KUBECTL_HELM_OVERRIDE"
fi
# use -o json for consistent usage, as opposed to --short
result="$( $KUBECTL version -o json 2>/dev/null )"
if [ $? -ne 0 ]; then
log $ERROR "Unable to get k8s server version"
# no change in value of KUBECTL
k8sComplain
return
fi
serverver="$( jq -r '.serverVersion.gitVersion' <<<"$result" \
| grep "[0-9]" )"
majorver="$( jq -r '.serverVersion.major' <<<"$result" \
| grep "[0-9]" )"
minorver="$( jq -r '.serverVersion.minor' <<<"$result" \
| grep "[0-9]" )"
if [ -z "$serverver" -o -z "$majorver" -o -z "$minorver" ]; then
log $ERROR "Unable to detect K8s server version:" \
"["$result"]"
# no change in value of KUBECTL
k8sComplain
return
fi
# pick matching client major/minor version
for select in $KUBE_VERSIONS noverhere; do
majmin="v${majorver}.${minorver}"
if [[ "$select" =~ ^$majmin ]]; then
break
fi
done
if [ "$select" == noverhere ]; then
# Try to pick a near version. We really shouldn't be in
# this situation, but here is a compromise. This algorithm
# assumes that there are no omitted versions in the series
# of KUBE_VERSIONS, and that they are sorted largest to
# smallest in that list
maxver="$( awk '{print $1}' <<<"$KUBE_VERSIONS" )"
minver="$( awk '{print $NF}' <<<"$KUBE_VERSIONS" )"
compareK8sVersion ${serverver%.*} ${maxver%.*}
if [ "$?" -le 1 ]; then
select="$maxver"
else
compareK8sVersion ${minver%.*} ${serverver%.*}
if [ "$?" -le 1 ]; then
select="$minver"
else
log $ERROR "Could not pick nearest version for kubectl"
k8sComplain
return
fi
fi
fi
switchK8sVersion "${select%.*}"
}
# Convert log level to text for log message
function log_to_str {
local level="$1"
local logStr
case "$level" in
$INFO)
logStr="INFO"
;;
$DEBUG)
logStr="DEBUG"
;;
$WARNING)
logStr="WARNING"
;;
$ERROR)
logStr="ERROR"
;;
$FATAL)
logStr="FATAL"
;;
esac
echo "$logStr"
}
# Print the specified message to stdout if the call's specified
# level is at least the configured log level
function log {
local lvl="$1"
local logStr
local newLogLevel
# check if log override file "Exists"
if [ -f $LOG_OVERRIDE_FILE ] \
&& [ "$MANAGER_MODE" != "INTERACTIVE" ]; then
newLogLevel=$(cat $LOG_OVERRIDE_FILE)
# validation for newLogLevel
if [[ "$newLogLevel" =~ ^[1-5]$ ]]; then
LOG_LEVEL=$newLogLevel
logStr="$( log_to_str "$LOG_LEVEL" )"
echo "$(date +%Y-%m-%dT%H-%M-%S) DEBUG" \
"Log level set to $logStr"
else
echo "$(date +%Y-%m-%dT%H-%M-%S) DEBUG" \
"Invalid log level read from $LOG_OVERRIDE_FILE."
fi
rm $LOG_OVERRIDE_FILE
fi
# validate LOG_LEVEL. If it is not valid, then use
# DEFAULT_LOG_LEVEL instead.
if [[ ! "$LOG_LEVEL" =~ ^[1-5]$ ]]; then
echo "$(date +%Y-%m-%dT%H-%M-%S) DEBUG" \
"Invalid log level detected, will be set to" \
"$( log_to_str "$DEFAULT_LOG_LEVEL" )"
LOG_LEVEL=$DEFAULT_LOG_LEVEL
fi
# check if the log level for this call is equal to or higher
# than the set log level
if [ "$lvl" -ge "$LOG_LEVEL" ]; then
# print log
logStr="$( log_to_str "$lvl" )"
echo "$(date +%Y-%m-%dT%H-%M-%S) $logStr ${@:2}"
fi
}
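# Example: a call such as
#   log $WARNING "Missing kubectl version: $select"
# prints a line of the form (timestamp illustrative):
#   2024-08-15T17-10-25 WARNING Missing kubectl version: v1.28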
if ! [[ "$QUERY_TMOUT" =~ ^[0-9]+$ ]]; then
log $WARNING ".Values.manager.healthQueryTimeout not an integer"
QUERY_TMOUT=""
fi
# Check the current health status for the vault manager.
# Return 0 if vault manager is healthy
# Return 1 if vault manager is unhealthy
function health_check {
local excuse_reason=()
local current_timestamp=0
local heartbeat_timestamp=0
local heartbeat_passed=false
local heartbeat_age=0
current_timestamp="$( date +%s )"
heartbeat_timestamp="$( stat -c %X $HB_FILE )"
heartbeat_age=$(( current_timestamp - heartbeat_timestamp ))
if [ $heartbeat_age -gt $HB_THRESHOLD ]; then
log $DEBUG "Heartbeat check failed"
heartbeat_passed=false
else
heartbeat_passed=true
fi
log $DEBUG "heartbeat time: $heartbeat_age"
if $heartbeat_passed && [ ! -f $HEALTH_CHECK_FAIL ]; then
return 0
else
if [ "$HC_DISABLE" = "true" ] || [ -f $HEALTH_CHECK_DISABLED ]; then
excuse_reason+=("$HC_MSG_DISABLED")
elif [ "$HC_ENABLE_PAUSE" = "true" ] && [ -f $HEALTH_EXCUSE_PAUSE ]; then
excuse_reason+=("$( cat $HEALTH_EXCUSE_PAUSE )")
elif [ "$HC_ENABLE_NETWORK" = "true" ] && [ -f $HEALTH_EXCUSE_NETWORK ]; then
excuse_reason+=("$( cat $HEALTH_EXCUSE_NETWORK )")
elif [ "$HC_ENABLE_INIT" = "true" ] && [ -f $HEALTH_EXCUSE_INIT ]; then
excuse_reason+=("$( cat $HEALTH_EXCUSE_INIT )")
fi
if [ ${#excuse_reason[@]} -gt 0 ]; then
log $INFO "Health_check fail has been excused. Reasons:"
for reason in "${excuse_reason[@]}"; do
log $INFO "$reason"
done
return 0
else
log $INFO "Health_check has failed."
return 1
fi
fi
}
# Heartbeat function touches the heartbeat file to update the timestamp,
# and updates the current heartbeat timestamp
function heartbeat {
# Do nothing if mode is not VAULT_MANAGER
if [ "$MANAGER_MODE" != "VAULT_MANAGER" ]; then
return
fi
touch $HB_FILE
}
# Create a health excuse file and log the reason.
function health_excuse_create {
local excuse_file_name="$1"
local excuse_reason="$2"
# Do nothing if mode is not VAULT_MANAGER
if [ "$MANAGER_MODE" != "VAULT_MANAGER" ]; then
return
fi
heartbeat
# check if the requested excuse file already exists.
# If not, create the excuse file and log the message
if [ -f $excuse_file_name ]; then
log $DEBUG "The excuse file $excuse_file_name already exists."
else
echo $excuse_reason > $excuse_file_name
log $DEBUG "The excuse file $excuse_file_name created." \
"Excuse reason: $excuse_reason"
fi
}
# Remove the named health excuse files.
function health_excuse_remove {
local excuse_file_name="$1"
local excuse_reason
# Do nothing if mode is not VAULT_MANAGER
if [ "$MANAGER_MODE" != "VAULT_MANAGER" ]; then
return
fi
heartbeat
# Check if the named excuse exists; if it does, delete the file
if [ -f $excuse_file_name ]; then
excuse_reason="$( cat $excuse_file_name )"
rm $excuse_file_name
log $DEBUG "The excuse file $excuse_file_name is deleted. " \
"The excuse reason was: $excuse_reason"
else
log $DEBUG "The excuse file $excuse_file_name is already deleted."
fi
}
function pause_on_trap {
local thistrap="$1"
local pausenum
if [ ! -e "$PAUSEFILE" ]; then
# no pause request
return
fi
pausenum="$( cat "$PAUSEFILE" )"
if [ -n "$pausenum" ] \
&& [ "$pausenum" != "$thistrap" ]; then
# not on this trap
return
fi
log $INFO "Vault manager is paused ($thistrap)"
health_excuse_create "$HEALTH_EXCUSE_PAUSE" "$HC_MSG_PAUSE"
# Pause until the pause file is removed by the author, or until
# the content of the pause_on_trap file is non-empty and does not
# match the current trap.
#
# If a pause_on_trap file containing a specific trap number is
# replaced with an empty file, the pause state is maintained.
while [ -e "$PAUSEFILE" ]; do
pausenum="$( cat "$PAUSEFILE" )"
if [ -n "$pausenum" ] \
&& [ "$thistrap" != "$pausenum" ]; then
break;
fi
sleep "$PAUSE_RATE"
done
health_excuse_remove "$HEALTH_EXCUSE_PAUSE"
log $INFO "Vault manager is unpaused"
}
function exit_on_trap {
local trap="$1"
local tfnum=""
if [ "$MANAGER_MODE" == "INTERACTIVE" ]; then
# do not interfere with exit_on_trap intended for
# vault-manager pod
return
fi
heartbeat
# Debug option pause_on_trap
pause_on_trap "$trap"
if [ -e "$TRAPFILE" ]; then
tfnum=$(cat $TRAPFILE)
log $DEBUG "exit_on_trap: removing $TRAPFILE"
rm "$TRAPFILE" # for workdir on PVC
if [ -z "$tfnum" ]; then
# an empty trap file is the default expected behaviour
log $INFO "exit_on_trap: ($trap)"
exit
# handle trap debugging feature - a developer specifies the
# trap number to target a specific exit_on_trap call.
# Setting a value of 0 (zero) disables the debugging trap
elif [ "$tfnum" -eq 0 ]; then
log $DEBUG "exit_on_trap: ($trap):" \
"disable debug trap ($DEBUGGING_TRAP)"
DEBUGGING_TRAP=0
# there is no trap with value zero
return
else
DEBUGGING_TRAP="$tfnum"
log $DEBUG "exit_on_trap: ($trap): " \
"enable debug trap ($DEBUGGING_TRAP)"
# check now just in case it matches
if [ "$DEBUGGING_TRAP" -eq "$trap" ]; then
log $INFO "exit_on_trap: ($trap): matching"
exit
fi
fi
# check if there is a matching debug trap set
elif [ "$DEBUGGING_TRAP" -eq "$trap" ]; then
log $INFO "exit_on_trap: ($trap): matching"
exit
else
log $DEBUG "exit_on_trap: ($trap): no trap file, no exit"
fi
}
# splits the key shards into separate json documents. Each document
# contains one key and its base64 encoded version. The
# root token will be stored separately
function splitShard {
local index="$1"
jq '{"keys": [.keys['$index']], "keys_base64": [.keys_base64['$index']]}'
}
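# Example: piping the vault init response through 'splitShard 1'
# yields a single-shard document of the form (values illustrative):
#   {"keys": ["<hex shard>"], "keys_base64": ["<base64 shard>"]}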
# merges two split keys
function mergeKeyJson {
# the two parameters are names for variables
local jstr1="$1"
local jstr2="$2"
mkfifo "$WORKDIR"/s1
mkfifo "$WORKDIR"/s2
(
jq -Mn --argfile file1 $WORKDIR/s1 --argfile file2 $WORKDIR/s2 '
def mergek: ($file1, $file2) | .keys as $k | $k;
def mergeb: ($file1, $file2) | .keys_base64 as $b | $b;
{keys: (reduce mergek as $x ([]; . + $x)),
keys_base64: (reduce mergeb as $x ([]; . + $x))}
' & ) 2>/dev/null
echo -n "${!jstr1}" > "$WORKDIR"/s1
echo -n "${!jstr2}" > "$WORKDIR"/s2
rm -f "$WORKDIR"/s1 "$WORKDIR"/s2
}
# Prepare a json document from the k8s secrets prefixed with
# prefix, and the root token
#
# Required parameter: The prefix of the k8s secrets containing
# the shards
#
# Outputs the json document which is comparable to the original
# response for vault initialization. The calling function is
# responsible for validating the document content.
#
function reconstructInitResponse {
local prefix="$1"
local index
local keys
local mkeys
# pull secrets from k8s and merge into one json file.
for index in $( seq 0 $(( KEY_SECRET_SHARES - 1 )) ); do
keys="$( get_secret "${prefix}-$index" )"
if [ "$index" -eq 0 ]; then
mkeys="$keys"
continue
fi
mkeys=$( mergeKeyJson mkeys keys )
done
# append the root secret and echo the document
echo "$mkeys" | jq -c '{keys: .keys,
keys_base64: .keys_base64,
root_token: "'$( get_secret "cluster-key-root" )'"}'
}
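# Example output shape (values illustrative), matching the original
# /sys/init response:
#   {"keys": ["<shard 0>", ..., "<shard 4>"],
#    "keys_base64": ["<b64 0>", ..., "<b64 4>"],
#    "root_token": "<token>"}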
# Check the structure of json data and confirm equivalence of
# the stdin with stored secrets
#
# Required parameter: The prefix of the k8s secrets containing
# the shards in stored secrets
#
# Returns the normal linux success=0, failure!=0
function validateSecrets {
local keyprefix="$1"
local text
local keys
local keys_base64
local root_token
local count
local saved
local shaA
local shaB
text=$( cat )
keys=$( echo "$text" | jq '.keys' )
keys_base64=$( echo "$text" | jq '.keys_base64' )
root_token=$( echo "$text" | jq -r '.root_token' )
# response is 'null' if the dict key is missing
# response is empty (-z) if the source document is empty
if [ -z "$keys" -o "$keys" == "null" \
-o -z "$keys_base64" -o "$keys_base64" == "null" \
-o -z "$root_token" -o "$root_token" == "null" ]; then
log $ERROR "one or more missing keys"
return 1
fi
count=$( echo "$keys" | jq '. | length' )
if [ $? -ne 0 ]; then
log $ERROR "jq did not parse keys length"
return 1
fi
if [ -z "$count" ] || [ "$count" -ne "$KEY_SECRET_SHARES" ]; then
log $ERROR "Incorrect array length for keys:" \
"$count instead of $KEY_SECRET_SHARES"
return 1
fi
count=$( echo "$keys_base64" | jq '. | length' )
if [ $? -ne 0 ]; then
log $ERROR "jq did not parse keys_base64 length"
return 1
fi
if [ -z "$count" ] || [ "$count" -ne "$KEY_SECRET_SHARES" ]; then
log $ERROR "Incorrect array length for keys_base64:" \
"$count instead of $KEY_SECRET_SHARES"
return 1
fi
saved="$( reconstructInitResponse "${keyprefix}" )"
# finally ensure that the saved secrets are the same as the
# supplied text
shaA=$( echo "$text" | sha256sum )
shaB=$( echo "$saved" | sha256sum )
if [ "$shaA" != "$shaB" ]; then
log $ERROR "saved data differs from source data"
return 1
fi
log $INFO "Verified stored secrets are the same as supplied data"
return 0
}
# Prints a list of all k8s vault pods (callers store it in a text file).
# Converts ips from X.X.X.X or a:b:c::d to X-X-X-X for use as pod
# dns names
#
# Optional parameter:
# --ha : append vault server active/standby status (boolean)
#
# Example output with --ha
# sva-vault-0 172-16-226-97 true
function getVaultPods {
local ha="$1"
local jpath
local meta='{.metadata.name}'
local ip='{.status.podIPs[].ip}'
local active='{.metadata.labels.vault-active}'
local jfields=${meta}'{"\t"}'${ip}
if [ "$ha" == "--ha" ]; then
jfields=${jfields}'{"\t"}'${active}
fi
jpath='{range .items[*]}'"$jfields"'{"\n"}{end}'
$KUBECTL get pods \
-n "$VAULT_NS" \
-l component=server,app.kubernetes.io/name=vault \
-o=jsonpath="$jpath" \
| sed 's/\.\|:/-/g'
}
# Wait for the vault servers in the stateful set to be
# created before initializing
function waitForPods {
local jsonPath='{range .items[*]}{.metadata.name}{"\t"} \
{.status.podIPs[].ip}{"\t"}{.status.phase}{"\n"} \
{end}'
CURRENT_PODS=$($KUBECTL get pods \
-l component=server,app.kubernetes.io/name=vault \
-o=jsonpath="$jsonPath" \
| grep Running \
| wc -l)
DESIRED_PODS=$1
if ! [[ "$CURRENT_PODS" =~ ^[0-9]+$ ]]; then
log $ERROR "Invalid Running pod number ($CURRENT_PODS) from kubectl get pods"
CURRENT_PODS=0
fi
while [ $CURRENT_PODS -lt $DESIRED_PODS ]; do
sleep "$STATEFULSET_RATE"
log $INFO "Waiting for ${VAULT_FN}" \
"statefulset running pods ($CURRENT_PODS) to equal" \
"desired pods ($DESIRED_PODS)"
CURRENT_PODS=$($KUBECTL get pods \
-l component=server,app.kubernetes.io/name=vault \
-o=jsonpath="$jsonPath" \
| grep Running \
| wc -l)
done
}
# Takes the json document output from vault initialization
# and stores it into secrets for key shards and the root token
#
# Required parameter: The prefix of the k8s secrets into which to
# store the shards
#
# This only works if the secrets are not pre-existing. An error
# is printed by set_secrets.
function storeVaultInitSecrets {
local keyprefix="$1"
local secrets
local index
local split_json
secrets=$( cat )
for index in $(seq 0 $((KEY_SECRET_SHARES - 1 ))); do
split_json=$( echo -n "$secrets" | splitShard "$index" )
set_secret "${keyprefix}-$index" /dev/stdin <<< "$split_json"
done
# if the data contains root_token, save it as well
split_json=$( echo "$secrets" | jq -r '.root_token' )
if [ -n "$split_json" -a "$split_json" != 'null' ]; then
set_secret "${keyprefix}-root" /dev/stdin <<< "$split_json"
fi
}
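# For example, with prefix 'cluster-key' and KEY_SECRET_SHARES=5
# this creates secrets cluster-key-0 through cluster-key-4, plus
# cluster-key-root when the document includes a root_token.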
# Address a vault server with REST API request. Capture stderr,
# stdout and result of curl commands. Print error and debug logs
#
# Required positional parameters, in order:
# Response variable : variable in which to store the response
# from vault
# http request type : GET, POST, DELETE
# vault server : FQDN
# vault REST API path : e.g., /sys/health
#
# Optional final parameter : a quoted string of data
#
# Examples:
# # get health status query for the active vault status
# vaultAPI myvar GET $ACTIVE_TARGET /sys/health
#
# # post rekey initialization with shares 5 and threshold 3
# data='{"secret_shares": 5,"secret_threshold": 3}'
# vaultAPI myvar POST $ACTIVE_TARGET /sys/rekey/init "$data"
#
# Overridable ENV variables:
# API_TMOUT: the curl timeout
# NO_HEADER: omit header (the root token) if not empty
#
# Output:
# Return the stdout and command result code
#
# Print log messages for errors. The responses from vault are
# restricted to DEBUG level log in case there's secret information
# in them. But a non-specific ERROR message is printed in all
# cases of errors.
function vaultAPI {
local answer="$1"
local reqarg="$2"
local server="$3"
local apipath="$4"
local data="$5"
local cmderr=""
local cmdout=""
local cmdres=1
local header=""
local errors=""
if [ -z "$NO_HEADER" ]; then
header="X-Vault-Token:$( get_secret cluster-key-root )"
fi
log $DEBUG "Executing: [curl -s -S --cacert \""$CERT"\"" \
${API_TMOUT:+"--connect-timeout" "$API_TMOUT"} \
${header:+"--header" "xxxx"} \
"--request \"$reqarg\"" \
${data:+"--data" "xxxx"} \
"\"https://${server}:${TARGET_PORT}/v1${apipath}\"]"
health_excuse_create "$HEALTH_EXCUSE_NETWORK" "$HC_MSG_NETWORK"
# Capture stderr and stdout copied from google search example
# on stack overflow. Add capture of the command result code
{
IFS=$'\n' read -r -d '' cmderr;
IFS=$'\n' read -r -d '' cmdout;
cmdres="$( echo "$cmdout" | tail -n1 )"
cmdout="$( echo "$cmdout" | head -n-1 )"
} < <((printf '\0%s\0' "$(
curl -s -S --cacert "$CERT" \
${API_TMOUT:+"--connect-timeout" "$API_TMOUT"} \
${header:+"--header" "$header"} \
--request "$reqarg" \
${data:+"--data" "$data"} \
"https://${server}:${TARGET_PORT}/v1${apipath}"
echo "$?"
)" 1>&2) 2>&1)
health_excuse_remove "$HEALTH_EXCUSE_NETWORK"
if [ "$cmdres" -ne 0 ]; then
log $ERROR "curl returns non-zero result: $cmdres"
fi
if [ -n "$cmderr" ]; then
log $ERROR "curl returns stderr"
log $DEBUG "curl returns stderr: [$cmderr]"
fi
if [ -n "$cmdout" ]; then
# errors from the REST API
errors=$( echo "$cmdout" | jq -cr '.errors' )
if [[ "$errors" != 'null' ]] && [ -n "$errors" ]; then
log $ERROR "vault REST API error"
log $DEBUG "vault REST API error: $errors"
if [ "$cmdres" -eq 0 ]; then
# this code wants to know if there was an error
cmdres=1
fi
fi
fi
eval "$answer"='$cmdout'
return $cmdres
}
# Initializes the first vault pod, only needs to be performed once
# after deploying the helm chart
# Stores the root token and master key shards in k8s secrets
function initVault {
local V0 # the zeroth vault pod
local keys
local key_error
local shares
local threshold
V0=$(awk 'NR==1{print $2}' $WORKDIR/pods.txt)
log $INFO "Initializing $V0"
shares='"secret_shares": '$KEY_SECRET_SHARES
threshold='"secret_threshold": '$KEY_REQUIRED_THRESHOLD
NO_HEADER=true \
vaultAPI keys POST $V0.$POD_TARGET_BASE \
/sys/init "{$shares, $threshold}"
key_error=$(echo -n "$keys"| jq -r '.errors[]?')
if [ -n "$key_error" ]; then
log $ERROR "vault init request failed: $key_error"
fi
echo "$keys" | storeVaultInitSecrets cluster-key
# check if the secrets match vault's REST API response
echo "$keys" | validateSecrets cluster-key
}
# Uses the master key shards to unseal vault
function unsealVault {
local server="$1"
local prefix="$2"
local index
local b64key
local data
local response
local value
local autherror
if [ -z "$prefix" ]; then
prefix='cluster-key'
fi
# always abort an unseal in progress
data='{"reset": true}'
NO_HEADER=true \
API_TMOUT=$API_UNSEAL_OP_TMOUT \
vaultAPI response POST $server.$POD_TARGET_BASE \
/sys/unseal "$data"
if [ $? -ne 0 ]; then
# error is already printed
# Including if vault is already unsealed.
if [[ "$response" == *"vault is unsealed"* ]]; then
log $WARNING "unsealVault: server $server is" \
"already unsealed"
fi
return 1
fi
for index in $(seq 0 $((KEY_SECRET_SHARES - 1 ))); do
b64key=$( get_secret "${prefix}-$index" \
| jq -r '.keys_base64[]' )
data="{\"key\": \"$b64key\"}"
NO_HEADER=true \
API_TMOUT=$API_UNSEAL_OP_TMOUT \
vaultAPI response POST $server.$POD_TARGET_BASE \
/sys/unseal "$data"
if [ $? -ne 0 ]; then
# error is already printed, including errors from the
# vault REST API; but for debugging purposes, highlight
# the authentication error
autherror="cipher: message authentication failed"
if [[ "$response" == *"$autherror"* ]]; then
log $ERROR "Failed to authenticate /sys/unseal" \
"with $prefix"
# perhaps use this info in the future
return 2
fi
log $DEBUG "Unknown failure authenticating unseal" \
"$response"
return 1
fi
# when the unseal completes with KEY_REQUIRED_THRESHOLD then
# the response will indicate sealed=false
value="$( echo "$response" | jq -r ".sealed" )"
if [ "$value" == "false" ]; then
log $DEBUG "Success authenticating unseal"
return 0
fi
value="$( echo "$response" | jq -r ".progress" )"
log $DEBUG "Success authenticating unseal" \
"(${value}/${KEY_REQUIRED_THRESHOLD})"
# Some sleep is required to allow Raft convergence
sleep "$UNSEAL_CONVERGE_TIME"
done
log $ERROR "unsealVault completes without unseal or error"
return 1
}
# Unseal a vault server under conditions of recovery,
# including selecting and remembering alternate shard
# secrets.
#
# This algorithm remembers the last shards used to unseal the vault,
# to prioritize using those again the next time.
function unsealVaultRecover {
local server="$1"
local attempted
local use_secrets=""
if [ -n "$SHARDS_LAST_SUCCESSFUL" ]; then
# double check the keys we were using are not deleted
if assertShardSecrets "$SHARDS_LAST_SUCCESSFUL"; then
use_secrets="$SHARDS_LAST_SUCCESSFUL"
fi
fi
use_secrets="$use_secrets $( \
getOtherShardSecrets "$SHARDS_LAST_SUCCESSFUL" )"
for attempted in $use_secrets; do
log $INFO "Attempt unseal with $attempted"
unsealVault "$server" "$attempted"
case $? in
0)
SHARDS_LAST_SUCCESSFUL="$attempted"
return 0
;;
2)
# an error is already printed
# try a different set of shards
continue
;;
*)
# failure is not clear, try again later
log $ERROR "Fail to unseal $server with" \
"$attempted; try later"
return 1
;;
esac
done
log $ERROR "No set of shards unseal the server $server:" \
"attempted: $use_secrets"
return 1
}
# Joins a vault server to the raft cluster, using the active
# vault service as the leader address
function joinRaft {
local dnsname="$1"
local activeLink="https://${ACTIVE_TARGET}:${TARGET_PORT}"
local dataJson="{\"leader_api_addr\": \"$activeLink\", \"leader_ca_cert\": \"$CA_ONELINE\"}"
RAFT_STATUS=""
while [ "$RAFT_STATUS" != "true" ]; do
vaultAPI RAFT_STATUS POST $dnsname.$POD_TARGET_BASE \
/sys/storage/raft/join "$dataJson"
log $INFO "$dnsname $RAFT_STATUS"
RAFT_STATUS=$(echo $RAFT_STATUS | jq -r .joined)
sleep "$JOIN_CONVERGE_TIME"
done
}
function runStateMachine {
local host="$1"
local dns_name="$2"
local sealed="$3"
local status_rec
local old_rec
local counter
status_rec="/$host/$dns_name/$sealed/"
# log compression: do not print logs when status is unchanged
# omit counter when checking vault server state change
old_rec="$( grep "$status_rec" "$PODREC_F" )"
if [ $? -ne 0 ]; then
log $DEBUG "$( grep "$dns_name" $WORKDIR/pods.txt )"
log $INFO "Sealed status of $dns_name is now: $sealed"
# reread the record by hostname only
old_rec="$( grep "^/$host/" "$PODREC_F" )"
else
log $DEBUG "There is no change in pod seal status"
fi
if [ "$sealed" != "true" ]; then
# There is nothing more to do: the vault is unsealed
# or the sealed status is unclear
echo "$status_rec" >> "$PODREC_TMP_F"
return
fi
# The vault is sealed
#
# Check if there is a countdown in progress
#
# else -z old_rec: "the pod didn't have an IP address the last
# iteration, but now it does" - treat the same as "sealed
# without a countdown"
counter=""
if [ -n "$old_rec" ]; then
counter="$( echo "$old_rec" | awk -F/ '{print $5}' )"
fi
if [ -z "$counter" ]; then
# sealed without a countdown: start counting
log $DEBUG "Sealed vault $host: begin unseal delay:" \
"$( expr "$STATUS_RATE" \* "$STATEMACH_START" )s"
echo "${status_rec}${STATEMACH_START}" >> "$PODREC_TMP_F"
return
fi
# Check for end of period: 1 means "zero at this interval"
# "less than 1" for resilience
if [ "$counter" -le 1 -o "$STATEMACH_START" -eq 0 ]; then
# We've waited (STATUS_RATE * STATEMACH_START) seconds
# Or, STATEMACH_START == 0 means do not delay
log $INFO "Unsealing $dns_name"
unsealVaultRecover "$dns_name"
echo "$status_rec" >> "$PODREC_TMP_F"
return
fi
# finally, continue to countdown
counter="$( expr "$counter" - 1 )"
echo "${status_rec}${counter}" >> "$PODREC_TMP_F"
}
function vaultInitialized {
local response
local dnsname
local initialized
local text
# Wait for the pod to respond with a positive vault API response
# (i.e., not just a curl failure, and not a vault API failure)
while true; do
dnsname=$(awk 'NR==1{print $2}' $WORKDIR/pods.txt)
if [ -z "$dnsname" ]; then
log $INFO "waiting..."
sleep $STATUS_RATE
getVaultPods > $WORKDIR/pods.txt
continue
fi
log $INFO "Query server $dnsname for initialization status"
NO_HEADER=true \
API_TMOUT=$QUERY_TMOUT \
vaultAPI response GET $dnsname.$POD_TARGET_BASE /sys/health
if [ $? -ne 0 ]; then
log $INFO "waiting..."
sleep $STATUS_RATE
getVaultPods > $WORKDIR/pods.txt
continue
fi
break
done
echo -n "$response" > $WORKDIR/healthcheck.txt
initialized=$( echo "$response" | jq -r .initialized )
text="$( grep $dnsname $WORKDIR/pods.txt )"
if [ $? -eq 0 ]; then
log $DEBUG "$text"
log $DEBUG "Initialized status is $initialized"
fi
# The empty check is here as an extra safety net, but an
# investigation into the exact conditions under which the result
# would be empty would be helpful.
if [ -n "$initialized" ] && [ "$initialized" = false ]; then
return 1
else
return 0
fi
}
function set_secret {
local secret="$1"
local contentf="$2"
local output
local result
output="$( $KUBECTL create secret generic -n "$VAULT_NS" \
"$secret" "--from-file=strdata=$contentf" 2>&1 )"
result=$?
if [ "$result" -ne 0 ]; then
log $ERROR "Failed to create secret $secret"
log $DEBUG "Output: [$output]"
fi
return $result
}
function get_secret {
local secret="$1"
$KUBECTL get secrets -n "$VAULT_NS" "$secret" \
-o jsonpath='{.data.strdata}' \
| base64 -d
}
# When vault-manager is run in "MOUNT_HELPER" mode, this function
# will not return. Instead the function will exit_on_trap or exit
# when it times-out.
#
# Basically: this function doesn't do anything except wait to be
# terminated.
#
# Vault-manager in MOUNT_HELPER has PVC mounted, allowing the real
# vault-manager to read secrets from cluster_keys.json
function mountHelper {
local count
# omit this function if this pod is not the mount helper
if [ -z "$MANAGER_MODE" -o "$MANAGER_MODE" != "MOUNT_HELPER" ]; then
log $INFO "Mode is VAULT_MANAGER"
return
fi
# When vault-manager is running in this mode, it should be
# deleted by vault-manager running in the default mode, which
# is using this pod to read secrets from mounted PVC
log $INFO "Mode is $MANAGER_MODE"
# start with some debug/error logs
if [ -f "$PVC_DIR/cluster_keys.json" ]; then
log $DEBUG "Successfully mounted secrets file"
else
log $WARNING "Secrets file not found"
fi
# sleep for MOUNT_HELPER_MAX_TIME, expecting SIGTERM signal
log $INFO "Waiting for termination request via SIGTERM"
count=0
while [ "$count" -lt "$MOUNT_HELPER_MAX_TIME" ]; do
exit_on_trap
count=$((count+1))
sleep 1
done
# Normally should exit by exit_on_trap, but here we timeout
# waiting for the real vault-manager to delete this job/pod.
log $INFO "Exiting without receiving SIGTERM request"
exit 0
}
# Check if a secret exists
#
# Returns the normal linux success=0, failure!=0
# Prints the name of the secret
function secretExists {
local name="$1"
$KUBECTL get secrets -n "$VAULT_NS" "$name" \
-o jsonpath='{.metadata.name}' 2>/dev/null \
| grep "$name"
}
# Return linux success=0 if any of the secrets exist
function secretsExistAny {
local list="$@"
local name
for name in $list; do
secretExists $name >/dev/null
if [ $? -eq 0 ]; then
return 0
fi
done
return 1
}
# Assert that the shard secrets starting with prefix exist
#
# Parameter: prefix for k8s secrets, such as 'cluster-key'
#
# Optional second parameter:
# --nokeys : fail if at least one exists
#
# Returns the normal linux success=0, failure!=0
#
# When --nokeys is selected, the failure return code is the number
# of secrets found. Zero secrets were expected.
#
# When --nokeys is omitted, the failure return code is either the
# number of secrets found or if the number of secrets found was
# zero, KEY_SECRET_SHARES is returned as error code
function assertShardSecrets {
local prefix="$1"
local nokey="$2"
local i
local count=0
for i in $( seq 0 $((KEY_SECRET_SHARES-1)) ); do
secretExists "${prefix}-$i" >/dev/null
if [ $? -eq 0 ]; then
count=$((count+1))
fi
done
if [ "$nokey" == "--nokeys" ]; then
# 0 secrets == true (0)
# Else return the number of secrets
return $count
fi
if [ "$count" -eq "$KEY_SECRET_SHARES" ]; then
return 0
elif [ "$count" -eq 0 ]; then
return "$KEY_SECRET_SHARES" # an error result
fi
return "$count"
}
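# Example usage:
#   assertShardSecrets cluster-key             # 0 if all shards exist
#   assertShardSecrets cluster-rekey --nokeys  # 0 if none exist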
# Return a list of existing shard secrets other than the set
# specified
#
# Sort by priority order:
# cluster-key
# cluster-rekey
# cluster-key-bk
#
function getOtherShardSecrets {
local omit="$1"
local secrets="cluster-key cluster-rekey cluster-key-bk"
local secret
local others=""
for secret in $secrets; do
if [ "$secret" == "$omit" ]; then
continue
fi
if assertShardSecrets $secret; then
others="$others $secret"
fi
done
echo $others
}
# Delete the specified list of secrets
#
# Uses a single kubectl command
function deleteSecrets {
local secrets="$@"
local text
text="$( $KUBECTL delete secrets -n "$VAULT_NS" \
$secrets 2>&1 )"
if [ $? -ne 0 ]; then
log $ERROR "Error deleting secrets: ["$text"]"
return 1
fi
log $INFO $text
return 0
}
# Check if the PVC resource exists
#
# Returns 0 if pvc does not exist
# Returns 1 if pvc exists but is terminating
# Returns 2 if pvc exists and is not terminating
# Prints the name of the PVC resource
function pvcRemoved {
local text
local jqscript
jqscript='.items
| map(select(.metadata.name | test("^manager-pvc")))
| "\(.[0].metadata.name) \(.[0].status.phase)"'
# using jq since kubernetes does not support regex
# the grep makes sure the result contains the 'manager-pvc'
# string (as opposed to 'null' for example)
text="$(
$KUBECTL get persistentvolumeclaims -n "$VAULT_NS" -o json \
| jq -r "$jqscript" 2>/dev/null \
| grep manager-pvc )"
if [ -n "$text" ]; then
readarray -d " " -t pvcInfo <<< "$text"
pvcName="${pvcInfo[0]}"
pvcStatus="${pvcInfo[1]}"
echo "$pvcName"
if [ "$pvcStatus" = "Terminating" ]; then
return 1
else
return 2
fi
fi
return 0
}
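# Example usage (a sketch; return codes per the comment above):
#   name="$( pvcRemoved )"
#   case $? in
#       0) : ;;  # no PVC exists
#       1) : ;;  # "$name" exists but is terminating
#       2) : ;;  # "$name" exists and is not terminating
#   esac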
# Check if the PVC is mounted to any pod in vault namespace
#
# Returns the normal linux success=0, failure!=0
# Prints the name of the PVC resource
function testPVCMount {
local result
local cspec
local vspec
cspec=".items[*].spec.containers[*]"
vspec="volumeMounts[?(@.name=='manager-pvc')].name"
# this kubectl query returns zero whether manager-pvc is
# found or not
# result variable is either empty or 'manager-pvc'
result="$( $KUBECTL get pods -n "$VAULT_NS" \
-o jsonpath="{${cspec}.${vspec}}" )"
if [ -n "$result" ]; then
return 0
fi
return 1 # assertion 'fails'
}
# This function prints a DEBUG log of kubectl delete
function deleteMountHelper {
local text
local result
log $DEBUG "Waiting for delete of mount-helper job"
text="$( $KUBECTL delete --ignore-not-found=true --wait=true \
-f /opt/yaml/pvc-attach.yaml 2>&1 )"
result=$?
log $DEBUG "Output of deleting mount-helper: [$text]"
return $result
}
# Run shred on the file content of PVC
#
# All files are shredded, and the result is an error if
# - command return code is non-zero
# - file comparison shows unchanged file(s)
#
# A warning is issued if shred/kubectl command has any stdout or
# stderr
#
# Returns the normal linux success=0, failure!=0
function securelyWipePVC {
local helper="$1"
if [ -z "$helper" ]; then
log $ERROR "No pod specified for shredding"
return 1
fi
# get profile of the files before shredding
$KUBECTL exec -n "$VAULT_NS" "$helper" -- \
bash -c 'find /mnt/data -type f \
| sort | xargs wc | head -n-1' \
>/tmp/shred_before.txt 2>&1
log $DEBUG "Original files: [$( cat /tmp/shred_before.txt )]"
# run the shred command
#
# Shred all the files in mounted /mnt/data/
#
# The shred by default has three randomized passes, and with -z
# option will finalize with zeros. -f prompts shred to work
# around any unexpected file permissions
text="$( $KUBECTL exec -n "$VAULT_NS" "$helper" -- \
bash -c '\
result=0; \
while read fname; do \
shred -f -z "$fname"; \
[ $? -ne 0 ] && result=1; \
done <<<"$(find /mnt/data -type f )"; \
exit $result' 2>&1 )"
result=$?
# get profile of the files after shredding
$KUBECTL exec -n "$VAULT_NS" "$helper" -- \
bash -c 'find /mnt/data -type f \
| sort | xargs wc | head -n-1' \
>/tmp/shred_after.txt 2>&1
log $DEBUG "Shredded files: [$( cat /tmp/shred_after.txt )]"
# compare the profiles for error reporting
#
# If the file lists, pushed through wc, have files with the same
# character, word, and line counts then report an error: a file
# has not been shred
#
# Ignore files that were empty
difftext="$( diff -wuU100000 /tmp/shred_before.txt \
/tmp/shred_after.txt )"
unchanged="$( echo "$difftext" | grep "^ " \
| grep -v "^\([ ]\{1,\}0\)\{3\} /" )"
# Report the errors/success
if [ "$result" -ne 0 ]; then
log $ERROR "Error on shred: [$text]"
if [ -n "$unchanged" ]; then
log $ERROR "Unchanged: [$unchanged]"
fi
return 1
fi
if [ -n "$text" ]; then
log $WARNING "Output of shred is not empty: [$text]"
fi
if [ -n "$unchanged" ]; then
log $ERROR "Shred did not shred some files"
log $ERROR "Unchanged: [$unchanged]"
return 1
fi
log $INFO "Shredding of PVC data verified"
return 0
}
# Delete the PVC resource
#
# The delete will succeed even if attached to a pod, such as a
# terminating vault-manager or mount-helper - the PVC remains
# in terminating status until the pod is also terminated.
function deletePVC {
local text
local name
name="$( pvcRemoved )"
if [ $? -eq 2 ] && [[ "$name" =~ ^manager-pvc ]]; then
text="$( $KUBECTL delete persistentvolumeclaims \
-n "$VAULT_NS" "$name" 2>&1 )"
if [ $? -ne 0 ]; then
log $ERROR "Error deleting PVC: [$text]"
else
log $INFO "$text"
fi
else
log $WARNING "Request to delete PVC but PVC not found"
fi
}
# Run a job/pod, to mount the PVC resource, and retrieve the secrets
# from PVC.
#
# See also the function mountHelper and the ConfigMap named:
# {{ .Values.vault.name }}-mount-helper
#
# This function does not support overwriting an existing
# cluster-key-* secret, but it does support validating those secrets
# if they exist
function convertPVC {
local output
local pod
local count
local text
local PVCtext
local result
local waitPVCterm
if testPVCMount; then
log $ERROR "Cannot mount PVC already mounted"
return 1
fi
# run the pod
output="$( $KUBECTL apply -f /opt/yaml/pvc-attach.yaml 2>&1 )"
if [ $? -ne 0 ]; then
log $ERROR "Failed to apply mount-helper"
log $DEBUG "Output: [$output]"
deleteMountHelper
return 1
fi
# wait for pod
pod=''
count=0
log $INFO "Waiting for mount-helper pod to run"
while [ -z "$pod" -a "$count" -le "$MAX_POD_RUN_TRIES" ]; do
count=$((count+1))
text="$( $KUBECTL get pods -n "$VAULT_NS" \
| grep "mount-helper" )"
pod="$( echo "$text" | grep "Running" | awk '{print $1}' )"
if [ -z "$pod" ]; then
sleep 1
fi
done
if [ -z "$pod" ]; then
log $ERROR "Failed to run mount-helper pod"
log $DEBUG "Pod state: [$( echo $text )]"
deleteMountHelper
return 1
fi
# get the pvc data
PVCtext="$( $KUBECTL exec -n "$VAULT_NS" "$pod" \
-- cat /mnt/data/cluster_keys.json )"
if [ $? -ne 0 -o -z "$PVCtext" ]; then
log $ERROR "Failed to read cluster_keys.json"
deleteMountHelper
return 1
fi
log $INFO "Data retrieved from PVC"
# if the Root secret is pre-existing, compare the existing
# shard secrets and root secret before deleting the PVC
$KUBECTL get secrets -n "$VAULT_NS" \
cluster-key-root >/dev/null 2>&1
if [ $? -eq 0 ]; then
log $INFO "Cluster secrets exist:" \
"validating"
else
# create a secret from the data
echo "$PVCtext" | storeVaultInitSecrets cluster-key
fi
# verify the data stored versus text from PVC
echo "$PVCtext" | validateSecrets cluster-key
result=$?
if [ "$result" -eq 0 ]; then
securelyWipePVC "$pod"
# omit deleting the PVC for manual analysis and shred
# when the wipe fails
if [ $? -eq 0 ]; then
deletePVC
fi
fi
# clean up but do not care about the result
deleteMountHelper
# Sleep before finishing conversion, so that the PVC termination process has started
waitPVCterm=5
sleep $waitPVCterm
return $result
}
function convertBootstrapSecrets {
local text
local count
text="$( get_secret cluster-key-bootstrap )"
echo "$text" | storeVaultInitSecrets cluster-key
# verify the split secrets versus the bootstrap text
echo "$text" | validateSecrets cluster-key
if [ $? -ne 0 ]; then
# an error is already printed
return 1
fi
deleteSecrets cluster-key-bootstrap
# Also validate and delete the PVC resource
# This procedure depends on waiting for the old version
# of vault-manager pod to exit
count="$TERMINATE_TRIES_MAX"
log $INFO "Waiting for vault-manager pod to exit"
while testPVCMount && [ "$count" -gt 0 ]; do
sleep "$TERMINATE_TRIES_SLEEP"
count=$((count-1))
done
if [ $count -eq 0 ]; then
log $WARNING "Maximum time reached waiting" \
"for the previous pod to be terminated."
fi
convertPVC
}
# When enabled, after conversion of storage from PVC to k8s secrets,
# Vault-manager will prompt itself to rekey the vault server
# storage.
function requestRekey {
local value
if [ "$AUTO_REKEY_CONVERT" != "true" ]; then
return
fi
log $INFO "Auto rekey enabled: [$AUTO_REKEY_CONVERT]"
secretExists cluster-rekey-request >/dev/null
if [ $? -eq 0 ]; then
value="$( get_secret cluster-rekey-request )"
log $WARNING "Auto rekey: rekey request exists: $value"
return
fi
value=$( uuidgen )
set_secret cluster-rekey-request /dev/stdin <<<"$value"
if [ $? -eq 0 ]; then
log $INFO "Rekey requested: $value"
else
log $ERROR "Failed to request rekey: $value"
fi
return
}
function runConversion {
if [ -n "$K8S_SECRETS_PREEXIST" ]; then
log $INFO "Cluster secrets exist"
return
elif [ -n "$BOOTSTRAP_PREEXISTS" ]; then
# this is the normal application update procedure; the
# lifecycle code retrieved the secrets from previous version
# of the application.
log $INFO "Using secrets provided in $BOOTSTRAP_PREEXISTS"
convertBootstrapSecrets
requestRekey
return
elif [ -z "$PVC_PREEXISTS" ]; then
log $INFO "No pre-existing secrets exist"
return
fi
# Finally, read the pre-existing PVC. This occurs if the
# application updates outside of application-update. For
# example if the old application is removed and deleted, and the
# new application is uploaded and applied.
convertPVC
requestRekey
}
# Test whether the specified vault server(s) agree with the
# specified status of the specified endpoint
#
# Print DEBUG logs when status is non-conforming (the function will
# be used to wait for conformance).
#
# The first parameter is the vault API endpoint to check status
# of, either /sys/rekey/init or /sys/rekey/verify
# The second parameter is the quoted string of json data returned
# from vault REST API call. The data should include these fields,
# which are tested for conformance:
# {"nonce": "S", "started": B, "progress": N,
# "verification_required": B}
#
# The other parameters are the servers to test, specified as
# dash-separated IP address output of getVaultPods (XX-XX-XX-XX)
#
# Returns the normal linux success=0, failure!=0
function assertRekeyStatus {
local endpoint="$1"
local data="$2"
shift 2
local -a servers=($@)
local -a key_arr
local required
local jscript
local key
local index
local error
local server
local response
local record
required="nonce progress started verification_required"
jscript=".nonce, .progress, .started, .verification_required"
if [ "$endpoint" == "/sys/rekey/verify" ]; then
required="nonce progress started"
jscript=".nonce, .progress, .started"
fi
# quick check to assure the data parameter is sane
key_arr=($(echo "$data" | jq -r 'keys[]' | sort))
for key in $required; do
if [[ " ${key_arr[*]} " != *" $key "* ]]; then
log $ERROR "assertRekeyStatus requires: [$required]," \
"received: ${key_arr[*]}"
return 1
fi
done
required="$( echo "$data" | jq -r "$jscript" )"
index=0
error=0
while [ "$index" -lt "${#servers[@]}" ]; do
server="${servers[$index]}"
index=$((index+1))
server="${server}.$POD_TARGET_BASE"
NO_HEADER=true \
API_TMOUT=$API_REKEY_QUERY_TMOUT \
vaultAPI response GET "$server" "$endpoint"
if [ $? -ne 0 -o -z "$response" ]; then
# failing the REST API call is not the same
# as non-conformance
return 2
fi
record="$( echo "$response" | jq -r "$jscript" )"
if [ "$record" != "$required" ]; then
log $ERROR "$server does not conform to:" \
"$( echo "$data" | jq -c '.' )"
log $DEBUG "$server does not confirm: $response"
error=1
continue
fi
log $DEBUG "$server conforms: $response"
done
return $error
}
# Test whether the vault server(s) agree about rekey status
#
# The parameter is the quoted string of json data to pass to
# assertRekeyStatus
#
# Returns the normal linux success=0, failure!=0
function assertServerStatus {
local reference="$1"
local pods
local count
pods="$( getVaultPods | awk '{print $2}' )"
count="$( echo $pods | wc -w )"
if [ "$count" -ne "$HA_REPLICAS" ]; then
log $ERROR "server without IP does not conform"
return 1
fi
assertRekeyStatus "/sys/rekey/init" "$reference" $pods
}
# Test whether the vault server(s) agree about rekey validation
# status. Warn when the active vault server changes
#
# The parameter is the quoted string of json data to pass to
# assertRekeyStatus
#
# Returns the normal linux success=0, failure!=0
function assertVerifyStatus {
local reference="$1"
local response
local pods
local result
local count
# first assert the rekey status; /sys/rekey/verify returns
# error if a server does not have rekey in progress
NO_HEADER=true \
API_TMOUT=$API_REKEY_QUERY_TMOUT \
vaultAPI response GET $ACTIVE_TARGET /sys/rekey/init
result=$?
if [ "$result" -ne 0 ]; then
return $result
fi
assertServerStatus "$response"
result=$?
if [ $result -ne 0 ]; then
return $result
fi
pods="$( getVaultPods | awk '{print $2}' )"
count="$( echo $pods | wc -w )"
if [ "$count" -ne "$HA_REPLICAS" ]; then
log $ERROR "server without IP does not conform"
return 1
fi
assertRekeyStatus "/sys/rekey/verify" "$reference" $pods
}
# Assert that the /sys/rekey/init endpoint reports no
# rekey procedure in progress on any server
#
# Returns the normal linux success=0, failure!=0
function assertNoRekey {
local data
data='{"nonce": "", "started": false, "progress": 0'
data="$data"', "verification_required": false}'
assertServerStatus "$data"
}
# Retrieve the rekey status from active vault server
# and assert that all servers conform to the status
#
# Returns the normal linux success=0, failure!=0
function assertServersConform {
local response
local value
local result
local pods
local count
NO_HEADER=true \
API_TMOUT=$API_REKEY_QUERY_TMOUT \
vaultAPI response GET $ACTIVE_TARGET /sys/rekey/init
if [ $? -ne 0 ]; then
# cannot check conformance
log $ERROR "Cannot check server conformance to" \
"/sys/rekey/init"
return 2
fi
assertServerStatus "$response"
result="$?"
if [ "$result" -ne 0 ]; then
return $result
fi
value="$( echo "$response" | jq -r '.verification_nonce' )"
if [ -z "$value" -o "$value" == "null" ]; then
return 0
fi
NO_HEADER=true \
API_TMOUT=$API_REKEY_QUERY_TMOUT \
vaultAPI response GET $ACTIVE_TARGET /sys/rekey/verify
if [ $? -ne 0 ]; then
# cannot check conformance
log $ERROR "Cannot check server conformance to" \
"/sys/rekey/verify"
return 2
fi
pods="$( getVaultPods | awk '{print $2}' )"
count="$( echo $pods | wc -w )"
if [ "$count" -ne "$HA_REPLICAS" ]; then
log $ERROR "server without IP does not conform"
return 1
fi
assertRekeyStatus "/sys/rekey/verify" "$response" $pods
}
# This function is used during the pre-rekey assertions
# Testing if the main loop (via PODREC_F) indicates a server
# is not running.
function allServersRunning {
local records
local count
records="$( grep "^/$VAULT_FN" "$PODREC_F" )"
count="$( awk -F/ '{print $2}' <<<"$records" | wc -w )"
if [ "$count" -ne "$HA_REPLICAS" ]; then
return 1
fi
return 0
}
# This function is used during the pre-rekey assertions
# Testing if the main loop (via PODREC_F) indicates a server
# is sealed
function allServersUnsealed {
local records
local count
records="$( grep "^/$VAULT_FN" "$PODREC_F" )"
count="$( grep "/false/" <<<"$records" \
| awk -F/ '{print $2}' | wc -w )"
if [ "$count" -ne "$HA_REPLICAS" ]; then
return 1
fi
return 0
}
# This function is used during the pre-rekey assertions
# Testing if the main loop (via PODREC_F) indicates a server
# is missing an IP address
function allServersHaveIP {
local records
local count
records="$( grep "^/$VAULT_FN" "$PODREC_F" )"
count="$( echo "$records" | awk -F/ '{print $3}' | wc -w )"
if [ "$count" -ne "$HA_REPLICAS" ]; then
return 1
fi
return 0
}
# Check the vault server pods' metadata label "vault-version",
# and assert that all servers are running the expected version
# which is coded in vault-manager values.yaml server.version
function allServersCurrent {
local jdata
local podcount
local i
local poddata
local name
local version
jdata="$( kubectl get pods -n "$VAULT_NS" -o json )"
podcount="$( echo "$jdata" | jq ".items | length" )"
for i in $( seq 0 $((podcount -1 )) ); do
poddata="$( echo "$jdata" | jq ".items[$i]" )"
name="$( echo "$poddata" | jq -r ".metadata.name" )"
if ! [[ "$name" =~ ^${VAULT_FN}-[0-9]$ ]]; then
# this is not a vault server pod
continue
fi
version="$( echo "$poddata" \
| jq -r '.metadata.labels["vault-version"]' )"
if [ "$version" != "$VAULT_VERSION" ]; then
log $INFO "Vault server pod $name is version $version"
return 1
fi
log $DEBUG "Vault server pod $name is version $version"
done
return 0
}
# Test the status of rekey procedure 'started' during pre-rekey
# tests for procedure progress selection (sharing a single vaultAPI
# call to GET /sys/rekey/init)
#
# Return linux true (0) if the status of /sys/rekey/init includes
# started == true
#
# Optional argument --not inverts the logic, but maintains
# error response 2
function assertRekeyStarted {
local started
local not="$1"
# assert that a rekey is in progress
started="$( echo "$REKEY_STATUS_JSON" | jq -r '.started' )"
if [ "$started" == "true" ]; then
started=0
elif [ "$started" != "false" ]; then
# the rekey status is unclear
# an error is probably printed
log $DEBUG "unclear response for /sys/rekey/init:" \
"$( jq -c <<<"$REKEY_STATUS_JSON" )"
return 2
else
started=1
fi
if [ "$started" -eq 0 ]; then
if [ "$not" == "--not" ]; then
return 1
fi
return 0
fi
if [ "$not" == "--not" ]; then
return 0
fi
return 1
}
# Delete the shard secrets with the specified prefix
#
# The secrets are deleted with a single kubectl command
function deleteShardSecrets {
local prefix="$1"
local i
local list=''
for i in $( seq 0 $((KEY_SECRET_SHARES-1)) ); do
if [ -n "$( secretExists "${prefix}-$i" )" ]; then
list="$list ${prefix}-$i"
fi
done
if [ -n "$list" ]; then
deleteSecrets $list
return $?
fi
return 0
}
# Make a copy of the shard secrets with specified prefix
#
# The calling function needs to verify the result
function copyShardSecrets {
local from="$1"
local to="$2"
local i
for i in $( seq 0 $((KEY_SECRET_SHARES-1))); do
get_secret "${from}-$i" \
| set_secret "${to}-$i" /dev/stdin
if [ $? -ne 0 ]; then
# don't try anything else
log $ERROR "Failed to copy ${from}-$i to ${to}-$i"
break
fi
done
}
# Just log the content of cluster-rekey-request again
#
# Keeps track of whether vault-manager has been restarted
# with REKEY_STARTED variable, so that the rekey procedure
# status is documented in log
function rekeyResuming {
if [ "$REKEY_STARTED" -ne 0 ]; then
log $INFO "Resuming rekey:" \
"$( get_secret cluster-rekey-request )"
REKEY_STARTED=0
fi
}
# Return linux true (0) if a rekey is requested and the vault
# server pods are in a stable condition
#
# If the vault servers are not "stable" then the rekey operation
# needs that stability first. vault-manager's main runStateMachine
# will monitor pods and restore unsealed status.
function needsRekey {
local pods
local sealed
local response
local apiversion
# the first milestone to be created is cluster-rekey-request;
# the last milestone to be deleted is cluster-rekey-audit;
# proceed if any exists
secretsExistAny cluster-rekey-request \
cluster-rekey-verified \
cluster-rekey-shuffle \
cluster-rekey-audit
if [ $? -ne 0 ]; then
# rekey is not requested
return 1
fi
# progress the rekey procedure only if the servers are all
# running
if ! allServersRunning; then
log $INFO "Rekey: wait for vault servers to equal" \
"$HA_REPLICAS"
return 1
fi
# progress the rekey procedure only if the servers were
# previously unsealed.
if ! allServersUnsealed; then
log $INFO "Rekey: wait for unsealed vault servers to" \
"equal $HA_REPLICAS"
return 1
fi
# progress the rekey procedure only if the servers all have
# DNS names (IP addresses) provided by k8s
if ! allServersHaveIP; then
log $INFO "Rekey: wait for $HA_REPLICAS vault servers" \
"to have IP addresses"
return 1
fi
# progress a rekey if all server pods are running the expected
# server version
if ! allServersCurrent; then
log $INFO "Rekey: wait for vault servers to be updated" \
"to the current version $VAULT_VERSION"
return 1
fi
# The above four tests are based on the output of the kubectl get
# pods command. Double-check with a REST API call to each server
pods="$( getVaultPods | grep "^$VAULT_FN" | awk '{print $2}' )"
for pod in $pods; do
NO_HEADER=true \
API_TMOUT=$QUERY_TMOUT \
vaultAPI response GET ${pod}.$POD_TARGET_BASE /sys/health
if [ $? -ne 0 ]; then
log $ERROR "$pod fails health check during rekey"
return 1
fi
sealed="$( echo "$response" | jq -r '.sealed' )"
if [ "$sealed" != "false" ]; then
log $ERROR "$pod is sealed during rekey"
return 1
fi
apiversion="$( echo "$response" | jq -r '.version' )"
if [ "$apiversion" != "$VAULT_VERSION" ]; then
log $ERROR "$pod is not version $VAULT_VERSION"
return 1
fi
done
assertServersConform
return $?
}
# Return linux true (0) if the current step of the rekey procedure
# is to send the initialize request to /sys/rekey/init
#
# Initialize is the first step
#
# Will not begin initialization if there are stale cluster-rekey or
# cluster-key-bk secrets
function needsInitialization {
local progress
local count
local error=0
assertRekeyStarted --not
progress=$?
if [ "$progress" -ne 0 ]; then
return "$progress"
fi
# skip if this represents a recovery path
secretsExistAny cluster-rekey-verified \
cluster-rekey-shuffle \
cluster-rekey-audit
if [ $? -eq 0 ]; then
return 1
fi
# make assertions about the artifacts left behind by previous
# rekey procedure attempts
# assert that there are no stale keys before starting rekey
assertShardSecrets cluster-rekey --nokeys
count=$?
if [ "$count" -ne 0 ]; then
log $ERROR "Stale cluster-rekey secrets ($count) present"
# there was a possibility that vault had cancelled the rekey
# due to active server failure, so fall through to
# rekeyRecovery
return 1
fi
assertShardSecrets cluster-key-bk --nokeys
count=$?
if [ "$count" -ne 0 ]; then
log $ERROR "cluster-key-bk secrets ($count) present"
return 2
fi
return 0
}
# Start the rekey procedure
#
# Send the initialize request to /sys/rekey/init
#
# Initialize is the first step
#
# Will not begin initialization if there are stale cluster-rekey or
# cluster-key-bk secrets
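# The request body assembled below looks like this, e.g. with
# KEY_SECRET_SHARES=5 and KEY_REQUIRED_THRESHOLD=3 (illustrative
# values only):
#   {"secret_shares": 5, "secret_threshold": 3,
#    "require_verification": true}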
function rekeyInitialize {
local shares
local threshold
local verify
local data
local response
local value
log $INFO "Initializing vault rekey"
REKEY_STARTED=0
shares='"secret_shares": '$KEY_SECRET_SHARES
threshold='"secret_threshold": '$KEY_REQUIRED_THRESHOLD
verify='"require_verification": true'
data="{$shares,$threshold,$verify}"
NO_HEADER=true \
API_TMOUT=$API_REKEY_OP_TMOUT \
vaultAPI response POST $ACTIVE_TARGET /sys/rekey/init "$data"
if [ $? -ne 0 ]; then
return 1
fi
value="$( echo "$response" | jq -r ".started" )"
if [ 'false' == "$value" ]; then
log $ERROR "Rekey not started"
return 1
fi
# log the nonce
value="$( echo "$response" | jq -r ".nonce" )"
verify="$( echo "$response" | jq -r ".verification_required" )"
log $INFO "Rekey started: $value" \
"(verification_required==$verify)"
# just a sanity check
if [ 'true' != "$verify" ]; then
log $ERROR "Rekey started without verification_required:" \
"aborting"
NO_HEADER=true \
API_TMOUT=$API_REKEY_OP_TMOUT \
vaultAPI response DELETE $ACTIVE_TARGET /sys/rekey/init
return 1
fi
assertServerStatus "$response"
return $?
}
# The rekey authentication should happen when
# - there is a rekey in progress
# - there is no verification_nonce yet
#
# Authentication of the rekey request is the second step
#
# Omit rekey authentication if:
# - there are existing cluster-rekey secrets
# - verification is complete: cluster-rekey-verified or any later
#   stage is complete
#
# Return linux true (0) if the current stage of rekey
# is to authenticate the rekey request
function needsAuthentication {
local progress
assertRekeyStarted
progress=$?
if [ "$progress" -ne 0 ]; then
return "$progress"
fi
progress="$( echo "$REKEY_STATUS_JSON" \
| jq -r '.verification_nonce' )"
if ! [ -z "$progress" -o "$progress" == "null" ]; then
# There is a rekey in progress with a verification nonce
# pass through to recovery
return 1
fi
# this represents a recovery path
assertShardSecrets cluster-rekey --nokeys
if [ $? -ne 0 ]; then
# There are already cluster-rekey secrets
return 1
fi
# skip if this represents a recovery path
secretsExistAny cluster-rekey-verified \
cluster-rekey-shuffle \
cluster-rekey-audit
if [ $? -eq 0 ]; then
return 1
fi
    return 0
}
# Submits a keyshard for the rekey procedure
# Returns 0 on success
# Returns 1 on failure
# Returns KEY_SECRET_SHARES when authentication completes
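# Illustrative request body for /sys/rekey/update (values are
# examples only):
#   {"nonce": "<rekey nonce>", "key": "<key shard>"}
# A response that includes verification_nonce also carries the
# new keys/keys_base64 arrays that are stored below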
function rekeySubmitShard {
local nonce="$1"
local index="$2"
local verifyauth="$3"
local prefix="$4"
local shard
local dnonce
local key
local data
local response
local progress
local root_token
local new_doc
if [ -z "$prefix" ]; then
prefix=cluster-key
fi
shard="$( get_secret "${prefix}-$index" | jq -r .keys[0] )"
dnonce='"nonce": "'$nonce'"'
key='"key": "'$shard'"'
data="{$dnonce,$key}"
NO_HEADER=true \
API_TMOUT=$API_REKEY_OP_TMOUT \
vaultAPI response POST $ACTIVE_TARGET /sys/rekey/update "$data"
if [ $? -ne 0 ]; then
return 1
fi
# Check the response for verification_nonce, which
# indicates completion
progress="$( echo "$response" | jq -r '.verification_nonce' )"
if [ -n "$progress" -a "$progress" != 'null' ]; then
log $INFO "Success authenticating:" \
"$((index+1)) of $KEY_REQUIRED_THRESHOLD"
if [ "$verifyauth" == "--verify-auth" ]; then
# delete the rekey and return success
NO_HEADER=true \
API_TMOUT=$API_REKEY_OP_TMOUT \
vaultAPI response DELETE $ACTIVE_TARGET /sys/rekey/init
return "$KEY_SECRET_SHARES"
fi
# Procedure to ensure that the old and new shards are
# secured in k8s secrets. Deletion of old shards will only
# occur when verification is successful.
root_token="$( get_secret cluster-key-root )"
new_doc="$( echo "$response" \
| jq -c '{"keys": .keys,
"keys_base64": .keys_base64,
"root_token": "'"$root_token"'"}' )"
# store the new shards
echo "$response" \
| jq -c '{"keys": .keys, "keys_base64": .keys_base64}' \
| storeVaultInitSecrets cluster-rekey
# check that the secrets match vault's rekey response
echo "$new_doc" | validateSecrets cluster-rekey
if [ $? -ne 0 ]; then
# calling function will abort the rekey
# and any cluster-rekey secrets
log $ERROR "Failed to store and verify shards" \
"after rekey authentication complete"
return 1
fi
# authentication of the rekey request is completed
# successfully
log $INFO "Rekey authentication successful"
return "$KEY_SECRET_SHARES"
fi
# Otherwise verify the response
progress="$( echo "$response" | jq -r '.progress' )"
index="$((index+1))"
if [ "$progress" -ne "$index" ]; then
log $ERROR "Authentication sequence mismatching" \
"($progress, $index)"
return 1
fi
# assert that the servers agree
assertServerStatus "$response"
if [ $? -ne 0 ]; then
log $ERROR "Vault server rekey status fails during" \
"authentication at $index of $KEY_REQUIRED_THRESHOLD"
return 1
fi
log $INFO "Success authenticating:" \
"$index of $KEY_REQUIRED_THRESHOLD"
return 0
}
# Authenticate the rekey request by submitting key shards
#
# Authentication of the rekey request is the second step
#
function rekeyAuthenticate {
local verifyauth="$1"
local prefix="$2"
local response
local index
local value
local nonce
local progress
local result
NO_HEADER=true \
API_TMOUT=$API_REKEY_QUERY_TMOUT \
vaultAPI response GET $ACTIVE_TARGET /sys/rekey/init
if [ $? -ne 0 ]; then
# an error is already printed
return 1
fi
value="$( echo "$response" | jq -r '.started' )"
if [ 'true' != "$value" ]; then
log $ERROR "Rekey authentication, but rekey not in progress"
return 1
fi
nonce="$( echo "$response" | jq -r '.nonce' )"
progress="$( echo "$response" | jq -r '.progress' )"
if ! [[ "$progress" =~ ^[0-9]{1,}$ ]]; then
log $ERROR "Rekey authentication progress not integer:" \
"$response"
return 1
elif [ "$progress" -ge "$KEY_SECRET_SHARES" ]; then
log $ERROR "Rekey authentication progress out of range:" \
"$response"
return 1
fi
if [ "$progress" -ne 0 ]; then
log $WARNING "Continue authenticating rekey at: $progress"
fi
# authenticate and store the new keys
for index in $( seq $progress $((KEY_SECRET_SHARES-1)) ); do
rekeySubmitShard "$nonce" "$index" $verifyauth $prefix
result="$?"
if [ "$result" -eq "$KEY_SECRET_SHARES" ]; then
# start the verify procedure now
if [ "$verifyauth" != "--verify-auth" ]; then
log $INFO "Starting rekey verify"
fi
break
elif [ "$result" -ne 0 ]; then
return $result
fi
done
return 0
}
# The rekey verification should happen when
# - there is a rekey in progress
# - there is a verification_nonce
#
# Omit rekey verification if:
# - there are existing cluster-rekey secrets
# - Verification is complete: cluster-rekey-verified or any later
# stage is complete
#
# Return linux true (0) if the current stage of rekey
# is to complete the rekey verification
function needsVerify {
local progress
assertRekeyStarted
progress=$?
if [ "$progress" -ne 0 ]; then
return "$progress"
fi
progress="$( echo "$REKEY_STATUS_JSON" \
| jq -r '.verification_nonce' )"
if [ -z "$progress" -o "$progress" == "null" ]; then
# There is a rekey in progress, but not with a
# verification nonce
return 1
fi
# Assert that the nonce is UUID-ish
if ! [[ "$progress" =~ ^[a-f0-9-]{36}$ ]]; then
log $ERROR "The verification_nonce is not UUID-ish:" \
"$REKEY_STATUS_JSON"
return 2
fi
assertShardSecrets cluster-rekey
if [ $? -ne 0 ]; then
# this should not happen: verify in progress but no
# cluster-rekey secrets
log $ERROR "rekey verify in progress but no cluster-rekey"
return 1
fi
# skip if this represents a recovery path
secretsExistAny cluster-rekey-verified \
cluster-rekey-shuffle \
cluster-rekey-audit
if [ $? -eq 0 ]; then
return 1
fi
return 0
}
# Submits a keyshard for the rekey verification procedure
# Returns 0 on success
# Returns 1 on failure
# Returns KEY_REQUIRED_THRESHOLD when verification completes
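# Illustrative request body for /sys/rekey/verify (values are
# examples only):
#   {"nonce": "<verification_nonce>", "key": "<new key shard>"}
# A response with "complete": true ends the verification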
function rekeyVerifySubmitShard {
local nonce="$1"
local index="$2"
local shard
local dnonce
local key
local data
local response
local progress
shard="$( get_secret cluster-rekey-$index \
| jq -r .keys[0] )"
dnonce='"nonce": "'$nonce'"'
key='"key": "'$shard'"'
data="{$dnonce,$key}"
NO_HEADER=true \
API_TMOUT=$API_REKEY_OP_TMOUT \
vaultAPI response POST $ACTIVE_TARGET \
/sys/rekey/verify "$data"
if [ $? -ne 0 ]; then
# an error is printed
return 1
fi
progress="$( echo "$response" | jq -r ".complete" )"
if [ "$progress" == 'true' ]; then
log $INFO "Success verifying: using new shards"
set_secret cluster-rekey-verified /dev/stdin \
<<<"$( get_secret cluster-rekey-request )"
return $KEY_REQUIRED_THRESHOLD
fi
progress="$( echo "$response" | jq -r ".progress" )"
if [ -z "$progress" -o "$progress" == "null" ]; then
log $ERROR "Expecting rekey verify progress" \
"[$((index+1))] instead of [$progress]"
return 1
fi
# Print the progress of rekey verify.
if [ "$((index+1))" -eq "$progress" ]; then
log $INFO "Success verifying:" \
"$progress of $KEY_REQUIRED_THRESHOLD"
elif [ "$((index+1))" -gt "$progress" ]; then
# A sanity check only
log $WARNING "Verify progress [$progress] less" \
"than expected [$((index+1))]"
else
# A sanity check only
log $WARNING "Verify progress [$progress]" \
"greater than expected [$((index+1))]"
fi
assertVerifyStatus "$response"
if [ $? -ne 0 ]; then
log $ERROR "Vault server verify status fails during" \
"authentication at" \
"$index of $KEY_REQUIRED_THRESHOLD"
return 1
fi
    return 0
}
# Perform the rekey verification by replaying the stored shard
# secrets to the vault server
#
# This step confirms that vault manager has correctly stored the
# shards received from the vault server. This allows failures of
# the procedure to be recovered:
# - receive the shards from vault
# - store the shards in k8s secrets
# - play the shards back to vault
# - upon successful verification the new shards are effective
#
# Verification of the rekey request is the Third step
#
function rekeyVerify {
local value
local nonce
local progress
local response
local shard
local dnonce
local key
local data
local index
NO_HEADER=true \
API_TMOUT=$API_REKEY_QUERY_TMOUT \
vaultAPI response GET $ACTIVE_TARGET /sys/rekey/verify
if [ $? -ne 0 ]; then
# an error is already printed
return 1
fi
value="$( echo "$response" | jq -r '.started' )"
if [ 'true' != "$value" ]; then
log $ERROR "Rekey verify, but rekey not in progress"
return 1
fi
nonce="$( echo "$response" | jq -r '.nonce' )"
progress="$( echo "$response" | jq -r '.progress' )"
if ! [[ "$progress" =~ ^[0-9]{1,}$ ]]; then
log $ERROR "Rekey authentication progress not integer:" \
"$response"
return 1
elif [ "$progress" -ge "$KEY_SECRET_SHARES" ]; then
log $ERROR "Rekey authentication progress out of range:" \
"$response"
return 1
fi
if [ "$progress" -ne 0 ]; then
log $WARNING "Continue verifying rekey at: $progress"
fi
# assert that the servers agree on verify status
assertVerifyStatus "$response"
if [ $? -ne 0 ]; then
return 1
fi
# authenticate the verify procedure
for index in $( seq $progress $((KEY_SECRET_SHARES-1)) ); do
rekeyVerifySubmitShard "$nonce" "$index"
result=$?
if [ "$result" -eq "$KEY_REQUIRED_THRESHOLD" ]; then
# rekeyVerifySubmitShard returns KEY_REQUIRED_THRESHOLD
# when .complete == true was received
return 0
elif [ "$result" -ne 0 ]; then
# any other non-zero result is a failure
return 1
fi
done
log $ERROR "Verify procedure ended without completion"
return 1
}
# The shuffling of key shards in k8s secrets should happen when
# the cluster-rekey-verified procedure step is completed.
#
# Omit shuffling if:
# - vault server reports rekey in progress (unclear status)
# - shuffling is already complete: cluster-rekey-shuffle or later
# stage is complete
# - there are no cluster-rekey secrets
# - there are cluster-key-bk secrets
#
# Return linux true (0) if the current stage of rekey
# is to complete the swapping of validated shards
function needsShuffle {
local progress
# assert that a rekey is not in progress
assertRekeyStarted --not
progress=$?
if [ "$progress" -ne 0 ]; then
# 1 - maintain the status of rekey in progress
# 2 - api error, try again later
return "$progress"
fi
secretExists cluster-rekey-verified >/dev/null
if [ $? -ne 0 ]; then
# proceeds to next procedure step
return 1
fi
# skip if this represents a recovery path
secretsExistAny cluster-rekey-shuffle \
cluster-rekey-audit
if [ $? -eq 0 ]; then
return 1
fi
assertShardSecrets cluster-rekey
case $? in
0)
# There is no rekey in progress, and there is a set
# of cluster-rekey shards recorded
;;
$KEY_SECRET_SHARES)
# There is no rekey in progress, and there are no
# cluster-rekey shards recorded
return 1
;;
*)
# with cluster-rekey-verified, an incomplete set of
# cluster-rekey indicates partial deletion after copying
# to cluster-key
# will want to audit the cluster-key secrets before
# deleting cluster-rekey
log $WARNING "The number key shard secrets for" \
"cluster-rekey is not complete"
return 1
;;
esac
    # otherwise allow rekeyShuffleKeys to be re-entrant with respect
    # to the existence or absence of cluster-key and cluster-key-bk;
    # cluster-rekey is only deleted when confirmed to be copied to
    # cluster-key
return 0
}
# This procedure shuffles the shard secrets from cluster-rekey to
# cluster-key to cluster-key-bk
#
# The function is intended to resolve failures of the vault manager
# process where it is interrupted abruptly, such as with kill -9.
# In combination with needsShuffle it can be re-run until it
# completes the shuffle:
# - cluster-key shards are copied to cluster-key-bk
# - cluster-key shards are deleted
# - cluster-rekey is copied to cluster-key
# - cluster-rekey is deleted
#
# A subsequent step audits the new keys before deleting the
# cluster-key-bk secrets
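# With KEY_SECRET_SHARES=5 (an illustrative value), a completed
# shuffle rotates the secrets as follows:
#   cluster-key-{0..4}   -> cluster-key-bk-{0..4}  (old shards)
#   cluster-rekey-{0..4} -> cluster-key-{0..4}     (new shards)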
function rekeyShuffleKeys {
local key_exists
local rekey_exists
local bk_exists
local key_doc=""
local rekey_doc=""
assertShardSecrets cluster-key
key_exists=$?
assertShardSecrets cluster-rekey
rekey_exists=$?
assertShardSecrets cluster-key-bk
bk_exists=$?
if [ "$key_exists" -eq 0 ]; then
key_doc="$( reconstructInitResponse cluster-key )"
echo "$key_doc" | validateSecrets cluster-key
if [ $? -ne 0 ]; then
            log $ERROR "Failed to read cluster-key"
return 1
fi
fi
if [ "$rekey_exists" -eq 0 ]; then
rekey_doc="$( reconstructInitResponse cluster-rekey )"
echo "$rekey_doc" | validateSecrets cluster-rekey
if [ $? -ne 0 ]; then
log $ERROR "Failed to read cluster-rekey"
return 1
fi
else
# this is recovery path
if [ -n "key_doc" ]; then
log $WARNING "Progress cluster-rekey-shuffle without" \
"cluster-rekey"
set_secret cluster-rekey-shuffle /dev/stdin \
<<<"$( get_secret cluster-rekey-request )"
return
fi
log $ERROR "No cluster-key or cluster-rekey"
return 1
fi
if [ "$bk_exists" -lt "$KEY_SECRET_SHARES" \
-a "$bk_exists" -ne 0 ]; then
# this is a recovery path
# an incomplete copy of cluster-key secrets
if [ -n "$key_doc" ]; then
deleteShardSecrets cluster-key-bk
assertShardSecrets cluster-key-bk
bk_exists=$?
if [ "$bk_exists" -lt "$KEY_SECRET_SHARES" ]; then
log $ERROR "Failed to delete incomplete" \
"cluster-key-bk"
return 1
fi
else
            # this shouldn't happen; either failure alone might be
            # anticipated, but not both at once
            log $ERROR "Sanity: incomplete cluster-key-bk and" \
                "missing/incomplete cluster-key secrets"
return 1
fi
fi
if [ "$bk_exists" -eq 0 ]; then
# this is a recovery path
if [ -n "$key_doc" ]; then
# Assert that cluster-key and cluster-key-bk are the
# same
log $INFO "Recovering from pre-existing cluster-key-bk"
echo "$key_doc" | validateSecrets cluster-key-bk
if [ $? -eq 0 ]; then
# cluster-key-bk == cluster-key
deleteShardSecrets cluster-key
assertShardSecrets cluster-key
key_exists=$?
key_doc=""
else
echo "$key_doc" | validateSecrets cluster-rekey
if [ $? -eq 0 ]; then
# Recovering cluster-key == cluster-rekey
log $INFO "Recovering with cluster-key"
deleteShardSecrets cluster-rekey
set_secret cluster-rekey-shuffle /dev/stdin \
<<<"$( get_secret cluster-rekey-request )"
return 0
else
log $ERROR "Three different sets of keys" \
"in k8s secrets"
return 1
fi
fi
fi
# else: there is no cluster-key to backup
else
# this is the normal procedure path
log $INFO "Copying cluster-key secrets to cluster-key-bk"
copyShardSecrets cluster-key cluster-key-bk
echo "$key_doc" | validateSecrets cluster-key-bk
if [ $? -ne 0 ]; then
log $ERROR "Failed to copy cluster-key to cluster-key-bk"
deleteShardSecrets cluster-key-bk
return 1
fi
deleteShardSecrets cluster-key
if [ $? -ne 0 ]; then
log $ERROR "Failed to delete cluster-key secrets"
return 1
fi
assertShardSecrets cluster-key
key_exists=$?
key_doc=""
fi
# cluster-key-bk exists here
# cluster-rekey rekey_doc is valid here
    # if cluster-key secrets still exist, e.g., fewer than
    # KEY_SECRET_SHARES of them, then delete them; deleteShardSecrets
    # is a no-op if there are none
    deleteShardSecrets cluster-key
    if [ $? -ne 0 ]; then
        log $ERROR "Failed to delete cluster-key"
        # try again later
        return 1
    fi
log $INFO "Copying cluster-rekey secrets to cluster-key"
copyShardSecrets cluster-rekey cluster-key
echo "$rekey_doc" | validateSecrets cluster-key
if [ $? -ne 0 ]; then
log $ERROR "Failed to copy cluster-rekey to cluster-key"
return 1
fi
deleteShardSecrets cluster-rekey
set_secret cluster-rekey-shuffle /dev/stdin \
<<<"$( get_secret cluster-rekey-request )"
return 0
}
# The audit of cluster-key should happen when these other procedure
# steps are completed:
# - cluster-rekey-verified
# - cluster-rekey-shuffle
#
# Omit audit if:
# - vault server reports rekey in progress (failed previous audit?)
# - audit is already complete: cluster-rekey-audit exists
#
# Return linux true (0) if the current stage of rekey
# is to run the audit
function needsAudit {
local progress
# assert that a rekey is not in progress
assertRekeyStarted --not
progress=$?
if [ "$progress" -ne 0 ]; then
return "$progress"
fi
# Select recovery path with response '3'
secretExists cluster-rekey-audit >/dev/null
if [ $? -eq 0 ]; then
# this path indicates a failure to complete
# finalizeRekey. cluster-rekey-audit is the last
# milestone to be deleted
log $INFO "rekey audit already completed"
return 3
fi
secretExists cluster-rekey-request >/dev/null
if [ $? -ne 0 ]; then
return 1
fi
secretExists cluster-rekey-verified >/dev/null
if [ $? -ne 0 ]; then
return 1
fi
secretExists cluster-rekey-shuffle >/dev/null
if [ $? -ne 0 ]; then
return 1
fi
assertShardSecrets cluster-key
if [ $? -ne 0 ]; then
log $ERROR "rekey audit requested but cluster-keys absent"
return 1
fi
    return 0
}
# Audit that the active vault server authenticates with the cluster
# keys specified by prefix
#
# Returns 0 on success
# Returns 1 if the audit fails
# Returns 2 if there was a failure unrelated to authentication
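# Usage sketch:
#   rekeyAudit               # defaults to auditing cluster-key
#   rekeyAudit cluster-rekey # e.g., from rekeyRecovery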
function rekeyAudit {
local prefix="$1"
local value
local response
if [ -z "$prefix" ]; then
prefix="cluster-key"
fi
log $INFO "Auditing the shards in $prefix secrets"
assertNoRekey
if [ $? -ne 0 ]; then
log $ERROR "Cannot audit with rekey in progress"
return 2
fi
assertShardSecrets "$prefix"
if [ $? -ne 0 ]; then
log $ERROR "Audit fails with absent $prefix secrets"
return 1
fi
rekeyInitialize
if [ $? -ne 0 ]; then
NO_HEADER=true \
API_TMOUT=$API_REKEY_OP_TMOUT \
vaultAPI response DELETE $ACTIVE_TARGET /sys/rekey/init
return 2
fi
NO_HEADER=true \
API_TMOUT=$API_REKEY_QUERY_TMOUT \
vaultAPI response GET $ACTIVE_TARGET /sys/rekey/init
if [ $? -ne 0 ]; then
# There's no reason to believe this one will succeed where
# the other hadn't
NO_HEADER=true \
API_TMOUT=$API_REKEY_OP_TMOUT \
vaultAPI response DELETE $ACTIVE_TARGET /sys/rekey/init
return 2
fi
value="$( echo "$response" | jq -r ".verification_required" )"
if [ "$value" != "true" ]; then
log $ERROR "Audit sanity: verification_required not set:" \
"$response"
NO_HEADER=true \
API_TMOUT=$API_REKEY_OP_TMOUT \
vaultAPI response DELETE $ACTIVE_TARGET /sys/rekey/init
return 1
fi
rekeyAuthenticate --verify-auth "$prefix"
result="$?"
if [ "$result" -eq 0 ]; then
log $INFO "Audit of cluster-key secrets passes"
else
NO_HEADER=true \
API_TMOUT=$API_REKEY_OP_TMOUT \
vaultAPI response DELETE $ACTIVE_TARGET /sys/rekey/init
fi
return $result
}
# Clean up the artifacts from the rekey procedure
# The audit procedure proves the shards in cluster-key
# secrets will unseal the vault.
#
# If vault-manager is killed during this procedure step it should
# continue to try to delete the artifacts until finally deleting
# cluster-rekey-audit
function finalizeRekey {
local secrettext
secrettext="$( get_secret cluster-rekey-audit )"
log $INFO "removing artifacts of the rekey procedure:" \
"$secrettext"
assertShardSecrets cluster-rekey --nokeys
if [ $? -ne 0 ]; then
log $WARNING "removing cluster-rekey secrets" \
"after audit"
deleteShardSecrets cluster-rekey
fi
deleteShardSecrets cluster-key-bk
deleteSecrets cluster-rekey-verified
deleteSecrets cluster-rekey-shuffle
deleteSecrets cluster-rekey-request
deleteSecrets cluster-rekey-audit
log $INFO "Rekey request complete: $secrettext"
}
# This procedure handles a few cases where the vault active server
# or vault-manager were killed.
#
# - rekey authentication completed but vault-manager was killed
#   before the shards could be stored
# - rekey verification may be cancelled by the failure of the
#   active vault server
#
function rekeyRecovery {
local key_exists
local rekey_exists
local bk_exists
local verified_exists
local shuffle_exists
local audit_exists
local inprogress
local verifyprogress
log $INFO "Recovering the rekey procedure"
    # assert that the vault servers are all up and agree
# about the rekey status
allServersRunning \
&& allServersHaveIP \
&& allServersUnsealed \
|| return 1
NO_HEADER=true \
API_TMOUT=$API_REKEY_QUERY_TMOUT \
vaultAPI REKEY_STATUS_JSON GET $ACTIVE_TARGET /sys/rekey/init
if [ $? -ne 0 ]; then
# an error is printed
# wait for recovery
REKEY_STATUS_JSON=''
return 1
fi
assertServerStatus "$REKEY_STATUS_JSON"
if [ $? -ne 0 ]; then
# wait for the vault servers to sync
return 1
fi
inprogress="$( echo "$REKEY_STATUS_JSON" | jq -r '.started' )"
verifyprogress="$( echo "$REKEY_STATUS_JSON" \
| jq -r '.verification_nonce' )"
if [ "$inprogress" == "true" ]; then
# If a rekey is in progress, then cancel it
# - an authentication will reinitialize
        # - a verification will reinitialize
# - a rekeyAudit will retry
log $INFO "Cancelling rekey in progress"
NO_HEADER=true \
API_TMOUT=$API_REKEY_OP_TMOUT \
vaultAPI response DELETE $ACTIVE_TARGET /sys/rekey/init
if [ $? -ne 0 ]; then
# retry later
return 1
fi
fi
assertShardSecrets cluster-key
key_exists=$?
assertShardSecrets cluster-rekey
rekey_exists=$?
assertShardSecrets cluster-key-bk
bk_exists=$?
secretExists cluster-rekey-verified >/dev/null
verified_exists=$?
secretExists cluster-rekey-shuffle >/dev/null
shuffle_exists=$?
secretExists cluster-rekey-audit >/dev/null
audit_exists=$?
# review each of the milestones to discern the failure point
if [ "$audit_exists" -eq 0 ]; then
true
# no recovery options here
# pass through
elif [ "$shuffle_exists" -eq 0 ]; then
true
# no recovery options here
# pass through
elif [ "$verified_exists" -eq 0 ]; then
if [ "$rekey_exists" -gt 0 ]; then
if [ "$rekey_exists" -lt "$KEY_SECRET_SHARES" ]; then
# with verified_exists, indicates partial deletion
# of the cluster-rekey secrets after copying to
# cluster-key. Audit the cluster-key secrets before
# deleting rekey
rekeyAudit cluster-key
if [ $? -ne 0 ]; then
log $ERROR "Audit cluster-key fails with a" \
"partial set of cluster-rekey"
return 1
fi
deleteShardSecrets cluster-rekey
fi
# Handle condition where secrets were shuffled but
# vault-manager failed before recording the
# milestone cluster-rekey-shuffle
            # rekeyAudit will double-check that cluster-key is
# in use
set_secret cluster-rekey-shuffle /dev/stdin \
<<<"$( get_secret cluster-rekey-request )"
log $INFO "Continuing rekey procedure with audit" \
"of cluster-key"
return 0
fi
# else: pass through
else
if [ "$rekey_exists" -eq 0 ]; then
# Handle condition where an active server fails during
# verification: vault may have cancelled the rekey procedure
            # The question is: which shards are the vault
            # servers using?
log $INFO "Recovering from mismatch of cluster-rekey" \
"and verified status"
# Audit the existing shards to see which ones the
# vault servers are keyed for.
# Most likely that the verification failed due to
# active server failing, start with cluster-key
rekeyAudit cluster-key
if [ $? -eq 0 ]; then
# The rekey verification did not complete
# remove cluster-rekey secrets
# The rekey procedure should restart
deleteShardSecrets cluster-rekey
log $INFO "Restart rekey procedure"
return 0
fi
            # this happens when the vault-manager process is killed
rekeyAudit cluster-rekey
if [ $? -eq 0 ]; then
                set_secret cluster-rekey-verified /dev/stdin \
                    <<<"$( get_secret cluster-rekey-request )"
log $INFO "Continue rekey procedure with cluster-rekey"
return 0
fi
# else: pass through
elif [ "$rekey_exists" -eq 5 ]; then
# There are no cluster-rekey secrets; and the rekey is
# cancelled: the rekey procedure will restart
log $INFO "Continue rekey procedure with initialization"
return 0
else # cluster-rekey secrets are incomplete
# Handle condition where verification is needed but
# vault-manager did not store shards. The rekey was
# canceled above
            # assert cluster-key before deleting rekey
rekeyAudit cluster-key
if [ $? -eq 0 ]; then
# the rekey procedure will restart
log $INFO "Deleting partial set of" \
"cluster-rekey secrets"
deleteShardSecrets cluster-rekey
return 0
fi
# else: pass through
fi
fi
log $ERROR "Did not recover from current rekey status"
}
# The state machine for rekeying the vault server
#
# The overall procedure for a rekey request includes:
# - wait for stability of vault servers
# - initialize the procedure
# - authenticate the rekey procedure by supplying shards
# - store the new shards
# - verify the rekey with the new shards read from k8s secrets
# - rotate the shard secrets:
# cluster-rekey - cluster-key - cluster-key-bk
# - Audit the new shards with active vault server
# - Remove artifacts of rekey procedure:
# cluster-key-bk, milestone secrets
#
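# For reference, the milestone secrets in the order they are
# created by the steps above:
#   cluster-rekey-request -> cluster-rekey-verified
#     -> cluster-rekey-shuffle -> cluster-rekey-audit
# finalizeRekey removes them, deleting cluster-rekey-audit last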
function vaultRekey {
local records
local count
local result
local secrettext
if ! needsRekey; then
return
fi
# Retrieve and record the rekey status once for the tests that
# follow
NO_HEADER=true \
API_TMOUT=$API_REKEY_QUERY_TMOUT \
vaultAPI REKEY_STATUS_JSON GET $ACTIVE_TARGET /sys/rekey/init
if [ $? -ne 0 ]; then
# an error is printed
REKEY_STATUS_JSON=''
return
fi
needsAudit
case $? in
0)
rekeyResuming
rekeyAudit
if [ $? -eq 0 ]; then
set_secret cluster-rekey-audit /dev/stdin \
                <<<"$( get_secret cluster-rekey-request )"
finalizeRekey
fi
return
;;
1) # continue to procedure step
;;
3) # audit is already completed
secretExists cluster-rekey-audit >/dev/null
if [ $? -eq 0 ]; then
            # the cluster-key secrets were audited, but vault
# manager didn't get a chance to set
# cluster-rekey-audit milestone
finalizeRekey
return
fi
log $ERROR "Discrepancy between needsAudit and" \
"rekeyVault"
return
;;
*)
# an error occurs for which the procedure should not
# continue
return
;;
esac
needsShuffle
case $? in
0)
rekeyResuming
rekeyShuffleKeys
return
;;
1) # continue to procedure step
;;
*)
# an error occurs for which the procedure should not
# continue
return
;;
esac
needsVerify
case $? in
0)
rekeyResuming
rekeyVerify
return
;;
1) # continue to procedure step
;;
*)
# an error occurs for which the procedure should not
# continue
return
;;
esac
needsAuthentication
case $? in
0)
rekeyResuming
rekeyAuthenticate
return
;;
1) # continue to procedure step
;;
*)
# an error occurs for which the procedure should not
# continue
return
;;
esac
needsInitialization
case $? in
0)
secrettext="$( get_secret cluster-rekey-request )"
log $INFO "Rekey request started: $secrettext"
rekeyInitialize
return
;;
    1) # continue to recovery
;;
*)
# an error occurs for which the procedure should not
# continue
return
;;
esac
# falling through the case statements requires remediation
rekeyResuming
rekeyRecovery
}
# Return 0 (true) if either the vault server status shows a rekey
# is in progress, or if vault-manager is engaged in the process of
# rekeying the vault
#
# Vault manager rekey is in progress if either of these secrets
# exists:
# cluster-rekey-request - the first to be created
# cluster-rekey-audit - the last to be removed
function rekeyInProgress {
# query the vault server
assertNoRekey
if [ $? -ne 0 ]; then
return 0
fi
# look for vault-manager's milestone secrets
secretsExistAny cluster-rekey-request cluster-rekey-audit
return $?
}
# Check conditions that need to be met before taking a snapshot of
# the vault. The same conditions apply for snapshot restore.
#
# The required conditions are:
# - vault server pods match HA_REPLICAS
# - vault server pods are unsealed
# - there is no rekey in progress
#
# Returns 0 for success, or >0 for conditions not met
# The fail conditions are logged to stdout/stderr
function snapshotPreCheck {
local errors=0
local pods
local podcount
local host
local dnsname
local server_status
local sealed
pods="$( getVaultPods | grep "^$VAULT_FN" )"
podcount="$( echo "$pods" | awk '{print $1}' | wc -w )"
if [ "$podcount" -ne "$HA_REPLICAS" ]; then
log $ERROR "snapshotPreCheck: vault pods ($podcount)" \
"does not match replicas ($HA_REPLICAS)"
errors=$(( errors + 1 ))
fi
while read host dnsname; do
NO_HEADER=true \
API_TMOUT=$QUERY_TMOUT \
vaultAPI server_status GET $dnsname.$POD_TARGET_BASE \
/sys/health
sealed="$( echo "$server_status" | jq .sealed )"
if [ "$sealed" != "false" ]; then
log $ERROR "snapshotPreCheck: $host ($dnsname)" \
"sealed status is [$sealed]"
errors=$(( errors + 1 ))
else
log $DEBUG "snapshotPreCheck: $host ($dnsname)" \
"sealed status is [$sealed]"
fi
done <<<"$pods"
if rekeyInProgress; then
log $ERROR "snapshotPreCheck: a rekey is in progress"
errors=$(( errors + 1 ))
fi
return $errors
}
# Take a snapshot of the vault, which is output to stdout
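# Usage sketch (the output path is an example only):
#   snapshotCreate > /tmp/vault.snap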
function snapshotCreate {
local apipath=/sys/storage/raft/snapshot
curl -s -S --cacert "$CERT" \
--connect-timeout $QUERY_TMOUT \
--header "X-Vault-Token:$( get_secret cluster-key-root )" \
--request "GET" \
"https://$ACTIVE_TARGET:${TARGET_PORT}/v1${apipath}"
}
# Store the init response and metadata associated with a vault
# snapshot into the specified k8s secret.
#
# metadata should be a dictionary type structure in this form:
# {"date":"xxx","snapshot_sum":"yyy","secret":"zzz"}
#
# The 'snapshot' of the init response should be taken promptly
# alongside the snapshot of the vault. In particular, consider
# pausing vault manager, in addition to using snapshotPreCheck, to
# ensure the two are consistent.
#
# In practice the metadata can contain any information; the
# procedure only requires the value of 'secret', as in:
# echo "$metadata" | jq -r .secret
function snapshotSetSecret {
local secret="$1"
local metadata="$2"
local jqlog
local result
local keys
local data
# make sure the user supplied data is ok
jqlog="$( echo "$metadata" | jq . 2>&1 >/dev/null )"
result=$?
if [ $result -ne 0 ]; then
log $ERROR "snapshotSetSecret: error parsing metadata:" \
"[$result] [$jqlog]"
return 1
fi
# check that the user supplied metadata contains 'secret',
# which is the only value the procedure requires.
jqlog="$( echo "$metadata" | jq -r .secret 2>&1 )"
if [ $? -ne 0 -o -z "$jqlog" -o "$jqlog" == "null" ]; then
log $WARNING "snapshotSetSecret: metadata omits 'secret'"
fi
keys="$( reconstructInitResponse cluster-key )"
data="{\"metadata\":$metadata,\"init\":$keys}"
# make sure the assembled secret data is ok
echo "$data" | jq . >/dev/null 2>&1
result=$?
if [ $result -ne 0 ]; then
log $ERROR "snapshotSetSecret: error parsing secret data:" \
"[$result]"
return 1
fi
echo "$data" | jq -c . | set_secret "$secret" /dev/stdin
# verify the copy of shards secrets
get_secret "$secret" | jq -c .init | validateSecrets cluster-key
if [ $? -ne 0 ]; then
return 1
fi
return 0
}
# POST stdin to the active vault server API endpoint for restoring
# the snapshot. stdin is the snapshot file of the vault cluster.
#
# The required parameter is the metadata associated with the
# snapshot, which contains the name of the k8s secret which has
# the unseal shards for the vault data being restored. The metadata
# needs to contain at least '{"secret":"xxx"}', and this secret
# needs to exist in the vault namespace.
#
# The content of the secret will be used to restore the unseal
# shards for the vault that is being restored.
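# A minimal usage sketch (names are examples only), pairing the
# snapshot file with the metadata stored by snapshotSetSecret:
#   meta="$( get_secret vault-snapshot-meta | jq -c .metadata )"
#   snapshotRestore "$meta" < /tmp/vault.snap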
function snapshotRestore {
local metadata="$1"
local secret
local logs
local result
local initdata
local apipath="/sys/storage/raft/snapshot-force"
# check that the associated secret exists
secret="$( echo "$metadata" | jq -r .secret 2>/dev/null )"
if [ -z "$secret" -o "$secret" == "null" ]; then
log $ERROR "Metadata omits the k8s secret associated with" \
"the snapshot"
return 1
fi
secretExists "$secret" >/dev/null
if [ $? -ne 0 ]; then
log $ERROR "K8s secret [$secret] associated with the" \
"snapshot does not exist"
return 1
fi
# check the init response associated with the snapshot
initdata="$( get_secret "$secret" | jq -c .init 2>/dev/null )"
if [ -z "$initdata" -o "$initdata" == 'null' ]; then
log $ERROR "Failed to retrieve init response from" \
"k8s secret [$secret]"
return 1
fi
    # On success the snapshot API returns no response body, and
    # curl also returns 0 on a vault API error; any output captured
    # here therefore indicates an error.
logs="$( curl -s -S --cacert "$CERT" \
--connect-timeout $QUERY_TMOUT \
--header "X-Vault-Token:$( get_secret cluster-key-root )" \
--request POST \
--data-binary @/dev/stdin \
"https://$ACTIVE_TARGET:${TARGET_PORT}/v1${apipath}" 2>&1 )"
result=$?
log $INFO "Snapshot restore API response: $result"
if [ "$result" -ne 0 -o -n "$logs" ]; then
log $ERROR "Snapshot restore: [$logs]"
return 1
fi
# Restore the secrets associated with the snapshot
# We're done if the secrets haven't changed.
echo "$initdata" | validateSecrets cluster-key
if [ $? -eq 0 ]; then
return 0
fi
# replace vault's init response in k8s secrets
deleteShardSecrets cluster-key
deleteSecrets cluster-key-root
echo "$initdata" | storeVaultInitSecrets cluster-key
# finally, verify the storage was successful
echo "$initdata" | validateSecrets cluster-key
return $?
}
# function that calls exit_on_trap for every second of sleep
# takes total sleep time as parameter
function trap_sleep {
local sleep_time="$1"
for i in $(seq 1 $sleep_time); do
sleep 1
exit_on_trap 22
done
}
#
# LOGIC
#
if [[ "${BASH_SOURCE[0]}" != "${0}" ]]; then
# This script was sourced
return 0
fi
health_excuse_create "$HEALTH_EXCUSE_INIT" "$HC_MSG_INIT"
if [ -n "$EARLY_PAUSE" ]; then
echo -n "$EARLY_PAUSE" > $PAUSEFILE
fi
exit_on_trap 1
# Match the kubectl version to the server version (or the helm
# override)
pickK8sVersion
# check if this pod is helping to convert storage from pvc to k8s
# secrets
mountHelper
exit_on_trap 15
# check if there are existing key shard secrets, a bootstrap
# secret, or a pre-existing PVC resource
K8S_SECRETS_PREEXIST="$( secretExists cluster-key-root )"
exit_on_trap 16
BOOTSTRAP_PREEXISTS="$( secretExists cluster-key-bootstrap )"
exit_on_trap 17
PVC_PREEXISTS="$( pvcRemoved )"
exit_on_trap 18
runConversion
exit_on_trap 19
# check if the PVC still persists after conversion, and if so issue a warning.
PVC_PREEXISTS="$( pvcRemoved )"
PVC_STATUS=$?
if [ $PVC_STATUS -eq 1 ]; then
log $DEBUG "PVC storage $PVC_PREEXISTS is currently terminating"
elif [ $PVC_STATUS -eq 2 ]; then
log $WARNING "PVC storage $PVC_PREEXISTS deletion has failed during conversion"
fi
# Wait for at least one vault server in order to check initialization
waitForPods 1
exit_on_trap 2
log $DEBUG "Putting a list of vault pods and ip in $WORKDIR/pods.txt"
getVaultPods > $WORKDIR/pods.txt
exit_on_trap 3
vaultInitialized
IS_VAULT_INITIALIZED=$?
if [ $IS_VAULT_INITIALIZED -eq 1 ]; then
exit_on_trap 4
desired_pods=$HA_REPLICAS
# Waiting for vault servers to come up
waitForPods $desired_pods
exit_on_trap 5
log $INFO "Putting a list of vault pods and IPs in $WORKDIR/pods.txt"
getVaultPods > $WORKDIR/pods.txt
exit_on_trap 6
log $DEBUG "Initializing the vault on vault-0 and" \
"storing keys in k8s secrets"
initVault
    # Some sleep is required to allow convergence
sleep "$INIT_CONVERGE_TIME"
log $DEBUG "Unsealing vault-0 using the init shards"
for row in $(awk 'NR==1{print $2}' $WORKDIR/pods.txt); do
unsealVault "$row"
done
log $DEBUG "Joining other vault servers to the HA Raft cluster"
for row in $(awk 'NR>1{print $2}' $WORKDIR/pods.txt); do
log $DEBUG "$( grep $row $WORKDIR/pods.txt )"
joinRaft "$row"
sleep "$JOIN_RATE"
done
exit_on_trap 7
log $INFO "Unsealing the remaining vaults"
for row in $(awk 'NR>1{print $2}' $WORKDIR/pods.txt); do
log $DEBUG "$( grep $row $WORKDIR/pods.txt )"
unsealVault "$row"
sleep "$UNSEAL_RATE"
exit_on_trap 8
done
else
log $INFO "Vault is initialized"
fi
exit_on_trap 9
# initialize the state machine - vault server status records
echo "" > "$PODREC_F"
while read host dns_name; do
if [ -z "$host" ]; then
continue
fi
status_rec="/$host/$dns_name//"
echo "$status_rec" >> "$PODREC_F"
done <$WORKDIR/pods.txt
health_excuse_remove "$HEALTH_EXCUSE_INIT"
# Loop forever to check the seal status of vaults and
# unseal if required
log $INFO "Checking vault pods seal status in perpetuity..."
while true; do
exit_on_trap 10
trap_sleep "$STATUS_RATE"
exit_on_trap 20
    pickK8sVersion # check if the k8s server version has changed
count=$( kubectl get pods -n "${VAULT_NS}" \
-o jsonpath='{range .items[*]}{.metadata.name}{"\n"}{end}' \
| grep "^${VAULT_FN}-manager" | wc -w )
if [ "$count" -gt 1 ]; then
log $ERROR "Multiple instances of vault manager detected. Waiting until one left"
exit_on_trap 21
continue
fi
rm $WORKDIR/pods.txt
echo "" > "$PODREC_TMP_F"
exit_on_trap 11
getVaultPods > $WORKDIR/pods.txt
exit_on_trap 12
while read host dnsname; do
if [ -z "$dnsname" ]; then
# probably a recovering pod waiting for an IP address
log $DEBUG "pod list has empty data: [$host] [$dnsname]"
continue
fi
NO_HEADER=true \
API_TMOUT=$QUERY_TMOUT \
vaultAPI server_status GET $dnsname.$POD_TARGET_BASE \
/sys/health
echo -n "$server_status" > $WORKDIR/healthcheck.txt
TEMP=$( echo "$server_status" | jq -r .sealed )
exit_on_trap 13
        # Decide when to unseal the vault server; includes
        # adding records to new_pods_status.txt
runStateMachine "$host" "$dnsname" "$TEMP"
exit_on_trap 14
done <$WORKDIR/pods.txt
mv "$PODREC_TMP_F" "$PODREC_F"
vaultRekey
done
kind: ConfigMap
metadata:
managedFields:
- apiVersion: v1
fieldsType: FieldsV1
fieldsV1:
f:data:
.: {}
f:init.sh: {}
manager: vault-init-unseal
name: vault-init-unseal-3
namespace: {{ .Release.Namespace }}
---
apiVersion: v1
kind: ConfigMap
metadata:
managedFields:
- apiVersion: v1
fieldsType: FieldsV1
fieldsV1:
f:data:
.: {}
f:pvc-attach.yaml: {}
manager: {{ .Values.vault.name }}-mount-helper
name: {{ .Values.vault.name }}-mount-helper
namespace: {{ .Release.Namespace }}
data:
pvc-attach.yaml: |
---
apiVersion: batch/v1
kind: Job
metadata:
name: {{ .Values.vault.fullname }}-mount-helper
namespace: vault
spec:
activeDeadlineSeconds: 600
completions: 1
parallelism: 1
ttlSecondsAfterFinished: 0
template:
spec:
restartPolicy: Never
serviceAccountName: "{{ .Values.vault.fullname }}-manager-1"
{{- if .Values.manager.imagePullSecrets }}
imagePullSecrets:
{{- toYaml .Values.manager.imagePullSecrets | nindent 12 }}
{{- end }}
{{- if .Values.manager.tolerations }}
tolerations:
{{- tpl .Values.manager.tolerations . | nindent 12 }}
{{- end }}
securityContext:
runAsUser: 0
runAsGroup: 0
containers:
- name: mount
image: "{{ .Values.manager.image.repository }}:{{ .Values.manager.image.tag }}"
imagePullPolicy: "{{ .Values.manager.image.pullPolicy }}"
args:
- bash
- /opt/script/init.sh
env:
- name: MANAGER_MODE
value: MOUNT_HELPER
- name: PVC_DIR
value: /mnt/data
volumeMounts:
- name: mount-helper
mountPath: /opt/script
readOnly: true
- name: manager-pvc
mountPath: /mnt/data
readOnly: false
volumes:
- name: mount-helper
configMap:
name: vault-init-unseal-3
- name: manager-pvc
persistentVolumeClaim:
claimName: manager-pvc-sva-vault-manager-0
---
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
namespace: {{ .Release.Namespace }}
name: {{ .Values.vault.fullname }}-manager-1
rules:
- apiGroups: [""] # "" indicates the core API group
resources: ["pods"]
verbs: ["get", "watch", "list"]
- apiGroups: [""] # "" indicates the core API group
resources: ["pods/exec"]
verbs: ["create"]
- apiGroups: [""] # "" indicates the core API group
resources: ["secrets"]
verbs: ["get", "create", "delete"]
- apiGroups: ["batch"]
resources: ["jobs"]
verbs: ["get", "create", "delete"]
- apiGroups: [""] # "" indicates the core API group
resources: ["persistentvolumeclaims"]
verbs: ["list", "delete"]
---
apiVersion: v1
kind: ServiceAccount
metadata:
name: {{ .Values.vault.fullname }}-manager-1
namespace: {{ .Release.Namespace }}
labels:
helm.sh/chart: {{ .Values.manager.chart }}
app.kubernetes.io/name: {{ .Values.vault.name }}-manager
app.kubernetes.io/instance: {{ .Release.Name }}
app.kubernetes.io/managed-by: {{ .Release.Service }}
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
name: {{ .Values.vault.fullname }}-manager-1
namespace: {{ .Release.Namespace }}
subjects:
- kind: ServiceAccount
name: {{ .Values.vault.fullname }}-manager-1
roleRef:
kind: Role
name: {{ .Values.vault.fullname }}-manager-1
apiGroup: rbac.authorization.k8s.io
---
apiVersion: apps/v1
kind: StatefulSet
metadata:
name: {{ .Values.vault.fullname }}-manager-3
namespace: {{ .Release.Namespace }}
labels:
app.kubernetes.io/name: {{ .Values.vault.name }}-manager
app.kubernetes.io/instance: {{ .Release.Name }}
app.kubernetes.io/managed-by: {{ .Release.Service }}
component: webhook
spec:
serviceName: {{ .Values.vault.fullname }}
replicas: 1
selector:
matchLabels:
app.kubernetes.io/instance: {{ .Release.Name }}
component: webhook
template:
metadata:
labels:
app.kubernetes.io/name: {{ .Values.vault.name }}-manager
app.kubernetes.io/instance: {{ .Release.Name }}
component: webhook
{{- if .Values.manager.extraLabels }}
{{- toYaml .Values.manager.extraLabels | nindent 8 -}}
{{- end }}
spec:
serviceAccountName: "{{ .Values.vault.fullname }}-manager-1"
{{- if .Values.manager.imagePullSecrets }}
imagePullSecrets:
{{- toYaml .Values.manager.imagePullSecrets | nindent 8 }}
{{- end }}
{{- if .Values.manager.tolerations }}
tolerations:
{{- tpl .Values.manager.tolerations . | nindent 8 }}
{{- end }}
containers:
- name: manager
image: "{{ .Values.manager.image.repository }}:{{ .Values.manager.image.tag }}"
imagePullPolicy: "{{ .Values.manager.image.pullPolicy }}"
args:
- bash
- /opt/script/init.sh
env:
- name: CA_CERT
value: /mnt/data/ca/tls.crt
livenessProbe:
exec:
command:
- bash
- -c
- "source /opt/script/init.sh; health_check"
initialDelaySeconds: {{ .Values.manager.livenessProbe.initialDelaySeconds }}
periodSeconds: {{ .Values.manager.livenessProbe.periodSeconds }}
timeoutSeconds: {{ .Values.manager.livenessProbe.timeoutSeconds }}
successThreshold: {{ .Values.manager.livenessProbe.successThreshold }}
failureThreshold: {{ .Values.manager.livenessProbe.failureThreshold }}
terminationGracePeriodSeconds: {{ .Values.manager.livenessProbe.terminationGracePeriodSeconds }}
volumeMounts:
- name: vault-init-unseal-3
mountPath: /opt/script
readOnly: false
- name: mount-helper-yaml
mountPath: /opt/yaml
readOnly: true
- name: vault-ca
mountPath: /mnt/data/ca
readOnly: true
volumes:
- name: vault-init-unseal-3
configMap:
name: vault-init-unseal-3
- name: mount-helper-yaml
configMap:
name: {{ .Values.vault.name }}-mount-helper
- name: vault-ca
secret:
secretName: vault-ca