Add Openbao Backup and Restore

Add backup and restore for openbao. As openbao is a fork of hashicorp
vault, it uses an identical procedure to back up and restore itself.
The process used here is the same as the one used for hashicorp vault.
The code is copied over to keep it separate from the vault version,
which keeps the work simple once vault backup and restore is
removed.

Test Plan:
PASS    Openbao standalone backup is successful without errors
PASS    Openbao platform backup is successful without errors
PASS    Openbao standalone restore is successful without errors

Story: 2011244
Task: 51419

Change-Id: I49caf7f300563d511a5f609b40defd79420f110c
Signed-off-by: Tae Park <tae.park@windriver.com>
This commit is contained in:
Tae Park 2024-11-27 15:03:41 -05:00
parent 1a2d069055
commit 71d19daef9
10 changed files with 1382 additions and 3 deletions

View File

@ -70,16 +70,18 @@ backup_encryption_enabled: false
backup_encryption_passphrase: ""
# A list of identifiers indicating which backup files to encrypt:
# [platform, openstack, user_images, dc_vault, registry, hc_vault]
# [platform, openstack, user_images, dc_vault, registry, hc_vault, openbao]
backup_encyption_include:
- platform
- hc_vault
- openbao
# Internal boolean variables for encryption to simplify logic. These
# will be adjusted later when the overridden parameters above are
# considered.
platform_tarball_encrypted: false
hc_vault_tarball_encrypted: false
openbao_tarball_encrypted: false
# The platform backup tarball will be named in this format:
# <platform_backup_filename_prefix>_<timestamp>.tgz
@ -126,10 +128,22 @@ dc_vault_backup_filename_prefix: "{{ inventory_hostname }}_dc_vault_backup"
backup_hc_vault: false
# The hashicorp vault backup tarball will be named in this format:
# <dc_vault_backup_filename_prefix>_<timestamp>.tgz
# <hc_vault_backup_filename_prefix>_<timestamp>.tgz
#
hc_vault_backup_filename_prefix: "{{ inventory_hostname }}_hc_vault_backup"
# This is the default value for including openbao into the platform backup process.
# This value can be overridden by the user when calling for platform backup playbook,
# to include or not include the openbao backup.
# If the openbao application is either uploaded only or non-existent,
# the backup process will be omitted regardless of what this value is.
backup_openbao: false
# The openbao backup tarball will be named in this format:
# <openbao_backup_filename_prefix>_<timestamp>.tgz
#
openbao_backup_filename_prefix: "{{ inventory_hostname }}_openbao_backup"
restore_cinder_glance_data: false
# Default directory where the system backup tarballs fetched from the

View File

@ -0,0 +1,32 @@
---
#
# Copyright (c) 2025 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
# requires one variable passed:
# initial_backup_dir = The resulting backup package will be found here.
- hosts: all
  gather_facts: false

  # Defaults loaded from this file include:
  #   backup_encryption_enabled
  #   backup_encryption_passphrase
  vars_files:
    - host_vars/backup-restore/default.yml

  vars:
    password_change: false
    # The snapshot is encrypted only when backup encryption is enabled;
    # the passphrase falls back to an empty string when it is undefined.
    openbao_encrypt: "{{ backup_encryption_enabled|bool }}"
    encrypt_openbao_secret: "{{ backup_encryption_passphrase | default('') }}"
    openbao_mode: "backup"
    # "standalone" is the value the openbao_backup role tests before it
    # packages the snapshot into its own tarball.
    op_mode: "standalone"

  roles:
    - role: common/prepare-env
    - role: openbao/prepare_env
      become: true
    - role: openbao/openbao_backup
      become: true

View File

@ -0,0 +1,31 @@
---
#
# Copyright (c) 2025 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
# requires two variables passed:
# initial_backup_dir = the directory where the openbao backup package will be found
# backup_filename = filename for openbao backup package
- hosts: all
  gather_facts: false

  # Defaults loaded from this file include:
  #   backup_encryption_enabled
  #   backup_encryption_passphrase
  vars_files:
    - host_vars/backup-restore/default.yml

  vars:
    password_change: false
    # The snapshot is decrypted only when backup encryption is enabled;
    # the passphrase falls back to an empty string when it is undefined.
    openbao_encrypt: "{{ backup_encryption_enabled|bool }}"
    encrypt_openbao_secret: "{{ backup_encryption_passphrase | default('') }}"
    openbao_mode: "restore"
    op_mode: "standalone"

  roles:
    - role: common/prepare-env
    - role: openbao/prepare_env
    - role: openbao/openbao_restore

View File

@ -1,6 +1,6 @@
---
#
# Copyright (c) 2019-2024 Wind River Systems, Inc.
# Copyright (c) 2019-2025 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
@ -18,6 +18,8 @@
should_use_old_image_backup: "{{ backup_user_images|bool == true }}"
include_hc_vault: "{{ backup_hc_vault | bool }}"
omit_hc_vault: false
include_openbao: "{{ backup_openbao | bool }}"
omit_openbao: false
# The feature is enabled by backup_encryption_enabled variable.
# The backup tarballs are included in the feature by
@ -45,6 +47,11 @@
set_fact:
hc_vault_tarball_encrypted: true
when: '"hc_vault" in backup_encyption_include'
- name: Set Openbao tar encryption enabled
set_fact:
openbao_tarball_encrypted: true
when: '"openbao" in backup_encyption_include'
when: backup_encryption_enabled|bool
- name: Do StarlingX backup
@ -148,6 +155,61 @@
when: vault_system_health.rc != 0
when: include_hc_vault | bool
# Decide whether openbao can take part in this backup, based on the
# application status reported by "system application-show".
- name: Check openbao status
  block:
    - name: Check if openbao is applied
      shell: |
        source /etc/platform/openrc
        system application-show openbao --format value --column status
      register: openbao_applied_exists
      # application-show exits non-zero, with nothing on stdout, when the
      # application has not been uploaded. That case has to reach the
      # "omit" task below instead of aborting the whole platform backup.
      failed_when: false

    - name: Omit openbao if status is empty or uploaded
      set_fact:
        include_openbao: false
        omit_openbao: true
      when: >-
        openbao_applied_exists.stdout | length == 0 or
        openbao_applied_exists.stdout == "uploaded"

    # The block-level condition is evaluated again for this task, so it
    # is skipped once include_openbao has been set to false above.
    - name: Fail openbao if status is not applied
      fail:
        msg: "Openbao application is {{ openbao_applied_exists.stdout }}, not applied."
      when: openbao_applied_exists.stdout != "applied"
  when: include_openbao | bool
- name: Indicate if openbao is omitted from status check
debug:
msg: "Openbao backup will be omitted because openbao application is not applied."
when: omit_openbao | bool
# Before any backup work starts, locate the openbao manager pod and ask
# it whether openbao is in a state that allows a snapshot to be taken.
# The whole block is skipped unless include_openbao is true.
- name: Openbao precheck
block:
# The pod name is the first space-separated column of the kubectl
# output row that contains "openbao-manager".
- name: Find openbao manager pod
shell: >-
kubectl get pods -n openbao | grep "openbao-manager" | cut -d " " -f 1
register: openbao_manager_pod_name
environment:
KUBECONFIG: /etc/kubernetes/admin.conf
- name: Fail if openbao manager pod is not found
fail:
msg: "Openbao manager pod is not found"
when: openbao_manager_pod_name.stdout | length == 0
# Runs snapshotPreCheck from /opt/script/init.sh inside the manager pod;
# stderr is merged into stdout.
# NOTE(review): no failed_when is set here, so a non-zero rc presumably
# fails this task itself before the explicit fail task below is
# reached - confirm whether that is intended.
- name: Check openbao system health
shell: >-
kubectl exec -n "openbao" "{{ openbao_manager_pod_name.stdout }}" --
bash -c "source /opt/script/init.sh; snapshotPreCheck" 2>&1
register: openbao_system_health
environment:
KUBECONFIG: /etc/kubernetes/admin.conf
- name: Fail if openbao health check returns error
fail:
msg: "Openbao system health check returned error"
when: openbao_system_health.rc != 0
when: include_openbao | bool
- name: Send application lifecycle notifications for pre-backup semantic check
command: /usr/bin/sysinv-utils notify backup-semantic-check
register: backup_semantic_check_notification_result
@ -234,6 +296,13 @@
register: hc_vault_dir
when: include_hc_vault | bool
- name: Create openbao temp dir
file:
path: "{{ tempdir.path }}/openbao_dir"
state: directory
register: openbao_dir
when: include_openbao | bool
- name: Backup roles, table spaces and schemas for databases.
shell: >-
sudo -u postgres pg_dumpall
@ -649,6 +718,33 @@
when: hc_vault_backup_result.matched != 2
when: include_hc_vault | bool
# Openbao snapshot should be taken before the backup of etcd database.
# A k8s secret is created that is associated with the snapshot.
# The whole block is skipped unless include_openbao is true.
- name: Run Openbao backup
block:
# op_mode is "platform" here; the role only packages and cleans up the
# snapshot directory itself when op_mode is "standalone".
- name: Include openbao backup role
include_role:
name: openbao/openbao_backup
vars:
openbao_backup_dir: "{{ openbao_dir.path }}"
openbao_encrypt: "{{ openbao_tarball_encrypted|bool }}"
encrypt_openbao_secret: "{{ backup_encryption_passphrase }}"
op_mode: "platform"
# The snapshot script writes openbao-snapshot-<suffix>.tar and a
# matching openbao-snapshot-<suffix>.tar.metadata file; both match this
# pattern, hence the expected count of 2 below.
- name: Find result files
find:
paths: "{{ openbao_dir.path }}"
patterns: "openbao-snapshot-*.tar*"
register: openbao_backup_result
- name: Fail if incorrect number of file created from openbao backup
fail:
msg: >
There was an error with the openbao backup process.
Incorrect number of files produced.
when: openbao_backup_result.matched != 2
when: include_openbao | bool
- name: Create etcd snapshot temp dir
file:
path: "{{ tempdir.path }}/etcd-snapshot"
@ -741,6 +837,7 @@
openstack_backup_file: "{{ openstack_backup_filename_prefix }}_{{ backup_timestamp }}.tgz"
dc_vault_backup_file: "{{ dc_vault_backup_filename_prefix }}_{{ backup_timestamp }}.tgz"
hc_vault_backup_file: "{{ hc_vault_backup_filename_prefix }}_{{ backup_timestamp }}.tgz"
openbao_backup_file: "{{ openbao_backup_filename_prefix }}_{{ backup_timestamp }}.tgz"
- name: Set backup files absolute path
set_fact:
@ -749,6 +846,7 @@
openstack_backup_file_path: "{{ backup_dir }}/{{ openstack_backup_file }}"
dc_vault_backup_file_path: "{{ backup_dir }}/{{ dc_vault_backup_file }}"
hc_vault_backup_file_path: "{{ backup_dir }}/{{ hc_vault_backup_file }}"
openbao_backup_file_path: "{{ backup_dir }}/{{ openbao_backup_file }}"
- name: Save user uploaded images from local registry to an archive
include_tasks: export-user-local-registry-images.yml
@ -866,6 +964,24 @@
failed_when: tar_cmd.rc >= 2 or tar_cmd.rc < 0
when: include_hc_vault | bool
- name: Create a tgz archive for openbao backup
shell: >-
tar
--use-compress-program={{ compress_program }}
--exclude {{ exclude_targets | map('regex_replace', '^/', '')
| list | join(' --exclude ') }}
-cf {{ openbao_backup_file_path }}
$(ls -d
{{ openbao_dir.path }}
2> /dev/null)
args:
warn: false
# Changing the failed_when behavior to prevent the backup to fail on "file changed as we read it", which
# makes tar return 1
register: tar_cmd
failed_when: tar_cmd.rc >= 2 or tar_cmd.rc < 0
when: include_openbao | bool
- name: Set default backup files absolute path
set_fact:
platform_backup_file_path_final: "{{ platform_backup_file_path }}"
@ -924,6 +1040,14 @@
when: include_hc_vault | bool
no_log: true
- name: Transfer for openbao backup tar files to the local machine if it exists
fetch:
src: "{{ openbao_backup_file_path }}"
dest: "{{ host_backup_dir }}/"
flat: yes
when: include_openbao | bool
no_log: true
# TODO transfer docker image archive which may be very big during remote play.
# Fetch module fills the memory and has a very slow transfer rate due to base64 encoding
# Maybe use synchronize module after upgrading ansible, backup-restore/transfer-file

View File

@ -0,0 +1,264 @@
#!/bin/bash
# Script to take a snapshot of the openbao
###
# Globals
#
NAME="$( basename $0 )"
KUBECMD="kubectl"
SCRIPT="source /opt/script/init.sh"
MAXATTEMPTS=10
GPGSLEEP=6
K8S_SECRET_PREFIX="snapshot-metadata"
OPENBAO_NS="openbao"
MANAGER_PREFIX="stx-openbao-manager"
# get openbao manager pod
JSONPATH='{range .items[*]}{.metadata.name}{"\n"}{end}'
POD="$( $KUBECMD get pods -n "$OPENBAO_NS" -o jsonpath="$JSONPATH" \
| grep "^$MANAGER_PREFIX" )"
if [ -z "$POD" ]; then
echo "Openbao manager not found" >&2
exit 1
fi
###
# Functions
#
# Print the command-line synopsis of this script to stderr.
# Takes no arguments; $NAME (the script's basename) is used in the text.
function usage {
echo -e "Usage: \n" \
"\n" \
"$NAME <output_dir> [--encrypt <variable> ]\n" \
"\n" \
"All parameters are positional:\n" \
" output_dir: required, location to output snapshot tarball\n" \
" --encrypt: optional\n" \
" variable: required if --encrypt is specified, the name\n" \
" of a variable containing a secret with which\n" \
" encrypt the snapshot\n" >&2
}
# Remove the pause file inside the openbao manager pod, then terminate
# the current shell.
#   $1: exit code to terminate with
function unpause_exit {
    local rc="$1"
    # \${PAUSEFILE} stays unexpanded here so that the bash inside the
    # pod resolves it after sourcing init.sh.
    local remote_cmd="${SCRIPT}; rm \"\${PAUSEFILE}\""

    # Best effort only: the outcome of the removal is deliberately
    # not checked.
    kubectl exec -n "$OPENBAO_NS" "$POD" -- bash -c "$remote_cmd"

    exit $rc
}
# Run snapshotCreate inside the openbao manager pod.
# The snapshot tarball is emitted on stdout; the return status is that
# of the kubectl exec call.
function get_snapshot {
    local remote_cmd="${SCRIPT}; snapshotCreate"

    kubectl exec -n "$OPENBAO_NS" "$POD" -- bash -c "$remote_cmd"
}
# Intended for deleting the fifo file and the temporary directory that
# holds it.
#   $1: temporary directory (removed only if empty)
#   $2: fifo file inside that directory
# Errors are discarded: the function may run more than once for the same
# paths (it is installed on several traps), so "already gone" is normal.
function cleanup {
    # Quote both paths so that a path containing whitespace is not split
    # into several arguments; "--" guards against a leading dash.
    rm -- "$2" 2>/dev/null
    rmdir -- "$1" 2>/dev/null
}
# Retrieve a snapshot for the openbao, using openbao-manager's code, and
# encrypt the file using the user-supplied passphrase
#
# The snapshot is received as stdin from openbao-manager, whereas the
# passphrase is provided to gpg via fifo file.
#
# Parameters:
#   $1: NAME of the variable that holds the passphrase (it is read
#       through indirect expansion, so the passphrase itself never
#       appears on a command line)
#   $2: path of the encrypted output file written by gpg
#
# Returns gpg's exit status, or 1 when gpg is still running after
# MAXATTEMPTS polls spaced GPGSLEEP seconds apart.
#
# NOTE(review): only the gpg side of the background pipeline is waited
# on; the exit status of get_snapshot is not examined here, so a failed
# snapshot stream would presumably go unnoticed - confirm.
function get_encrypted_snapshot {
local secret="$1"
local outf="$2"
local tmpf
local tmpd
local gpgpid
local attempts
local result
# the fifo lives in a private temporary directory
tmpd="$( mktemp -d )"
tmpf="${tmpd}/.snapshot"
# try our best to make sure the fifo file is deleted.
# $tmpd and $tmpf are expanded now, when the traps are installed.
trap "cleanup $tmpd $tmpf" SIGTERM
trap "cleanup $tmpd $tmpf" SIGINT
trap "cleanup $tmpd $tmpf" EXIT
trap "cleanup $tmpd $tmpf" RETURN
mkfifo -m 600 "$tmpf"
# run gpg in the background, waiting for passphrase on fifo file
get_snapshot \
| gpg --symmetric \
--output="$outf" \
--passphrase-file "$tmpf" \
--batch \
--pinentry-mode loopback \
/dev/stdin &
gpgpid=$!
# hand the passphrase to gpg through the fifo
echo -n "${!secret}" > "$tmpf"
# wait for gpgpid: poll with ps until the process is gone or the
# attempt budget is used up
attempts=0
while [ "$attempts" -lt "$MAXATTEMPTS" ]; do
ps -p $gpgpid >/dev/null 2>&1
if [ $? -ne 0 ]; then
break
fi
attempts=$(( attempts + 1 ))
sleep $GPGSLEEP
done
# budget exhausted: stop gpg and report failure
if [ "$attempts" -ge "$MAXATTEMPTS" ]; then
echo "failed to wait for gpg" >&2
kill $gpgpid
return 1
fi
# gpg has already finished; wait only collects its exit status
wait $gpgpid
result=$?
# don't leave a passphrase laying around, in case the fifo
# was unread
rm -r "$tmpd" 2>/dev/null >/dev/null
return $result
}
# Use mktemp to get a random string and test to see if a k8s secret
# already exists with that suffix within the openbao namespace
#
# Try a few times (MAXATTEMPTS) before giving up; unpause the
# openbao-manager and exit on failure.
#
# Return the random string via stdout
#
# NOTE(review): this function is called through command substitution,
# which runs it in a subshell. The "exit" inside unpause_exit therefore
# ends only that subshell; the caller must check the substitution's
# exit status (or an empty result) to notice the failure.
function get_unique_string {
local attempts
local rndtmp
local secret
local secrets
# the loop below runs really fast, reading the secret names
# once should be fine
secrets="$( kubectl get secrets -n "$OPENBAO_NS" \
-o jsonpath='{range .items[*]}{.metadata.name}{"\n"}{end}' \
| grep "^${K8S_SECRET_PREFIX}" )"
attempts=0
while [ "$attempts" -lt "$MAXATTEMPTS" ]; do
# take the part after the first '.' of the name mktemp would create
# (nothing is created, because of --dry-run) and lowercase it
rndtmp="$( mktemp --dry-run \
| cut -f 2 -d'.' \
| tr '[:upper:]' '[:lower:]' )"
secret="${K8S_SECRET_PREFIX}-$rndtmp"
# accept the candidate when it does not occur in the existing names
if [[ " $secrets " != *"$secret"* ]]; then
break
fi
attempts=$(( attempts + 1 ))
done
if [ "$attempts" -ge "$MAXATTEMPTS" ]; then
echo "Failed to get a unique string for the snapshot" >&2
unpause_exit 1
fi
echo -n "$rndtmp"
}
###
# Main
#
OUTPUTDIR="$1"
ENCRYPT="$2"
SECRET="$3"
if [ -z "$OUTPUTDIR" -o ! -d "$OUTPUTDIR" ]; then
echo "Non-existing output directory: [$OUTPUTDIR]" >&2
usage
exit 1
fi
if [ -n "$ENCRYPT" ]; then
if [ ! "$ENCRYPT" = "--encrypt" ]; then
echo "Unrecognized parameter: [$ENCRYPT]" >&2
usage
exit 1
elif [ -z "$SECRET" ]; then
echo "Required variable name when --encrypt is used" >&2
usage
exit 1
elif [ -z "${!SECRET}" ]; then
echo "Required secret when --encrypt is used" \
"(is '$SECRET' variable exported?)" >&2
usage
exit 1
fi
fi
# Pause openbao manager
logs="$( kubectl exec -n "$OPENBAO_NS" "$POD" -- \
bash -c "${SCRIPT}; touch \"\${PAUSEFILE}\"" 2>&1 )"
if [ $? -ne 0 ]; then
echo "Failed to pause openbao-manager: [$logs]" >&2
exit 1
fi
# ensure that openbao is in a good state for taking the snapshot
logs="$( kubectl exec -n "$OPENBAO_NS" "$POD" -- \
bash -c "${SCRIPT}; snapshotPreCheck" 2>&1 )"
if [ $? -ne 0 ]; then
echo "$logs" >&2
unpause_exit 1
fi
# Derive the k8s secret name and the output file names from a unique
# random suffix.
#
# get_unique_string runs in a command-substitution subshell, so the
# "unpause_exit 1" on its failure path ends only that subshell. Check
# the outcome here so that a failure stops the script instead of
# carrying on with an empty suffix. unpause_exit is best effort, so
# calling it again when the manager is already unpaused is harmless.
rndtmp="$( get_unique_string )"
if [ $? -ne 0 ] || [ -z "$rndtmp" ]; then
    echo "Failed to generate a unique name for the snapshot" >&2
    unpause_exit 1
fi
secret="${K8S_SECRET_PREFIX}-$rndtmp"
fname="${OUTPUTDIR}/openbao-snapshot-${rndtmp}.tar"
metaf="${fname}.metadata"
# get the snapshot
if [ "$ENCRYPT" == "--encrypt" ]; then
encrypted=true
get_encrypted_snapshot "$SECRET" "$fname"
if [ $? -ne 0 ]; then
unpause_exit 1
fi
else
encrypted=false
get_snapshot > "$fname"
if [ $? -ne 0 ]; then
unpause_exit 1
fi
fi
# Prepare metadata file. This procedure only uses 'secret',
# but I'm sure the other information will be useful to humans
sum="$( sha256sum "$fname" | cut -f 1 -d' ' )"
now="$( date )"
metadata="{\"date\":\"$now\",
\"snapshot_sum\":\"$sum\",
\"secret\":\"$secret\",
\"user_encrypted\":\"$encrypted\"}"
echo "$metadata" > "${metaf}"
# write the metadata to k8s secret, along with the shards
# associated with the snapshot
kubectl exec -n "$OPENBAO_NS" "$POD" -- \
bash -c "${SCRIPT}; snapshotSetSecret '$secret' '$metadata'"
if [ $? -ne 0 ]; then
echo "Failed to set k8s secret for snapshot" >&2
unpause_exit 1
fi
unpause_exit 0

View File

@ -0,0 +1,114 @@
---
#
# Copyright (c) 2025 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
- name: Check openbao system health
shell: >-
kubectl exec -n "openbao" "{{ openbao_manager_pod_name.stdout }}" --
bash -c "source /opt/script/init.sh; snapshotPreCheck" 2>&1
register: openbao_system_health
environment:
KUBECONFIG: /etc/kubernetes/admin.conf
failed_when: openbao_system_health.rc != 0
- name: Create openbao snapshot
block:
- name: Create openbao snapshot with default encryption
script: openbao_snapshot.sh {{ openbao_backup_dir }}
when: not openbao_encrypt
register: openbao_snapshot_script
failed_when: openbao_snapshot_script.rc != 0
- name: Create openbao snapshot with custom encryption
script: openbao_snapshot.sh {{ openbao_backup_dir }} '--encrypt' "custom_var"
when: openbao_encrypt
register: openbao_snapshot_script
failed_when: openbao_snapshot_script.rc != 0
environment:
KUBECONFIG: /etc/kubernetes/admin.conf
custom_var: "{{ encrypt_openbao_secret }}"
always:
# Safety net: remove the pause file in the manager pod if the snapshot
# script left it behind.
#
# \$PAUSEFILE is escaped so that it is expanded by the bash running
# inside the pod, after init.sh has been sourced there. Unescaped, the
# local shell started by Ansible expands it (to an empty string) before
# kubectl runs, and the pause file is never removed.
- name: Unpause openbao manager
  shell: >-
    kubectl exec -n "openbao" "{{ openbao_manager_pod_name.stdout }}" --
    bash -c "source /opt/script/init.sh; if [ -f \"\$PAUSEFILE\" ]; then rm -f \"\$PAUSEFILE\"; fi" 2>&1
rescue:
- name: Clean up openbao subdir if in standalone mode
file:
path: "{{ openbao_backup_dir }}"
state: absent
when: op_mode == "standalone"
- name: Package openbao if running in standalone mode
block:
- name: Check if pigz package is installed
block:
- name: Issue command to pkg manager
command: "{{ 'rpm -q' if os_release == 'centos' else 'dpkg -l' }} pigz"
args:
warn: false
failed_when: false
register: check
- set_fact:
pigz_check: "{{ 'succeeded' if check.rc == 0 else 'failed' }}"
when: os_release in ["centos", "debian"]
- name: Check if pigz package is installed
package:
name: pigz
state: present
check_mode: true
register: pigz_check
when: os_release not in ["centos", "debian"]
- name: Check number of platform cores
shell: |
source /etc/platform/openrc
system host-cpu-list $(hostname) --nowrap | grep " Platform " | wc -l
register: num_platform_cores
- name: Set compress program for backup tarball
set_fact:
compress_program: >-
"{{ 'pigz' if num_platform_cores.stdout | int >= 4 and
pigz_check is succeeded else 'gzip' }}"
- name: Use current timestamp as backups timestamp
set_fact:
backup_timestamp: "{{ lookup('pipe', 'date +%Y_%m_%d_%H_%M_%S') }}"
- name: Attach timestamp to backups filename
set_fact:
openbao_backup_file: "{{ openbao_backup_filename_prefix }}_{{ backup_timestamp }}.tgz"
- name: Set backup files absolute path
set_fact:
openbao_backup_file_path: "{{ initial_backup_dir }}/{{ openbao_backup_file }}"
# Pack the snapshot directory into the standalone backup tarball.
# The task name previously said "Hashicorp openbao", a leftover from the
# vault version of this role.
- name: Create a tgz archive for openbao backup
  shell: >-
    tar
    --use-compress-program={{ compress_program }}
    -cf {{ openbao_backup_file_path }}
    $(ls -d
    {{ openbao_backup_dir }}
    2> /dev/null)
  args:
    warn: false
  # Changing the failed_when behavior to prevent the backup to fail on "file changed as we read it", which
  # makes tar return 1
  register: tar_cmd
  failed_when: tar_cmd.rc >= 2 or tar_cmd.rc < 0
- name: Cleanup openbao subdir
file:
path: "{{ openbao_backup_dir }}"
state: absent
when: op_mode == "standalone"

View File

@ -0,0 +1,151 @@
#!/bin/bash
# Script to restore a snapshot to the openbao
###
# Globals
#
NAME="$( basename $0 )"
KUBECMD="kubectl"
SCRIPT="source /opt/script/init.sh"
OPENBAO_NS="openbao"
MANAGER_PREFIX="stx-openbao-manager"
# get openbao manager pod
JSONPATH='{range .items[*]}{.metadata.name}{"\n"}{end}'
POD="$( $KUBECMD get pods -n "$OPENBAO_NS" -o jsonpath="$JSONPATH" \
| grep "^$MANAGER_PREFIX" )"
if [ -z "$POD" ]; then
echo "Openbao manager not found" >&2
exit 1
fi
###
# Functions
#
function usage {
echo -e "Usage: \n" \
"\n" \
"$NAME <input_file> [--decrypt <variable> ]\n" \
"\n" \
"All parameters are positional:\n" \
" input_file: required, snapshot file to restore from\n" \
" --decrypt: optional\n" \
" variable: required if --decrypt is specified, the name\n" \
" of a variable containing a secret with which\n" \
" decrypt the snapshot file\n" >&2
}
# Remove the pause file inside the openbao manager pod, then terminate
# the current shell.
#   $1: exit code to terminate with
function unpause_exit {
    local rc="$1"
    # \${PAUSEFILE} stays unexpanded here so that the bash inside the
    # pod resolves it after sourcing init.sh.
    local remote_cmd="${SCRIPT}; rm \"\${PAUSEFILE}\""

    # Best effort only: the outcome of the removal is deliberately
    # not checked.
    kubectl exec -n "$OPENBAO_NS" "$POD" -- bash -c "$remote_cmd"

    exit $rc
}
###
# Main
#
INPUTFILE="$1"
DECRYPT="$2"
SECRET="$3"
if [ -z "$INPUTFILE" -o ! -f "$INPUTFILE" ]; then
echo "Non-existing snapshot file: [$INPUTFILE]" >&2
usage
exit 1
fi
if [ -n "$DECRYPT" ]; then
if [ ! "$DECRYPT" = "--decrypt" ]; then
echo "Unrecognized parameter: [$DECRYPT]" >&2
usage
exit 1
elif [ -z "$SECRET" ]; then
echo "Required variable name when --decrypt is used" >&2
usage
exit 1
elif [ -z "${!SECRET}" ]; then
echo "Required secret when --decrypt is used" \
"(is '$SECRET' variable exported?)" >&2
usage
exit 1
fi
fi
# get the metadata, and snapshot secret associated with the snapshot
# file. This is expected to be in the same directory as the snapshot
METADATAF="${INPUTFILE}.metadata"
if [ ! -f "$METADATAF" ]; then
echo "The metadata file associated with snapshot file" \
"$INPUTFILE is not found: $METADATAF" >&2
exit 1
fi
# openbao manager code will do more sanity on the json, make sure
# at least that it is not empty
METADATA="$( cat "$METADATAF" )"
if [ -z "$METADATA" ]; then
echo "The metadata should at least contain:" \
'{"secret":"name_of_k8s_secret"}' >&2
exit 1
fi
# Pause openbao manager
logs="$( kubectl exec -n "$OPENBAO_NS" "$POD" -- \
bash -c "${SCRIPT}; touch \"\${PAUSEFILE}\"" 2>&1 )"
if [ $? -ne 0 ]; then
echo "Failed to pause openbao-manager: [$logs]" >&2
exit 1
fi
# ensure that openbao is in a good state for restoring the snapshot
logs="$( kubectl exec -n "$OPENBAO_NS" "$POD" -- \
bash -c "${SCRIPT}; snapshotPreCheck" 2>&1 )"
if [ $? -ne 0 ]; then
echo "$logs" >&2
unpause_exit 1
fi
# restore the snapshot
if [ "$DECRYPT" == "--decrypt" ]; then
logs="$( echo "${!SECRET}" \
| gpg --no-symkey-cache \
-q \
--batch \
--passphrase-fd 0 \
--decrypt "$INPUTFILE" \
| kubectl exec -n "$OPENBAO_NS" "$POD" -i -- \
bash -c "${SCRIPT}; \
snapshotRestore '$METADATA'" )"
if [ $? -ne 0 ]; then
echo "Failed to restore snapshot: [$logs]" >&2
unpause_exit 1
fi
else
logs="$( cat "$INPUTFILE" \
| kubectl exec -n "$OPENBAO_NS" "$POD" -i -- \
bash -c "${SCRIPT}; \
snapshotRestore '$METADATA'" )"
if [ $? -ne 0 ]; then
echo "Failed to restore snapshot: [$logs]" >&2
unpause_exit 1
fi
fi
echo "Snapshot restore complete." >&2
unpause_exit 0

View File

@ -0,0 +1,66 @@
---
#
# Copyright (c) 2025 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
- name: Unpackage the backup tarball
command: >-
tar --use-compress-program=pigz -C {{ openbao_backup_dir }} -xpf {{ backup_filepath }}
--wildcards --transform='s,.*/,,'
args:
warn: false
become: yes
- name: Find the snapshot file
command: >-
find {{ openbao_backup_dir }} -name "openbao-snapshot-*.tar"
register: backup_snapshot_file
become: yes
- name: Fail if snapshot file was not found
fail:
msg: "Backup snapshot was not found in {{ backup_filepath }}"
when: backup_snapshot_file.stdout | length == 0
# Set the permissions of the snapshot file found by the previous task.
- name: Change snapshot file permissions
  file:
    path: "{{ backup_snapshot_file.stdout }}"
    # Quoted so the value is always handled as an octal mode string and
    # never depends on how the YAML parser types a bare 0755.
    mode: "0755"
  become: yes
- name: Find openbao manager pod
shell: >-
kubectl get pods -n openbao | grep "openbao-manager" | cut -d " " -f 1
register: openbao_manager_pod_name
environment:
KUBECONFIG: /etc/kubernetes/admin.conf
# call openbao_restore.sh
- name: Restore openbao from the snapshot
block:
- name: Restore openbao snapshot with default encryption
script: openbao_restore.sh {{ backup_snapshot_file.stdout }}
when: not openbao_encrypt
register: openbao_restore_script
failed_when: openbao_restore_script.rc != 0
- name: Restore openbao snapshot with custom encryption
script: openbao_restore.sh {{ backup_snapshot_file.stdout }} '--decrypt' "custom_var"
when: openbao_encrypt
register: openbao_restore_script
failed_when: openbao_restore_script.rc != 0
environment:
KUBECONFIG: /etc/kubernetes/admin.conf
custom_var: "{{ encrypt_openbao_secret }}"
always:
# Safety net: remove the pause file in the manager pod if the restore
# script left it behind.
#
# \$PAUSEFILE is escaped so that it is expanded by the bash running
# inside the pod, after init.sh has been sourced there. Unescaped, the
# local shell started by Ansible expands it (to an empty string) before
# kubectl runs, and the pause file is never removed.
- name: Unpause openbao manager
  shell: >-
    kubectl exec -n "openbao" "{{ openbao_manager_pod_name.stdout }}" --
    bash -c "source /opt/script/init.sh; if [ -f \"\$PAUSEFILE\" ]; then rm -f \"\$PAUSEFILE\"; fi" 2>&1
- name: Clean up openbao subdir
file:
path: "{{ openbao_backup_dir }}"
state: absent
become: yes

View File

@ -0,0 +1,448 @@
#!/bin/bash
#
# Copyright (c) 2025 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
# The unsealed state of all openbao server pods is required for the openbao
# snapshot restore procedure.
#
# Under normal circumstances the openbao restore procedure does not
# require the user to put the openbao application into the required state.
# This script attempts to put the openbao server pods into an unsealed
# state - this includes deleting PVCs and shard secrets.
#
# The script ends by verifying the required state, or failing.
OPENBAO_NS="openbao"
OPENBAO_REAPPLIED=false
APP_TAR_PATH="/usr/local/share/applications/helm"
# List of pauses
# app upload:
# 60s == OPENBAO_UPLOAD_TRIES @ OPENBAO_UPLOAD_SLEEP intervals
# app abort:
# 120s == OPENBAO_ABORT_TRIES @ OPENBAO_ABORT_SLEEP intervals
# app remove:
# 50s == OPENBAO_REMOVE_TRIES @ OPENBAO_REAPPLY_WAITTIME intervals
# PVC delete:
# 120s == PVC_DELETE_TRIES @ OPENBAO_REAPPLY_WAITTIME intervals
# cluster-key delete:
# 60s == CLUSTER_KEY_DELETE_TRIES @ OPENBAO_REAPPLY_WAITTIME intervals
# app apply:
# 300s == OPENBAO_APPLY_TRIES @ OPENBAO_REAPPLY_WAITTIME intervals
# post apply wait time:
# 30s == OPENBAO_UNSEAL_WAITTIME
# unseal per pod:
# 60s == SEALED_STATUS_TRIES @ SEALED_STATUS_WAITTIME intervals
# Number of tries for each action
MAIN_TRIES=2
SEALED_STATUS_TRIES=6
OPENBAO_REMOVE_TRIES=5
PVC_DELETE_TRIES=12
CLUSTER_KEY_DELETE_TRIES=6
OPENBAO_APPLY_TRIES=30
OPENBAO_UPLOAD_TRIES=12
OPENBAO_ABORT_TRIES=24
# Wait times
SEALED_STATUS_WAITTIME=10
OPENBAO_REAPPLY_WAITTIME=10
OPENBAO_UNSEAL_WAITTIME=30
OPENBAO_UPLOAD_SLEEP=5
OPENBAO_ABORT_SLEEP=5
# variables for interpreting application state
# These states are handled by reapplyOpenbao():
APP_STATES="uploading uploaded removing applying applied apply-failed"
REGEX_DELETED="application not found: openbao"
REGEX_NORESOURCES="No resources found in openbao namespace."
APP_STATUS_DEBUG=""
# Generic instruction to the user
GENERIC_INSTRUCTION="$( echo "Resolve the application/platform status" \
"before running the restore procedure again." )"
# Report the status of the openbao application on stdout.
#
# The value is whatever "system application-show" prints, except for two
# synthetic states:
#   deleted - the command failed and its output contains REGEX_DELETED
#             (the application is not uploaded)
#   unknown - the status is not one of APP_STATES (nor "deleted")
#
# The unmodified command output is also stored in APP_STATUS_DEBUG.
function getOpenbaoStatus {
    local raw
    local rc
    local app_status

    # stderr is captured as well: the "not found" response arrives there
    raw="$( system application-show openbao \
        --format value --column status 2>&1 )"
    rc="$?"
    APP_STATUS_DEBUG="$raw"
    app_status="$raw"

    if [ "$rc" -ne 0 ] && [[ "$raw" == *"$REGEX_DELETED"* ]]; then
        app_status="deleted"
    fi

    # Spaces are turned into underscores so that a multi-word message
    # can never match one of the space-separated known states.
    case " $APP_STATES deleted " in
        *" ${app_status// /_} "*)
            ;;
        *)
            app_status="unknown"
            ;;
    esac

    echo "$app_status"
}
# Upload the openbao application when it is not uploaded yet, then wait
# until its status becomes "uploaded".
#   $1: current application status, as printed by getOpenbaoStatus
# Exits the script with status 1 when the application does not reach
# "uploaded" within the polling budget, or when an unexpected status is
# seen while waiting.
function uploadopenbao {
    local status="$1"
    local count=1
    local uploaded

    # The platform may upload the application. Ignore a failed result
    # for application-upload
    if [ "$status" == "deleted" ]; then
        system application-upload "$APP_TAR_PATH"/openbao*.tgz
    fi

    # A small wait before checking the upload status.
    # Start counting at 1 to get OPENBAO_UPLOAD_TRIES sleeps total
    sleep $OPENBAO_UPLOAD_SLEEP
    while [ "$count" -lt "$OPENBAO_UPLOAD_TRIES" ]; do
        uploaded="$( getOpenbaoStatus )"
        echo "openbao application status: $uploaded"
        if [ "$uploaded" == "uploaded" ]; then
            break;
        elif [ "$uploaded" == "deleted" ]; then
            true # pass, the platform is sloooow today
        elif [ "$uploaded" != "uploading" ]; then
            # invoke the failure path
            count="$OPENBAO_UPLOAD_TRIES"
            break
        fi
        count="$(( count + 1 ))"
        sleep $OPENBAO_UPLOAD_SLEEP
    done

    if [ "$count" -ge "$OPENBAO_UPLOAD_TRIES" ]; then
        # getOpenbaoStatus is invoked through command substitution, so
        # the APP_STATUS_DEBUG it assigns stays in that subshell. Fetch
        # the raw status here so the message below is not empty.
        APP_STATUS_DEBUG="$( system application-show openbao \
            --format value --column status 2>&1 )"
        echo "Failed to upload openbao in" \
            "$(( $OPENBAO_UPLOAD_TRIES * $OPENBAO_UPLOAD_SLEEP ))s." \
            "$GENERIC_INSTRUCTION"
        echo "Application status: [$APP_STATUS_DEBUG]"
        exit 1
    fi
    echo "Application uploaded."
}
# Abort an in-progress apply of the openbao application and wait for the
# application to leave the "applying" state.
# Takes no arguments. Returns normally once the status is "apply-failed"
# or "applied"; exits the script with status 1 when the polling budget
# runs out or an unexpected status is seen.
function abortOpenbao {
    local count=0
    local aborted

    # "applying" was the trigger state for this function.
    # Expect: applying, applied, apply-failed
    # And ignore the result of system application-abort
    system application-abort openbao

    # Normally the abort will happen promptly, such as when the app was
    # applying for some time already. A short initial sleep is
    # not required. But when running application-apply and
    # application-abort in quick succession the actual time is observed
    # at 60s typical for that case.
    while [ "$count" -lt "$OPENBAO_ABORT_TRIES" ]; do
        aborted="$( getOpenbaoStatus )"
        echo "openbao application status: $aborted"
        if [ "$aborted" == "apply-failed" ]; then
            # either interpretation of apply-failed is ok
            break;
        elif [ "$aborted" == "applying" ]; then
            true # pass, abort can take a while
        elif [ "$aborted" == "applied" ]; then
            # race condition probably between seeing 'applying' and
            # running application-abort
            break
        else
            # invoke the failure path
            count="$OPENBAO_ABORT_TRIES"
            break;
        fi
        count="$(( count + 1 ))"
        sleep $OPENBAO_ABORT_SLEEP
    done

    if [ "$count" -ge "$OPENBAO_ABORT_TRIES" ]; then
        # getOpenbaoStatus is invoked through command substitution, so
        # the APP_STATUS_DEBUG it assigns stays in that subshell. Fetch
        # the raw status here so the message below is not empty.
        APP_STATUS_DEBUG="$( system application-show openbao \
            --format value --column status 2>&1 )"
        echo "Failed to abort apply of openbao app within" \
            "$(( $OPENBAO_ABORT_TRIES * $OPENBAO_ABORT_SLEEP ))s." \
            "$GENERIC_INSTRUCTION"
        echo "Application status: [$APP_STATUS_DEBUG]"
        exit 1
    fi
    echo "Application apply aborted."
}
# Function to clean openbao and reapply.
#
# Brings the openbao application back to the "uploaded" state (by
# uploading, aborting or removing it as needed), deletes the persistent
# volume claims and the cluster-key / openbao-ca secrets in $OPENBAO_NS,
# then runs application-apply and waits for "applied".
#
# A second call is refused: the global OPENBAO_REAPPLIED flag is set on
# the first call and checked on entry.
#
# Reads:   OPENBAO_REAPPLIED, OPENBAO_NS, OPENBAO_REMOVE_TRIES,
#          OPENBAO_REAPPLY_WAITTIME, OPENBAO_APPLY_TRIES,
#          PVC_DELETE_TRIES, CLUSTER_KEY_DELETE_TRIES,
#          REGEX_NORESOURCES, GENERIC_INSTRUCTION, APP_STATUS_DEBUG
# Returns: 0 when the application reaches "applied"
#          1 when a reapply was already attempted, or when the PVC
#            removal, secret removal or final apply does not complete
# Exits:   1 when the application cannot be put into "uploaded" state
#          (abortOpenbao and the upload helper exit on their own
#          failures as well)
function reapplyOpenbao {
    local state
    local tries
    local remainingPVC
    local deleteSecrets
    local key
    local keyDelete
    local remaining
    local pods

    if $OPENBAO_REAPPLIED; then
        echo "openbao reapply already tried. Previous apply likely failed."
        return 1
    fi

    # Do not try to fix openbao more than once
    OPENBAO_REAPPLIED=true

    state="$( getOpenbaoStatus )"
    echo "openbao application status: $state"
    if [[ " deleted uploading " == *" $state "* ]]; then
        # exits on failure; else the state is "uploaded"
        # NOTE(review): spelled with a lowercase "o", unlike
        # abortOpenbao/reapplyOpenbao; bash function names are case
        # sensitive - confirm this matches the function definition.
        uploadopenbao $state
        state="$( getOpenbaoStatus )"
        echo "openbao application status: $state"
    elif [ "$state" == "applying" ]; then
        # Handle this abortable state without giving the app the benefit
        # of the doubt: during restore we anticipate that the
        # application may be waiting for openbao server pods that cannot
        # unseal.
        #
        # exits on failure; else the state is "apply-failed", or
        # possibly the state is "applied" due to race
        abortOpenbao
        state="$( getOpenbaoStatus )"
        echo "openbao application status: $state"
    fi

    if [[ " applied apply-failed " == *" $state "* ]]; then
        system application-remove openbao
    fi

    # Seeing the 'removing' status from a previous operation is
    # unlikely, as in practice system application-show does not run fast
    # enough to catch it. But it should be accounted for.
    if [[ " applied apply-failed removing " == *" $state "* ]]; then
        for tries in $(seq $OPENBAO_REMOVE_TRIES); do
            sleep $OPENBAO_REAPPLY_WAITTIME
            state="$( getOpenbaoStatus )"
            echo "openbao application status: $state"
            if [[ "$state" == "uploaded" ]]; then
                echo "openbao remove completed"
                break
            fi
        done
        # state is updated within the loop
    fi

    # also wait for pods to be removed, for example: before trying
    # to delete the persistentvolumeclaims
    # but ignore if there are still pods
    for tries in $(seq $OPENBAO_REMOVE_TRIES); do
        sleep $OPENBAO_REAPPLY_WAITTIME
        pods="$( kubectl get pods -n $OPENBAO_NS 2>&1 )"
        # NOTE(review): this is an exact string comparison even though
        # the variable is named REGEX_*; confirm its value is the full
        # literal kubectl message and not a pattern.
        if [ "$pods" == "$REGEX_NORESOURCES" ]; then
            break;
        fi
    done

    # the state of the application should be "uploaded"
    if [ "$state" != "uploaded" ]; then
        # Other states that we're not handling include: missing,
        # upload-failed, remove-failed, updating, recovering
        # restore-requested
        echo "Failed to put the openbao application into uploaded state." \
            "$GENERIC_INSTRUCTION" \
            "Application status: $state [$APP_STATUS_DEBUG]"
        exit 1
    fi

    # remove PVC resource
    kubectl delete pvc -n $OPENBAO_NS --all
    # -1 marks "never measured" so a zero-iteration loop still fails
    remainingPVC=-1
    for tries in $(seq $PVC_DELETE_TRIES); do
        sleep $OPENBAO_REAPPLY_WAITTIME
        remainingPVC="$(kubectl get pvc -n $OPENBAO_NS \
            --no-headers=true | wc -l)"
        if [[ $remainingPVC -eq 0 ]]; then
            echo "openbao PVC removal completed"
            break
        fi
    done
    if [[ $remainingPVC -ne 0 ]]; then
        echo "remove pvc resource failed"
        return 1
    fi

    # remove openbao cluster-key and the root CA secrets
    deleteSecrets="$( kubectl get secrets -n $OPENBAO_NS \
        -o jsonpath='{range .items[*]}{.metadata.name}{"\n"}{end}' \
        | grep '^cluster-key\|^openbao-ca$' )"
    for key in $deleteSecrets; do
        kubectl delete secret -n $OPENBAO_NS "$key"
        keyDelete=$?
        if [[ $keyDelete -ne 0 ]]; then
            echo "kubectl-delete-secret returned error"
            return 1
        fi
    done

    # -1 marks "never measured" so a zero-iteration loop still fails
    remaining=-1
    for tries in $(seq $CLUSTER_KEY_DELETE_TRIES); do
        sleep $OPENBAO_REAPPLY_WAITTIME
        remaining="$( kubectl get secrets -n $OPENBAO_NS \
            -o jsonpath='{range .items[*]}{.metadata.name}{"\n"}{end}' \
            | grep '^cluster-key\|^openbao-ca$' | wc -l )"
        if [[ $remaining -eq 0 ]]; then
            echo "openbao secret removal completed"
            break
        fi
    done
    if [[ $remaining -ne 0 ]]; then
        echo "remove secrets failed"
        return 1
    fi

    # application-apply
    system application-apply openbao
    for tries in $(seq $OPENBAO_APPLY_TRIES); do
        sleep $OPENBAO_REAPPLY_WAITTIME
        state="$( getOpenbaoStatus )"
        echo "openbao application status: $state"
        if [[ "$state" == "applied" ]]; then
            echo "openbao apply completed"
            break
        fi
    done
    if [[ "$state" != "applied" ]]; then
        echo "openbao Reapply: application-apply failed"
        return 1
    fi

    # The openbao server pods remain in unready state until the server is
    # unsealed due to using the healthz endpoint for pod readiness
    # probe. The openbao application remains in applying status until the
    # first openbao server pod transitions to ready state.
    #
    # For the case of replicas>1, openbao server unseal validation is done
    # in the main
    return 0
}
###
# Main
#
# Validates that the openbao application is ready to be restored and
# runs reapplyOpenbao when it is not.  Each pass of the loop checks, in
# order:
#   1. the application status is "applied" or "applying"
#   2. at least one stx-openbao-N pod is Running
#   3. every stx-openbao-N pod carries the label openbao-sealed=false
# A successful reapply restarts the checks on the next pass; a failed
# reapply exits 1.  The script exits 0 as soon as one pass clears all
# three checks, and exits 1 once MAIN_TRIES passes are used up.
#

# jsonpath template printing "<pod name> <openbao-sealed label>" for
# each pod.  The continuation line must start in the first column:
# leading whitespace there would split the assignment into two words.
JPATHFULL='{range .items[*]}{.metadata.name}{" "}'\
'{.metadata.labels.openbao-sealed}{"\n"}{end}'
# Re-joins the whitespace-separated pieces of JPATHFULL with single
# spaces (this leaves a trailing space on the template).
JPATH="$(printf '%s\n' $JPATHFULL | tr '\n' ' ')"

echo "Validating openbao status"
source "/etc/platform/openrc"

for validateTries in $(seq $MAIN_TRIES); do
    echo "Attempting validation number $validateTries"

    # check if openbao application is applied or applying
    rst="$( getOpenbaoStatus )"
    echo "openbao application status: $rst"
    if [ "$rst" != "applied" ] && [ "$rst" != "applying" ]; then
        # if not, run recovery
        echo "openbao not applied. Attempting reapply..."
        reapplyOpenbao
        reapplyOpenbaoRC=$?
        if [[ $reapplyOpenbaoRC -eq 0 ]]; then
            echo "openbao reapply completed. Reattempting validation."
            continue
        else
            echo "openbao reapply failed for trying to" \
                "fix not-applied openbao application." \
                "Unable to ready openbao for restore."
            exit 1
        fi
    fi

    # Whether 'applied' or 'applying', we expect to see a running openbao
    # server pod. In the applying case, there is a window where the
    # applying procedure hasn't gotten that far. Ignore this possibility
    # when it comes from outside this procedure - run abort as if the
    # app is stuck.
    #
    # Check if there is a running openbao pod:
    numRunningPods="$(kubectl get pods -n $OPENBAO_NS | \
        grep "^stx-openbao-[0-9] " | grep "Running" | wc -l)"
    if [[ $numRunningPods -eq 0 ]]; then
        # if not, run recovery
        echo "No openbao pods are running. Attempting reapply..."
        reapplyOpenbao
        reapplyOpenbaoRC=$?
        if [[ $reapplyOpenbaoRC -eq 0 ]]; then
            echo "openbao reapply completed. Reattempting validation."
            continue
        else
            echo "openbao reapply failed for trying to" \
                "fix no running openbao pods." \
                "Unable to ready openbao for restore."
            exit 1
        fi
    fi

    # Whether applied or applying, in both cases it is possible for a
    # openbao server pod to be waiting to be unsealed. Wait upon the
    # sealed status of all pods.
    sealedPods=0
    prevSealedPods=0
    sealedExists=true
    triesCount=$SEALED_STATUS_TRIES
    while [[ $triesCount -gt 0 ]]; do
        # get number of sealed pods
        # When pods are starting they have no seal status (empty
        # string). So search for and omit unsealed pods instead.
        sealedPods="$( kubectl get pods -n $OPENBAO_NS -o jsonpath="$JPATH" \
            | grep "^stx-openbao-[0-9] " \
            | grep -v "false$" | wc -l )"

        # check if there are no sealed pods, if so mark success and break loop
        if [[ $sealedPods -eq 0 ]]; then
            sealedExists=false
            break
        fi

        # if number of sealed pods decreased, reset wait counter
        if [[ $sealedPods -lt $prevSealedPods ]]; then
            triesCount=$SEALED_STATUS_TRIES
        else
            triesCount=$(( triesCount - 1 ))
        fi

        # wait for pods to unseal
        sleep $SEALED_STATUS_WAITTIME
        prevSealedPods=$sealedPods
    done

    # if there are still sealed pods, attempt reapply
    if $sealedExists; then
        echo "There are sealed pods. Attempting reapply..."
        reapplyOpenbao
        reapplyOpenbaoRC=$?
        if [[ $reapplyOpenbaoRC -eq 0 ]]; then
            echo "openbao reapply completed. Reattempting validation."
            continue
        else
            echo "openbao reapply failed for trying to" \
                "fix sealed openbao pods." \
                "Unable to ready openbao for restore."
            exit 1
        fi
    fi

    # all test passed. exit
    echo "All validation passed. openbao application is ready to be restored."
    exit 0
done

echo "All tries exhausted. Unable to ready openbao for restore."
exit 1

View File

@ -0,0 +1,135 @@
---
#
# Copyright (c) 2025 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
# Default: the backup tarball is inspected on the host being managed.
- name: Set default target where backup tarball inspection takes place
  set_fact:
    inspection_target: '{{ inventory_hostname }}'
# Set inspection target to Ansible control machine if the backup tarball
# is off-box.
- name: Update target if backup data are off-box
  set_fact:
    inspection_target: localhost
  # Negate the boolean directly instead of comparing it to a literal.
  when: not on_box_data | bool
# On-box runs fall back to /opt/platform-backup when initial_backup_dir
# is undefined or null.  Both conditions in the list must hold.
- name: Set initial_backup_dir if running on target and no value was supplied
  set_fact:
    initial_backup_dir: /opt/platform-backup
  when:
    - (initial_backup_dir is not defined) or (initial_backup_dir is none)
    - on_box_data | bool
# Stat the directory on whichever machine holds the backup data; the
# result is registered for the existence check that follows.
- name: Validate initial_backup_dir exists
  stat:
    path: '{{ initial_backup_dir }}'
  register: initial_backup_dir_exists
  delegate_to: '{{ inspection_target }}'
# Stop when the stat result registered as initial_backup_dir_exists
# reports that the path is absent.
- name: Fail if initial_backup_dir does not exists
  fail:
    msg: 'Directory initial_backup_dir: {{ initial_backup_dir }} does not exist'
  when: not initial_backup_dir_exists.stat.exists
# On-box data: openbao files live under initial_backup_dir.
- name: Set openbao backup directory fact for on box
  set_fact:
    openbao_backup_dir: "{{ initial_backup_dir }}/openbao"
  # Use the boolean directly instead of comparing it to a literal.
  when: on_box_data | bool
# Off-box data: openbao files live under target_backup_dir on the target.
- name: Set openbao backup directory fact for off box
  set_fact:
    openbao_backup_dir: "{{ target_backup_dir }}/openbao"
  # Negate the boolean directly instead of comparing it to a literal.
  when: not on_box_data | bool
# Encryption was requested (openbao_encrypt) but the secret used for it
# (encrypt_openbao_secret) is empty.
- name: Fail if passphrase is omitted
  fail:
    msg: >
      A passphrase is required for encryption;
      set variable override backup_encryption_passphrase.
      To disable encryption set override
      backup_encryption_enabled=false
  when:
    - openbao_encrypt | bool
    - encrypt_openbao_secret | length == 0
# Backup mode only: the openbao application must report status "applied"
# before a backup is attempted.
- name: Check openbao apply for backup
  block:
    # Read-only status query, so it never reports a change.
    - name: Check if openbao is applied
      shell: |
        source /etc/platform/openrc
        system application-show openbao --format value --column status
      register: openbao_applied_exists
      changed_when: false

    - name: Fail if openbao is not applied
      fail:
        msg: "Openbao application is not applied"
      when: openbao_applied_exists.stdout != "applied"
  when: openbao_mode == "backup"
# Restore mode only: stage the backup tarball on the target when it is
# held off-box, confirm the tarball is present, then run
# validate_recover_openbao.sh to check the application can be restored.
- name: Validate openbao health for restore.
  block:
    - name: Transfer backup tarball to {{ target_backup_dir }} on the target
      copy:
        src: "{{ initial_backup_dir }}/{{ backup_filename }}"
        dest: "{{ target_backup_dir }}"
        owner: root
        group: root
        mode: "0755"
      become: true
      when: not on_box_data | bool

    - name: Set backup file path for on box
      set_fact:
        backup_filepath: "{{ initial_backup_dir }}/{{ backup_filename }}"
      when: on_box_data | bool

    - name: Set backup file path for off box
      set_fact:
        backup_filepath: "{{ target_backup_dir }}/{{ backup_filename }}"
      when: not on_box_data | bool

    # ls exits non-zero when the path is missing.  Tolerate that here so
    # the next task can report the problem with its own message instead
    # of the play stopping on the raw ls error.
    - name: Find backup tarball
      shell: |
        ls {{ backup_filepath }}
      register: backup_tarball
      failed_when: false
      changed_when: false

    - name: Fail if openbao backup tarball not found
      fail:
        msg: "Openbao snapshot tarball: {{ backup_filename }} was not found"
      when: (backup_tarball.rc != 0) or (backup_tarball.stdout | length == 0)

    - name: Run application validation
      block:
        - name: Validate if openbao application is ready to be restored
          script: validate_recover_openbao.sh
          environment:
            KUBECONFIG: /etc/kubernetes/admin.conf
          register: validate_openbao_result
          failed_when: validate_openbao_result.rc != 0
      always:
        # Runs whether or not validation failed.  Guard on stdout too, so
        # a result without captured output does not raise a second error.
        - name: Display openbao validation script output if it exists
          debug:
            msg: "{{ validate_openbao_result.stdout }}"
          when:
            - validate_openbao_result is defined
            - validate_openbao_result.stdout is defined
  when: openbao_mode == "restore"
# Capture the first column (pod name) of the pod listing lines in the
# openbao namespace that contain "openbao-manager".  Read-only query, so
# it never reports a change.
- name: Find openbao manager pod
  shell: >-
    kubectl get pods -n openbao | grep "openbao-manager" | cut -d " " -f 1
  register: openbao_manager_pod_name
  changed_when: false
  environment:
    KUBECONFIG: /etc/kubernetes/admin.conf
# An empty result from the pod lookup means no manager pod was listed.
- name: Fail if openbao manager pod is not found
  fail:
    msg: 'Openbao manager pod is not found'
  when: (openbao_manager_pod_name.stdout | length) == 0
# Ensure the openbao backup directory exists, using privilege escalation.
- name: Create openbao subdirectory in initial_backup_dir
  become: true
  file:
    path: '{{ openbao_backup_dir }}'
    state: directory
    mode: "0755"