From 71d19daef9573aa591a1d779f3f13db2f26f09a6 Mon Sep 17 00:00:00 2001 From: Tae Park Date: Wed, 27 Nov 2024 15:03:41 -0500 Subject: [PATCH] Add Openbao Backup and Restore Adding backup and restore for openbao. As openbao is a fork of hashicorp vault, it uses an identical procedure to back up and restore itself. The process used here is the same as the one used for hashicorp vault. The code is copied over to keep the code separated from the vault version. This is to keep the work simple once vault backup and restore is removed. Test Plan: PASS Openbao standalone backup is successful without errors PASS Openbao platform backup is successful without errors PASS Openbao standalone restore is successful without errors Story: 2011244 Task: 51419 Change-Id: I49caf7f300563d511a5f609b40defd79420f110c Signed-off-by: Tae Park --- .../host_vars/backup-restore/default.yml | 18 +- .../src/playbooks/openbao_backup.yml | 32 ++ .../src/playbooks/openbao_restore.yml | 31 ++ .../roles/backup/backup-system/tasks/main.yml | 126 ++++- .../openbao_backup/files/openbao_snapshot.sh | 264 +++++++++++ .../openbao/openbao_backup/tasks/main.yml | 114 +++++ .../openbao_restore/files/openbao_restore.sh | 151 ++++++ .../openbao/openbao_restore/tasks/main.yml | 66 +++ .../files/validate_recover_openbao.sh | 448 ++++++++++++++++++ .../roles/openbao/prepare_env/tasks/main.yml | 135 ++++++ 10 files changed, 1382 insertions(+), 3 deletions(-) create mode 100644 playbookconfig/src/playbooks/openbao_backup.yml create mode 100644 playbookconfig/src/playbooks/openbao_restore.yml create mode 100644 playbookconfig/src/playbooks/roles/openbao/openbao_backup/files/openbao_snapshot.sh create mode 100644 playbookconfig/src/playbooks/roles/openbao/openbao_backup/tasks/main.yml create mode 100644 playbookconfig/src/playbooks/roles/openbao/openbao_restore/files/openbao_restore.sh create mode 100644 playbookconfig/src/playbooks/roles/openbao/openbao_restore/tasks/main.yml create mode 100644 
playbookconfig/src/playbooks/roles/openbao/prepare_env/files/validate_recover_openbao.sh create mode 100644 playbookconfig/src/playbooks/roles/openbao/prepare_env/tasks/main.yml diff --git a/playbookconfig/src/playbooks/host_vars/backup-restore/default.yml b/playbookconfig/src/playbooks/host_vars/backup-restore/default.yml index 86a406ba9..55e0b4bbe 100644 --- a/playbookconfig/src/playbooks/host_vars/backup-restore/default.yml +++ b/playbookconfig/src/playbooks/host_vars/backup-restore/default.yml @@ -70,16 +70,18 @@ backup_encryption_enabled: false backup_encryption_passphrase: "" # A list of identifiers indicating which backup files to encrypt: -# [platform, openstack, user_images, dc_vault, registry, hc_vault] +# [platform, openstack, user_images, dc_vault, registry, hc_vault, openbao] backup_encyption_include: - platform - hc_vault + - openbao # Internal boolean variables for encryption to simplify logic. These # will be adjusted later when the overriden parameters above are # considered. platform_tarball_encrypted: false hc_vault_tarball_encrypted: false +openbao_tarball_encrypted: false # The platform backup tarball will be named in this format: # _.tgz @@ -126,10 +128,22 @@ dc_vault_backup_filename_prefix: "{{ inventory_hostname }}_dc_vault_backup" backup_hc_vault: false # The hashicorp vault backup tarball will be named in this format: -# _.tgz +# _.tgz # hc_vault_backup_filename_prefix: "{{ inventory_hostname }}_hc_vault_backup" +# This is the default value for including openbao into the platform backup process. +# This value can be overridden by the user when calling for platform backup playbook, +# to include or not include the openbao backup. +# If the openbao application is either uploaded only or non-existent, +# the backup process will be omitted regardless of what this value is. 
+backup_openbao: false + +# The openbao backup tarball will be named in this format: +# <openbao_backup_filename_prefix>_<timestamp>.tgz +# +openbao_backup_filename_prefix: "{{ inventory_hostname }}_openbao_backup" + restore_cinder_glance_data: false # Default directory where the system backup tarballs fetched from the diff --git a/playbookconfig/src/playbooks/openbao_backup.yml b/playbookconfig/src/playbooks/openbao_backup.yml new file mode 100644 index 000000000..6eff19f0f --- /dev/null +++ b/playbookconfig/src/playbooks/openbao_backup.yml @@ -0,0 +1,32 @@ +--- +# +# Copyright (c) 2025 Wind River Systems, Inc. +# +# SPDX-License-Identifier: Apache-2.0 +# + +# requires one variable passed: +# initial_backup_dir = The resulting backup package will be found here. + +- hosts: all + gather_facts: no + + # Specify defaults including: + # backup_encryption_enabled + # backup_encryption_passphrase + vars_files: + - host_vars/backup-restore/default.yml + + vars: + password_change: false + openbao_encrypt: "{{ backup_encryption_enabled|bool }}" + encrypt_openbao_secret: "{{ backup_encryption_passphrase | default('') }}" + openbao_mode: "backup" + op_mode: "standalone" + + roles: + - role: common/prepare-env + - role: openbao/prepare_env + become: yes + - role: openbao/openbao_backup + become: yes diff --git a/playbookconfig/src/playbooks/openbao_restore.yml b/playbookconfig/src/playbooks/openbao_restore.yml new file mode 100644 index 000000000..5522399d4 --- /dev/null +++ b/playbookconfig/src/playbooks/openbao_restore.yml @@ -0,0 +1,31 @@ +--- +# +# Copyright (c) 2025 Wind River Systems, Inc. 
+# +# SPDX-License-Identifier: Apache-2.0 +# + +# requires two variables passed: +# initial_backup_dir = the directory where the openbao backup package will be found +# backup_filename = filename for openbao backup package + +- hosts: all + gather_facts: no + + # Specify defaults including: + # backup_encryption_enabled + # backup_encryption_passphrase + vars_files: + - host_vars/backup-restore/default.yml + + vars: + password_change: false + openbao_encrypt: "{{ backup_encryption_enabled|bool }}" + encrypt_openbao_secret: "{{ backup_encryption_passphrase | default('') }}" + openbao_mode: "restore" + op_mode: "standalone" + + roles: + - role: common/prepare-env + - role: openbao/prepare_env + - role: openbao/openbao_restore diff --git a/playbookconfig/src/playbooks/roles/backup/backup-system/tasks/main.yml b/playbookconfig/src/playbooks/roles/backup/backup-system/tasks/main.yml index 185b65d6f..60bf0136d 100644 --- a/playbookconfig/src/playbooks/roles/backup/backup-system/tasks/main.yml +++ b/playbookconfig/src/playbooks/roles/backup/backup-system/tasks/main.yml @@ -1,6 +1,6 @@ --- # -# Copyright (c) 2019-2024 Wind River Systems, Inc. +# Copyright (c) 2019-2025 Wind River Systems, Inc. # # SPDX-License-Identifier: Apache-2.0 # @@ -18,6 +18,8 @@ should_use_old_image_backup: "{{ backup_user_images|bool == true }}" include_hc_vault: "{{ backup_hc_vault | bool }}" omit_hc_vault: false + include_openbao: "{{ backup_openbao | bool }}" + omit_openbao: false # The feature is enabled by backup_encryption_enabled variable. 
# The backup tarballs are included in the feature by @@ -45,6 +47,11 @@ set_fact: hc_vault_tarball_encrypted: true when: '"hc_vault" in backup_encyption_include' + + - name: Set Openbao tar encryption enabled + set_fact: + openbao_tarball_encrypted: true + when: '"openbao" in backup_encyption_include' when: backup_encryption_enabled|bool - name: Do StarlingX backup @@ -148,6 +155,61 @@ when: vault_system_health.rc != 0 when: include_hc_vault | bool + - name: Check openbao status + block: + - name: Check if openbao is applied + shell: | + source /etc/platform/openrc + system application-show openbao --format value --column status + register: openbao_applied_exists + + - name: Omit openbao if status is empty or uploaded + set_fact: + include_openbao: false + omit_openbao: true + when: >- + openbao_applied_exists.stdout | length == 0 or + openbao_applied_exists.stdout == "uploaded" + + - name: Fail openbao if status is not applied + fail: + msg: "Openbao application is {{ openbao_applied_exists.stdout }}, not applied." + when: openbao_applied_exists.stdout != "applied" + when: include_openbao | bool + + - name: Indicate if openbao is omitted from status check + debug: + msg: "Openbao backup will be omitted because openbao application is not applied." 
+ when: omit_openbao | bool + + - name: Openbao precheck + block: + - name: Find openbao manager pod + shell: >- + kubectl get pods -n openbao | grep "openbao-manager" | cut -d " " -f 1 + register: openbao_manager_pod_name + environment: + KUBECONFIG: /etc/kubernetes/admin.conf + + - name: Fail if openbao manager pod is not found + fail: + msg: "Openbao manager pod is not found" + when: openbao_manager_pod_name.stdout | length == 0 + + - name: Check openbao system health + shell: >- + kubectl exec -n "openbao" "{{ openbao_manager_pod_name.stdout }}" -- + bash -c "source /opt/script/init.sh; snapshotPreCheck" 2>&1 + register: openbao_system_health + environment: + KUBECONFIG: /etc/kubernetes/admin.conf + + - name: Fail if openbao health check returns error + fail: + msg: "Openbao system health check returned error" + when: openbao_system_health.rc != 0 + when: include_openbao | bool + - name: Send application lifecycle notifications for pre-backup semantic check command: /usr/bin/sysinv-utils notify backup-semantic-check register: backup_semantic_check_notification_result @@ -234,6 +296,13 @@ register: hc_vault_dir when: include_hc_vault | bool + - name: Create openbao temp dir + file: + path: "{{ tempdir.path }}/openbao_dir" + state: directory + register: openbao_dir + when: include_openbao | bool + - name: Backup roles, table spaces and schemas for databases. shell: >- sudo -u postgres pg_dumpall @@ -649,6 +718,33 @@ when: hc_vault_backup_result.matched != 2 when: include_hc_vault | bool + # Openbao snapshot should be taken before the backup of etcd database. + # A k8s secret is created that is associated with the snapshot. 
+ - name: Run Openbao backup + block: + - name: Include openbao backup role + include_role: + name: openbao/openbao_backup + vars: + openbao_backup_dir: "{{ openbao_dir.path }}" + openbao_encrypt: "{{ openbao_tarball_encrypted|bool }}" + encrypt_openbao_secret: "{{ backup_encryption_passphrase }}" + op_mode: "platform" + + - name: Find result files + find: + paths: "{{ openbao_dir.path }}" + patterns: "openbao-snapshot-*.tar*" + register: openbao_backup_result + + - name: Fail if incorrect number of file created from openbao backup + fail: + msg: > + There was an error with the openbao backup process. + Incorrect number of files produced. + when: openbao_backup_result.matched != 2 + when: include_openbao | bool + - name: Create etcd snapshot temp dir file: path: "{{ tempdir.path }}/etcd-snapshot" @@ -741,6 +837,7 @@ openstack_backup_file: "{{ openstack_backup_filename_prefix }}_{{ backup_timestamp }}.tgz" dc_vault_backup_file: "{{ dc_vault_backup_filename_prefix }}_{{ backup_timestamp }}.tgz" hc_vault_backup_file: "{{ hc_vault_backup_filename_prefix }}_{{ backup_timestamp }}.tgz" + openbao_backup_file: "{{ openbao_backup_filename_prefix }}_{{ backup_timestamp }}.tgz" - name: Set backup files absolute path set_fact: @@ -749,6 +846,7 @@ openstack_backup_file_path: "{{ backup_dir }}/{{ openstack_backup_file }}" dc_vault_backup_file_path: "{{ backup_dir }}/{{ dc_vault_backup_file }}" hc_vault_backup_file_path: "{{ backup_dir }}/{{ hc_vault_backup_file }}" + openbao_backup_file_path: "{{ backup_dir }}/{{ openbao_backup_file }}" - name: Save user uploaded images from local registry to an archive include_tasks: export-user-local-registry-images.yml @@ -866,6 +964,24 @@ failed_when: tar_cmd.rc >= 2 or tar_cmd.rc < 0 when: include_hc_vault | bool + - name: Create a tgz archive for openbao backup + shell: >- + tar + --use-compress-program={{ compress_program }} + --exclude {{ exclude_targets | map('regex_replace', '^/', '') + | list | join(' --exclude ') }} + -cf {{ 
openbao_backup_file_path }} + $(ls -d + {{ openbao_dir.path }} + 2> /dev/null) + args: + warn: false + # Changing the failed_when behavior to prevent the backup to fail on "file changed as we read it", which + # makes tar return 1 + register: tar_cmd + failed_when: tar_cmd.rc >= 2 or tar_cmd.rc < 0 + when: include_openbao | bool + - name: Set default backup files absolute path set_fact: platform_backup_file_path_final: "{{ platform_backup_file_path }}" @@ -924,6 +1040,14 @@ when: include_hc_vault | bool no_log: true + - name: Transfer for openbao backup tar files to the local machine if it exists + fetch: + src: "{{ openbao_backup_file_path }}" + dest: "{{ host_backup_dir }}/" + flat: yes + when: include_openbao | bool + no_log: true + # TODO transfer docker image archive which may be very big during remote play. # Fetch module fills the memory and has a very slow transfer rate due to base64 encoding # Maybe use synchronize module after upgrading ansible, backup-restore/transfer-file diff --git a/playbookconfig/src/playbooks/roles/openbao/openbao_backup/files/openbao_snapshot.sh b/playbookconfig/src/playbooks/roles/openbao/openbao_backup/files/openbao_snapshot.sh new file mode 100644 index 000000000..b3033da91 --- /dev/null +++ b/playbookconfig/src/playbooks/roles/openbao/openbao_backup/files/openbao_snapshot.sh @@ -0,0 +1,264 @@ +#!/bin/bash + +# Script to take a snapshot of the openbao + +### +# Globals +# + +NAME="$( basename $0 )" + +KUBECMD="kubectl" +SCRIPT="source /opt/script/init.sh" +MAXATTEMPTS=10 +GPGSLEEP=6 + +K8S_SECRET_PREFIX="snapshot-metadata" +OPENBAO_NS="openbao" +MANAGER_PREFIX="stx-openbao-manager" + +# get openbao manager pod +JSONPATH='{range .items[*]}{.metadata.name}{"\n"}{end}' +POD="$( $KUBECMD get pods -n "$OPENBAO_NS" -o jsonpath="$JSONPATH" \ + | grep "^$MANAGER_PREFIX" )" + +if [ -z "$POD" ]; then + echo "Openbao manager not found" >&2 + exit 1 +fi + +### +# Functions +# + +function usage { + echo -e "Usage: \n" \ + "\n" \ + "$NAME 
[--encrypt ]\n" \ + "\n" \ + "All parameters are positional:\n" \ + " output_dir: required, location to output snapshot tarball\n" \ + " --encrypt: optional\n" \ + " variable: required if --encrypt is specified, the name\n" \ + " of a variable containing a secret with which\n" \ + " encrypt the snapshot\n" >&2 + +} + +# Exit with the specified code after unpausing the openbao manager +function unpause_exit { + local toreturn="$1" + + # don't worry about the result + kubectl exec -n "$OPENBAO_NS" "$POD" -- \ + bash -c "${SCRIPT}; rm \"\${PAUSEFILE}\"" + + exit $toreturn +} + +# The stdout is a tarball +function get_snapshot { + kubectl exec -n "$OPENBAO_NS" "$POD" -- \ + bash -c "${SCRIPT}; snapshotCreate" +} + +# Intended for deleting the fifo files +function cleanup { + rm $2 2>/dev/null + rmdir $1 2>/dev/null +} + +# Retrieve a snapshot for the openbao, using openbao-manager's code, and +# encrypt the file using the user-supplied passphrase +# +# The snapshot is received as stdin from openbao-manager, whereas the +# passphrase is provided to gpg via fifo file. +function get_encrypted_snapshot { + local secret="$1" + local outf="$2" + local tmpf + local tmpd + local gpgpid + local attempts + local result + + tmpd="$( mktemp -d )" + tmpf="${tmpd}/.snapshot" + + # try our best to make sure the fifo file is deleted. + trap "cleanup $tmpd $tmpf" SIGTERM + trap "cleanup $tmpd $tmpf" SIGINT + trap "cleanup $tmpd $tmpf" EXIT + trap "cleanup $tmpd $tmpf" RETURN + + mkfifo -m 600 "$tmpf" + + # run gpg in the background, waiting for passphrase on fifo file + get_snapshot \ + | gpg --symmetric \ + --output="$outf" \ + --passphrase-file "$tmpf" \ + --batch \ + --pinentry-mode loopback \ + /dev/stdin & + + gpgpid=$! + + echo -n "${!secret}" > "$tmpf" + + # wait for gpgpid + attempts=0 + while [ "$attempts" -lt "$MAXATTEMPTS" ]; do + ps -p $gpgpid >/dev/null 2>&1 + if [ $? 
-ne 0 ]; then + break + fi + attempts=$(( attempts + 1 )) + sleep $GPGSLEEP + done + + if [ "$attempts" -ge "$MAXATTEMPTS" ]; then + echo "failed to wait for gpg" >&2 + kill $gpgpid + + return 1 + fi + + wait $gpgpid + result=$? + + # don't leave a passphrase laying around, in case the fifo + # was unread + rm -r "$tmpd" 2>/dev/null >/dev/null + + return $result +} + +# Use mktemp to get a random string and test to see if a k8s secret +# already exists with that suffix within the openbao namespace +# +# Try a few times before giving up; unpause the openbao-manager and +# exit on failure. +# +# Return the random string via stdout +function get_unique_string { + local attempts + local rndtmp + local secret + local secrets + + # the loop below runs really fast, ready the secret names + # once should be fine + secrets="$( kubectl get secrets -n "$OPENBAO_NS" \ + -o jsonpath='{range .items[*]}{.metadata.name}{"\n"}{end}' \ + | grep "^${K8S_SECRET_PREFIX}" )" + + attempts=0 + while [ "$attempts" -lt "$MAXATTEMPTS" ]; do + rndtmp="$( mktemp --dry-run \ + | cut -f 2 -d'.' \ + | tr '[:upper:]' '[:lower:]' )" + secret="${K8S_SECRET_PREFIX}-$rndtmp" + if [[ " $secrets " != *"$secret"* ]]; then + break + fi + attempts=$(( attempts + 1 )) + done + + if [ "$attempts" -ge "$MAXATTEMPTS" ]; then + echo "Failed to get a unique string for the snapshot" >&2 + unpause_exit 1 + fi + + echo -n "$rndtmp" +} + + +### +# Main +# + +OUTPUTDIR="$1" +ENCRYPT="$2" +SECRET="$3" + +if [ -z "$OUTPUTDIR" -o ! -d "$OUTPUTDIR" ]; then + echo "Non-existing output directory: [$OUTPUTDIR]" >&2 + usage + exit 1 +fi +if [ -n "$ENCRYPT" ]; then + if [ ! 
"$ENCRYPT" = "--encrypt" ]; then + echo "Unrecognized parameter: [$ENCRYPT]" >&2 + usage + exit 1 + elif [ -z "$SECRET" ]; then + echo "Required variable name when --encrypt is used" >&2 + usage + exit 1 + elif [ -z "${!SECRET}" ]; then + echo "Required secret when --encrypt is used" \ + "(is '$SECRET' variable exported?)" >&2 + usage + exit 1 + fi +fi + +# Pause openbao manager +logs="$( kubectl exec -n "$OPENBAO_NS" "$POD" -- \ + bash -c "${SCRIPT}; touch \"\${PAUSEFILE}\"" 2>&1 )" +if [ $? -ne 0 ]; then + echo "Failed to pause openbao-manager: [$logs]" >&2 + exit 1 +fi + +# ensure that openbao is in a good state for taking the snapshot +logs="$( kubectl exec -n "$OPENBAO_NS" "$POD" -- \ + bash -c "${SCRIPT}; snapshotPreCheck" 2>&1 )" +if [ $? -ne 0 ]; then + echo "$logs" >&2 + unpause_exit 1 +fi + +rndtmp="$( get_unique_string )" +secret="${K8S_SECRET_PREFIX}-$rndtmp" +fname="${OUTPUTDIR}/openbao-snapshot-${rndtmp}.tar" +metaf="${fname}.metadata" + +# get the snapshot +if [ "$ENCRYPT" == "--encrypt" ]; then + encrypted=true + get_encrypted_snapshot "$SECRET" "$fname" + if [ $? -ne 0 ]; then + unpause_exit 1 + fi +else + encrypted=false + get_snapshot > "$fname" + if [ $? -ne 0 ]; then + unpause_exit 1 + fi +fi + +# Prepare metadata file. This procedure only uses 'secret', +# but I'm sure the other information will be useful to humans +sum="$( sha256sum "$fname" | cut -f 1 -d' ' )" +now="$( date )" + +metadata="{\"date\":\"$now\", + \"snapshot_sum\":\"$sum\", + \"secret\":\"$secret\", + \"user_encrypted\":\"$encrypted\"}" + +echo "$metadata" > "${metaf}" + +# write the metadata to k8s secret, along with the shards +# associated with the snapshot +kubectl exec -n "$OPENBAO_NS" "$POD" -- \ + bash -c "${SCRIPT}; snapshotSetSecret '$secret' '$metadata'" +if [ $? 
-ne 0 ]; then + echo "Failed to set k8s secret for snapshot" >&2 + unpause_exit 1 +fi + +unpause_exit 0 diff --git a/playbookconfig/src/playbooks/roles/openbao/openbao_backup/tasks/main.yml b/playbookconfig/src/playbooks/roles/openbao/openbao_backup/tasks/main.yml new file mode 100644 index 000000000..2a70d64c2 --- /dev/null +++ b/playbookconfig/src/playbooks/roles/openbao/openbao_backup/tasks/main.yml @@ -0,0 +1,114 @@ +--- +# +# Copyright (c) 2025 Wind River Systems, Inc. +# +# SPDX-License-Identifier: Apache-2.0 +# + +- name: Check openbao system health + shell: >- + kubectl exec -n "openbao" "{{ openbao_manager_pod_name.stdout }}" -- + bash -c "source /opt/script/init.sh; snapshotPreCheck" 2>&1 + register: openbao_system_health + environment: + KUBECONFIG: /etc/kubernetes/admin.conf + failed_when: openbao_system_health.rc != 0 + +- name: Create openbao snapshot + block: + - name: Create openbao snapshot with default encryption + script: openbao_snapshot.sh {{ openbao_backup_dir }} + when: not openbao_encrypt + register: openbao_snapshot_script + failed_when: openbao_snapshot_script.rc != 0 + + - name: Create openbao snapshot with custom encryption + script: openbao_snapshot.sh {{ openbao_backup_dir }} '--encrypt' "custom_var" + when: openbao_encrypt + register: openbao_snapshot_script + failed_when: openbao_snapshot_script.rc != 0 + environment: + KUBECONFIG: /etc/kubernetes/admin.conf + custom_var: "{{ encrypt_openbao_secret }}" + always: + - name: Unpause openbao manager + shell: >- + kubectl exec -n "openbao" "{{ openbao_manager_pod_name.stdout }}" -- + bash -c "source /opt/script/init.sh; if [ -f $PAUSEFILE ]; then rm -f $PAUSEFILE; fi" 2>&1 + rescue: + - name: Clean up openbao subdir if in standalone mode + file: + path: "{{ openbao_backup_dir }}" + state: absent + when: op_mode == "standalone" + + +- name: Package openbao if running in standalone mode + block: + - name: Check if pigz package is installed + block: + + - name: Issue command to pkg manager + 
command: "{{ 'rpm -q' if os_release == 'centos' else 'dpkg -l' }} pigz" + args: + warn: false + failed_when: false + register: check + + - set_fact: + pigz_check: "{{ 'succeeded' if check.rc == 0 else 'failed' }}" + + when: os_release in ["centos", "debian"] + + - name: Check if pigz package is installed + package: + name: pigz + state: present + check_mode: true + register: pigz_check + when: os_release not in ["centos", "debian"] + + - name: Check number of platform cores + shell: | + source /etc/platform/openrc + system host-cpu-list $(hostname) --nowrap | grep " Platform " | wc -l + register: num_platform_cores + + - name: Set compress program for backup tarball + set_fact: + compress_program: >- + "{{ 'pigz' if num_platform_cores.stdout | int >= 4 and + pigz_check is succeeded else 'gzip' }}" + + - name: Use current timestamp as backups timestamp + set_fact: + backup_timestamp: "{{ lookup('pipe', 'date +%Y_%m_%d_%H_%M_%S') }}" + + - name: Attach timestamp to backups filename + set_fact: + openbao_backup_file: "{{ openbao_backup_filename_prefix }}_{{ backup_timestamp }}.tgz" + + - name: Set backup files absolute path + set_fact: + openbao_backup_file_path: "{{ initial_backup_dir }}/{{ openbao_backup_file }}" + + - name: Create a tgz archive for openbao backup + shell: >- + tar + --use-compress-program={{ compress_program }} + -cf {{ openbao_backup_file_path }} + $(ls -d + {{ openbao_backup_dir }} + 2> /dev/null) + args: + warn: false + # tar returns 1 on "file changed as we read it"; treat only rc >= 2 (or a negative rc) as a failure + # so that this benign warning does not fail the backup + register: tar_cmd + failed_when: tar_cmd.rc >= 2 or tar_cmd.rc < 0 + + - name: Cleanup openbao subdir + file: + path: "{{ openbao_backup_dir }}" + state: absent + when: op_mode == "standalone" diff --git a/playbookconfig/src/playbooks/roles/openbao/openbao_restore/files/openbao_restore.sh b/playbookconfig/src/playbooks/roles/openbao/openbao_restore/files/openbao_restore.sh new file mode 
100644 index 000000000..167171974 --- /dev/null +++ b/playbookconfig/src/playbooks/roles/openbao/openbao_restore/files/openbao_restore.sh @@ -0,0 +1,151 @@ +#!/bin/bash + +# Script to restore a snapshot to the openbao + +### +# Globals +# + +NAME="$( basename $0 )" + +KUBECMD="kubectl" +SCRIPT="source /opt/script/init.sh" + +OPENBAO_NS="openbao" +MANAGER_PREFIX="stx-openbao-manager" + +# get openbao manager pod +JSONPATH='{range .items[*]}{.metadata.name}{"\n"}{end}' +POD="$( $KUBECMD get pods -n "$OPENBAO_NS" -o jsonpath="$JSONPATH" \ + | grep "^$MANAGER_PREFIX" )" + +if [ -z "$POD" ]; then + echo "Openbao manager not found" >&2 + exit 1 +fi + +### +# Functions +# + +function usage { + echo -e "Usage: \n" \ + "\n" \ + "$NAME <input_file> [--decrypt <variable>]\n" \ + "\n" \ + "All parameters are positional:\n" \ + " input_file: required, snapshot file to restore from\n" \ + " --decrypt: optional\n" \ + " variable: required if --decrypt is specified, the name\n" \ + " of a variable containing a secret with which\n" \ + " decrypt the snapshot file\n" >&2 +} + +# Exit with the specified code after unpausing the openbao manager +function unpause_exit { + local toreturn="$1" + + # don't worry about the result + kubectl exec -n "$OPENBAO_NS" "$POD" -- \ + bash -c "${SCRIPT}; rm \"\${PAUSEFILE}\"" + + exit $toreturn +} + + +### +# Main +# + +INPUTFILE="$1" +DECRYPT="$2" +SECRET="$3" + +if [ -z "$INPUTFILE" -o ! -f "$INPUTFILE" ]; then + echo "Non-existing snapshot file: [$INPUTFILE]" >&2 + usage + exit 1 +fi + +if [ -n "$DECRYPT" ]; then + if [ ! "$DECRYPT" = "--decrypt" ]; then + echo "Unrecognized parameter: [$DECRYPT]" >&2 + usage + exit 1 + elif [ -z "$SECRET" ]; then + echo "Required variable name when --decrypt is used" >&2 + usage + exit 1 + elif [ -z "${!SECRET}" ]; then + echo "Required secret when --decrypt is used" \ + "(is '$SECRET' variable exported?)" >&2 + usage + exit 1 + fi +fi + + +# get the metadata, and snapshot secret associated with the snapshot +# file. 
This is expected to be in the same directory as the snapshot +METADATAF="${INPUTFILE}.metadata" +if [ ! -f "$METADATAF" ]; then + echo "The metadata file associated with snapshot file" \ + "$INPUTFILE is not found: $METADATAF" >&2 + exit 1 +fi + +# openbao manager code will do more sanity on the json, make sure +# at least that it is not empty +METADATA="$( cat "$METADATAF" )" +if [ -z "$METADATA" ]; then + echo "The metadata should at least contain:" \ + '{"secret":"name_of_k8s_secret"}' >&2 + exit 1 +fi + +# Pause openbao manager +logs="$( kubectl exec -n "$OPENBAO_NS" "$POD" -- \ + bash -c "${SCRIPT}; touch \"\${PAUSEFILE}\"" 2>&1 )" +if [ $? -ne 0 ]; then + echo "Failed to pause openbao-manager: [$logs]" >&2 + exit 1 +fi + +# ensure that openbao is in a good state for restoring the snapshot +logs="$( kubectl exec -n "$OPENBAO_NS" "$POD" -- \ + bash -c "${SCRIPT}; snapshotPreCheck" 2>&1 )" +if [ $? -ne 0 ]; then + echo "$logs" >&2 + unpause_exit 1 +fi + +# restore the snapshot +if [ "$DECRYPT" == "--decrypt" ]; then + logs="$( echo "${!SECRET}" \ + | gpg --no-symkey-cache \ + -q \ + --batch \ + --passphrase-fd 0 \ + --decrypt "$INPUTFILE" \ + | kubectl exec -n "$OPENBAO_NS" "$POD" -i -- \ + bash -c "${SCRIPT}; \ + snapshotRestore '$METADATA'" )" + + if [ $? -ne 0 ]; then + echo "Failed to restore snapshot: [$logs]" >&2 + unpause_exit 1 + fi +else + logs="$( cat "$INPUTFILE" \ + | kubectl exec -n "$OPENBAO_NS" "$POD" -i -- \ + bash -c "${SCRIPT}; \ + snapshotRestore '$METADATA'" )" + + if [ $? -ne 0 ]; then + echo "Failed to restore snapshot: [$logs]" >&2 + unpause_exit 1 + fi +fi + +echo "Snapshot restore complete." 
>&2 +unpause_exit 0 diff --git a/playbookconfig/src/playbooks/roles/openbao/openbao_restore/tasks/main.yml b/playbookconfig/src/playbooks/roles/openbao/openbao_restore/tasks/main.yml new file mode 100644 index 000000000..c2995e1dd --- /dev/null +++ b/playbookconfig/src/playbooks/roles/openbao/openbao_restore/tasks/main.yml @@ -0,0 +1,66 @@ +--- +# +# Copyright (c) 2025 Wind River Systems, Inc. +# +# SPDX-License-Identifier: Apache-2.0 +# + +- name: Unpackage the backup tarball + command: >- + tar --use-compress-program=pigz -C {{ openbao_backup_dir }} -xpf {{ backup_filepath }} + --wildcards --transform='s,.*/,,' + args: + warn: false + become: yes + +- name: Find the snapshot file + command: >- + find {{ openbao_backup_dir }} -name "openbao-snapshot-*.tar" + register: backup_snapshot_file + become: yes + +- name: Fail if snapshot file was not found + fail: + msg: "Backup snapshot was not found in {{ backup_filepath }}" + when: backup_snapshot_file.stdout | length == 0 + +- name: Change snapshot file permissions + file: + path: "{{ backup_snapshot_file.stdout }}" + mode: 0755 + become: yes + +- name: Find openbao manager pod + shell: >- + kubectl get pods -n openbao | grep "openbao-manager" | cut -d " " -f 1 + register: openbao_manager_pod_name + environment: + KUBECONFIG: /etc/kubernetes/admin.conf + +# call openbao_restore.sh +- name: Restore openbao from the snapshot + block: + - name: Restore openbao snapshot with default encryption + script: openbao_restore.sh {{ backup_snapshot_file.stdout }} + when: not openbao_encrypt + register: openbao_restore_script + failed_when: openbao_restore_script.rc != 0 + + - name: Restore openbao snapshot with custom encryption + script: openbao_restore.sh {{ backup_snapshot_file.stdout }} '--decrypt' "custom_var" + when: openbao_encrypt + register: openbao_restore_script + failed_when: openbao_restore_script.rc != 0 + environment: + KUBECONFIG: /etc/kubernetes/admin.conf + custom_var: "{{ encrypt_openbao_secret }}" + always: + 
- name: Unpause openbao manager + shell: >- + kubectl exec -n "openbao" "{{ openbao_manager_pod_name.stdout }}" -- + bash -c "source /opt/script/init.sh; if [ -f $PAUSEFILE ]; then rm -f $PAUSEFILE; fi" 2>&1 + - name: Clean up openbao subdir + file: + path: "{{ openbao_backup_dir }}" + state: absent + become: yes diff --git a/playbookconfig/src/playbooks/roles/openbao/prepare_env/files/validate_recover_openbao.sh b/playbookconfig/src/playbooks/roles/openbao/prepare_env/files/validate_recover_openbao.sh new file mode 100644 index 000000000..254811d50 --- /dev/null +++ b/playbookconfig/src/playbooks/roles/openbao/prepare_env/files/validate_recover_openbao.sh @@ -0,0 +1,448 @@ +#!/bin/bash +# +# Copyright (c) 2025 Wind River Systems, Inc. +# +# SPDX-License-Identifier: Apache-2.0 +# + +# The unsealed state of all openbao server pods is required for the openbao +# snapshot restore procedure. +# +# Under normal circumstances the openbao restore procedure does not +# require the user to put the openbao application into the required state. +# This script attempts to put the openbao server pods into an unsealed +# state - this includes deleting PVCs and shard secrets. +# +# The script ends by verifying the required state, or failing. 
+
+OPENBAO_NS="openbao"
+OPENBAO_REAPPLIED=false
+APP_TAR_PATH="/usr/local/share/applications/helm"
+
+# List of pauses
+# app upload:
+#   60s == OPENBAO_UPLOAD_TRIES @ OPENBAO_UPLOAD_SLEEP intervals
+# app abort:
+#   120s == OPENBAO_ABORT_TRIES @ OPENBAO_ABORT_SLEEP intervals
+# app remove:
+#   50s == OPENBAO_REMOVE_TRIES @ OPENBAO_REAPPLY_WAITTIME intervals
+#   (the same budget is spent again waiting for the pods to go away)
+# PVC delete:
+#   120s == PVC_DELETE_TRIES @ OPENBAO_REAPPLY_WAITTIME intervals
+# cluster-key delete:
+#   60s == CLUSTER_KEY_DELETE_TRIES @ OPENBAO_REAPPLY_WAITTIME intervals
+# app apply:
+#   300s == OPENBAO_APPLY_TRIES @ OPENBAO_REAPPLY_WAITTIME intervals
+# post apply wait time:
+#   30s == OPENBAO_UNSEAL_WAITTIME
+# unseal per pod:
+#   60s == SEALED_STATUS_TRIES @ SEALED_STATUS_WAITTIME intervals
+
+# Number of tries for each action
+MAIN_TRIES=2
+SEALED_STATUS_TRIES=6
+OPENBAO_REMOVE_TRIES=5
+PVC_DELETE_TRIES=12
+CLUSTER_KEY_DELETE_TRIES=6
+OPENBAO_APPLY_TRIES=30
+OPENBAO_UPLOAD_TRIES=12
+OPENBAO_ABORT_TRIES=24
+
+# Wait times
+# NOTE(review): OPENBAO_UNSEAL_WAITTIME is not referenced below; confirm
+# whether the post-apply pause was meant to be applied.
+SEALED_STATUS_WAITTIME=10
+OPENBAO_REAPPLY_WAITTIME=10
+OPENBAO_UNSEAL_WAITTIME=30
+OPENBAO_UPLOAD_SLEEP=5
+OPENBAO_ABORT_SLEEP=5
+
+# variables for interpreting application state
+# These states are handled by reapplyOpenbao():
+APP_STATES="uploading uploaded removing applying applied apply-failed"
+REGEX_DELETED="application not found: openbao"
+REGEX_NORESOURCES="No resources found in $OPENBAO_NS namespace."
+# Results of the most recent refreshOpenbaoStatus call:
+#   APP_STATUS        interpreted application status
+#   APP_STATUS_DEBUG  raw text returned by 'system application-show'
+APP_STATUS=""
+APP_STATUS_DEBUG=""
+
+# Generic instruction to the user
+GENERIC_INSTRUCTION="$( echo "Resolve the application/platform status" \
+    "before running the restore procedure again." )"
+
+# Query the application status and record the result in this shell.
+# Sets APP_STATUS to one of $APP_STATES, or to the custom states
+# "deleted" (not-uploaded) and "unknown" (an application state this
+# script does not address); sets APP_STATUS_DEBUG to the raw query text.
+#
+# Call this function directly rather than inside $( ... ): a command
+# substitution runs in a subshell, so the two globals set here would be
+# lost and failure messages would print an empty APP_STATUS_DEBUG.
+function refreshOpenbaoStatus {
+    local status
+    local result
+
+    # capture both stdout and stderr; When the application is not
+    # uploaded then the stderr indicates this response
+    status="$( system application-show openbao \
+        --format value --column status 2>&1 )"
+    result="$?"
+    APP_STATUS_DEBUG="$status"
+    if [ "$result" -ne 0 ]; then
+        if [[ "$status" == *"$REGEX_DELETED"* ]]; then
+            status="deleted"
+        fi
+    fi
+    # Spaces in the raw text are replaced so that a multi-word response
+    # cannot match a state name by accident
+    if [[ " $APP_STATES deleted " != *" ${status// /_} "* ]]; then
+        status="unknown"
+    fi
+
+    APP_STATUS="$status"
+}
+
+# Function to get the application status, insert custom states for
+# "deleted" (not-uploaded), and "unknown" for application states this
+# script does not address.  Prints the status on stdout.
+#
+# Kept for callers that want the status on stdout; code that also needs
+# APP_STATUS_DEBUG must call refreshOpenbaoStatus instead.
+function getOpenbaoStatus {
+    refreshOpenbaoStatus
+    echo "$APP_STATUS"
+}
+
+# Upload the openbao application and wait for the "uploaded" state.
+#   $1 - application status seen by the caller; application-upload is
+#        only issued when it is "deleted"
+# Returns when the application is "uploaded"; exits the script with 1
+# on an unexpected state or when the wait runs out.
+function uploadopenbao {
+    local status="$1"
+    local count=1
+    local uploaded
+
+    # The platform may upload the application. Ignore a failed result
+    # for application-upload
+    if [ "$status" == "deleted" ]; then
+        system application-upload "$APP_TAR_PATH"/openbao*.tgz
+    fi
+
+    # A small wait before checking the upload status.
+    # Start counting at 1 to get OPENBAO_UPLOAD_TRIES sleeps total
+    sleep $OPENBAO_UPLOAD_SLEEP
+    while [ "$count" -lt "$OPENBAO_UPLOAD_TRIES" ]; do
+        refreshOpenbaoStatus
+        uploaded="$APP_STATUS"
+        echo "openbao application status: $uploaded"
+        if [ "$uploaded" == "uploaded" ]; then
+            break;
+        elif [ "$uploaded" == "deleted" ]; then
+            true # pass, the platform is sloooow today
+        elif [ "$uploaded" != "uploading" ]; then
+            # invoke the failure path
+            count="$OPENBAO_UPLOAD_TRIES"
+            break
+        fi
+
+        count="$(( count + 1 ))"
+        sleep $OPENBAO_UPLOAD_SLEEP
+    done
+
+    if [ "$count" -ge "$OPENBAO_UPLOAD_TRIES" ]; then
+        echo "Failed to upload openbao in" \
+            "$(( $OPENBAO_UPLOAD_TRIES * $OPENBAO_UPLOAD_SLEEP ))s." \
+            "$GENERIC_INSTRUCTION"
+        echo "Application status: [$APP_STATUS_DEBUG]"
+        exit 1
+    fi
+
+    echo "Application uploaded."
+}
+
+# Abort an in-progress apply of the openbao application and wait for
+# the application to leave the "applying" state.
+# Returns when the state is "apply-failed" or "applied"; exits the
+# script with 1 on any other state or when the wait runs out.
+function abortOpenbao {
+    local count=0
+    local aborted
+
+    # "applying" was the trigger state for this function.
+    # Expect: applying, applied, apply-failed
+    # And ignore the result of system application-abort
+    system application-abort openbao
+
+    # Normally the abort will happen promptly, such as when the app was
+    # applying for some time already. A short initial sleep is
+    # not required. But when running application-apply and
+    # application-abort in quick succession the actual time is observed
+    # at 60s typical for that case.
+    while [ "$count" -lt "$OPENBAO_ABORT_TRIES" ]; do
+        refreshOpenbaoStatus
+        aborted="$APP_STATUS"
+        echo "openbao application status: $aborted"
+        if [ "$aborted" == "apply-failed" ]; then
+            # either interpretation of apply-failed is ok
+            break;
+        elif [ "$aborted" == "applying" ]; then
+            true # pass, abort can take a while
+        elif [ "$aborted" == "applied" ]; then
+            # race condition probably between seeing 'applying' and
+            # running application-abort
+            break
+        else
+            # invoke the failure path
+            count="$OPENBAO_ABORT_TRIES"
+            break;
+        fi
+
+        count="$(( count + 1 ))"
+        sleep $OPENBAO_ABORT_SLEEP
+    done
+
+    if [ "$count" -ge "$OPENBAO_ABORT_TRIES" ]; then
+        echo "Failed to abort apply of openbao app within" \
+            "$(( $OPENBAO_ABORT_TRIES * $OPENBAO_ABORT_SLEEP ))s." \
+            "$GENERIC_INSTRUCTION"
+        echo "Application status: [$APP_STATUS_DEBUG]"
+        exit 1
+    fi
+
+    echo "Application apply aborted."
+}
+
+# Function to clean openbao and reapply.
+# Brings the application to "uploaded" (uploading, aborting or removing
+# as needed), deletes the PVCs and the cluster-key/openbao-ca secrets in
+# the openbao namespace, then applies the application again.
+# Runs at most once per script invocation (OPENBAO_REAPPLIED).
+# Returns 0 when the application reaches "applied", 1 when a step fails
+# or a reapply was already attempted; exits the script with 1 when the
+# application cannot be put into the "uploaded" state.
+function reapplyOpenbao {
+    local state
+    local tries
+    local remainingPVC
+    local deleteSecrets
+    local key
+    local keyDelete
+    local remaining
+    local pods
+
+    if $OPENBAO_REAPPLIED; then
+        echo "openbao reapply already tried. Previous apply likely failed."
+        return 1
+    fi
+
+    # Do not try to fix openbao more than once
+    OPENBAO_REAPPLIED=true
+
+    refreshOpenbaoStatus
+    state="$APP_STATUS"
+    echo "openbao application status: $state"
+    if [[ " deleted uploading " == *" $state "* ]]; then
+        # exits on failure; else the state is "uploaded"
+        uploadopenbao "$state"
+        refreshOpenbaoStatus
+        state="$APP_STATUS"
+        echo "openbao application status: $state"
+    elif [ "$state" == "applying" ]; then
+        # Handle this abortable state without giving the app the benefit
+        # of the doubt: during restore we anticipate that the
+        # application may be waiting for openbao server pods that cannot
+        # unseal.
+        #
+        # exits on failure; else the state is "apply-failed", or
+        # possibly the state is "applied" due to race
+        abortOpenbao
+        refreshOpenbaoStatus
+        state="$APP_STATUS"
+        echo "openbao application status: $state"
+    fi
+
+    if [[ " applied apply-failed " == *" $state "* ]]; then
+        system application-remove openbao
+    fi
+
+    # Seeing the 'removing' status from a previous operation is
+    # unlikely, as in practice system application-show does not run fast
+    # enough to catch it. But it should be accounted for.
+    if [[ " applied apply-failed removing " == *" $state "* ]]; then
+        for tries in $(seq $OPENBAO_REMOVE_TRIES); do
+            sleep $OPENBAO_REAPPLY_WAITTIME
+            refreshOpenbaoStatus
+            state="$APP_STATUS"
+            echo "openbao application status: $state"
+            if [[ "$state" == "uploaded" ]]; then
+                echo "openbao remove completed"
+                break
+            fi
+        done
+
+        # state is updated within the loop
+    fi
+    # also wait for pods to be removed, for example: before trying
+    # to delete the persistentvolumeclaims
+    # but ignore if there are still pods
+    for tries in $(seq $OPENBAO_REMOVE_TRIES); do
+        sleep $OPENBAO_REAPPLY_WAITTIME
+        pods="$( kubectl get pods -n $OPENBAO_NS 2>&1 )"
+        if [ "$pods" == "$REGEX_NORESOURCES" ]; then
+            break;
+        fi
+    done
+
+    # the state of the application should be "uploaded"
+    if [ "$state" != "uploaded" ]; then
+        # Other states that we're not handling include: missing,
+        # upload-failed, remove-failed, updating, recovering
+        # restore-requested
+        echo "Failed to put the openbao application into uploaded state." \
+            "$GENERIC_INSTRUCTION" \
+            "Application status: $state [$APP_STATUS_DEBUG]"
+        exit 1
+    fi
+
+    # remove PVC resource
+    kubectl delete pvc -n $OPENBAO_NS --all
+    remainingPVC=-1
+    for tries in $(seq $PVC_DELETE_TRIES); do
+        sleep $OPENBAO_REAPPLY_WAITTIME
+        remainingPVC="$(kubectl get pvc -n $OPENBAO_NS \
+            --no-headers=true | wc -l)"
+        if [[ $remainingPVC -eq 0 ]]; then
+            echo "openbao PVC removal completed"
+            break
+        fi
+    done
+    if [[ $remainingPVC -ne 0 ]]; then
+        echo "remove pvc resource failed"
+        return 1
+    fi
+
+    # remove openbao cluster-key and the root CA secrets
+    deleteSecrets="$( kubectl get secrets -n $OPENBAO_NS \
+        -o jsonpath='{range .items[*]}{.metadata.name}{"\n"}{end}' \
+        | grep '^cluster-key\|^openbao-ca$' )"
+    for key in $deleteSecrets; do
+        kubectl delete secret -n $OPENBAO_NS "$key"
+        keyDelete=$?
+        if [[ $keyDelete -ne 0 ]]; then
+            echo "kubectl-delete-secret returned error"
+            return 1
+        fi
+    done
+    remaining=-1
+    for tries in $(seq $CLUSTER_KEY_DELETE_TRIES); do
+        sleep $OPENBAO_REAPPLY_WAITTIME
+        remaining="$( kubectl get secrets -n $OPENBAO_NS \
+            -o jsonpath='{range .items[*]}{.metadata.name}{"\n"}{end}' \
+            | grep '^cluster-key\|^openbao-ca$' | wc -l )"
+        if [[ $remaining -eq 0 ]]; then
+            echo "openbao secret removal completed"
+            break
+        fi
+    done
+    if [[ $remaining -ne 0 ]]; then
+        echo "remove secrets failed"
+        return 1
+    fi
+
+    # application-apply
+    system application-apply openbao
+    for tries in $(seq $OPENBAO_APPLY_TRIES); do
+        sleep $OPENBAO_REAPPLY_WAITTIME
+        refreshOpenbaoStatus
+        state="$APP_STATUS"
+        echo "openbao application status: $state"
+        if [[ "$state" == "applied" ]]; then
+            echo "openbao apply completed"
+            break
+        fi
+    done
+    if [[ "$state" != "applied" ]]; then
+        echo "openbao Reapply: application-apply failed"
+        return 1
+    fi
+
+    # The openbao server pods remain in unready state until the server is
+    # unsealed due to using the healthz endpoint for pod readiness
+    # probe. The openbao application remains in applying status until the
+    # first openbao server pod transitions to ready state.
+    #
+    # For the case of replicas>1, openbao server unseal validation is done
+    # in the main
+
+    return 0
+}
+
+###
+# Main
+#
+# Up to MAIN_TRIES validation passes.  Each pass checks, in order: the
+# application status, that an openbao server pod is running, and that
+# no server pod remains sealed.  A failed check triggers reapplyOpenbao
+# and a new pass; exit 0 when a pass succeeds, exit 1 otherwise.
+
+JPATHFULL='{range .items[*]}{.metadata.name}{" "}'\
+'{.metadata.labels.openbao-sealed}{"\n"}{end}'
+JPATH="$(printf '%s\n' $JPATHFULL | tr '\n' ' ')"
+
+echo "Validating openbao status"
+source "/etc/platform/openrc"
+
+for validateTries in $(seq $MAIN_TRIES); do
+    echo "Attempting validation number $validateTries"
+    # check if openbao application is applied or applying
+    refreshOpenbaoStatus
+    rst="$APP_STATUS"
+    echo "openbao application status: $rst"
+    if [[ "$rst" != "applied" && "$rst" != "applying" ]]; then
+        # if not, run recovery
+        echo "openbao not applied. Attempting reapply..."
+        reapplyOpenbao
+        reapplyOpenbaoRC=$?
+        if [[ $reapplyOpenbaoRC -eq 0 ]]; then
+            echo "openbao reapply completed. Reattempting validation."
+            continue
+        else
+            echo "openbao reapply failed for trying to" \
+                "fix not-applied openbao application." \
+                "Unable to ready openbao for restore."
+            exit 1
+        fi
+
+    fi
+
+    # Whether 'applied' or 'applying', we expect to see a running openbao
+    # server pod. In the applying case, there is a window where the
+    # applying procedure hasn't gotten that far. Ignore this possibility
+    # when it comes from outside this procedure - run abort as if the
+    # app is stuck.
+    #
+    # Check if there is a running openbao pod:
+    numRunningPods="$(kubectl get pods -n $OPENBAO_NS | \
+        grep "^stx-openbao-[0-9] " | grep "Running" | wc -l)"
+    if [[ $numRunningPods -eq 0 ]]; then
+        # if not, run recovery
+        echo "No openbao pods are running. Attempting reapply..."
+        reapplyOpenbao
+        reapplyOpenbaoRC=$?
+        if [[ $reapplyOpenbaoRC -eq 0 ]]; then
+            echo "openbao reapply completed. Reattempting validation."
+            continue
+        else
+            echo "openbao reapply failed for trying to" \
+                "fix no running openbao pods." \
+                "Unable to ready openbao for restore."
+            exit 1
+        fi
+    fi
+
+    # Whether applied or applying, in both cases it is possible for a
+    # openbao server pod to be waiting to be unsealed. Wait upon the
+    # sealed status of all pods.
+    sealedPods=0
+    prevSealedPods=0
+    sealedExists=true
+    triesCount=$SEALED_STATUS_TRIES
+    while [[ $triesCount -gt 0 ]]; do
+        # get number of sealed pods
+        # When pods are starting they have no seal status (empty
+        # string). So search for and omit unsealed pods instead.
+        sealedPods="$( kubectl get pods -n $OPENBAO_NS -o jsonpath="$JPATH" \
+            | grep "^stx-openbao-[0-9] " \
+            | grep -v "false$" | wc -l )"
+
+        # check if there are no sealed pods, if so mark success and break loop
+        if [[ $sealedPods -eq 0 ]]; then
+            sealedExists=false
+            break
+        fi
+
+        # if number of sealed pods decreased, reset wait counter
+        if [[ $sealedPods -lt $prevSealedPods ]]; then
+            triesCount=$SEALED_STATUS_TRIES
+        else
+            triesCount=$(( triesCount - 1 ))
+        fi
+
+        # wait for pods to unseal
+        sleep $SEALED_STATUS_WAITTIME
+        prevSealedPods=$sealedPods
+    done
+
+    # if there are still sealed pods, attempt reapply
+    if $sealedExists; then
+        echo "There are sealed pods. Attempting reapply..."
+        reapplyOpenbao
+        reapplyOpenbaoRC=$?
+        if [[ $reapplyOpenbaoRC -eq 0 ]]; then
+            echo "openbao reapply completed. Reattempting validation."
+            continue
+        else
+            echo "openbao reapply failed for trying to" \
+                "fix sealed openbao pods." \
+                "Unable to ready openbao for restore."
+            exit 1
+        fi
+    fi
+
+    # all test passed. exit
+    echo "All validation passed. openbao application is ready to be restored."
+    exit 0
+done
+
+echo "All tries exhausted. Unable to ready openbao for restore."
+exit 1
diff --git a/playbookconfig/src/playbooks/roles/openbao/prepare_env/tasks/main.yml b/playbookconfig/src/playbooks/roles/openbao/prepare_env/tasks/main.yml
new file mode 100644
index 000000000..7916b025b
--- /dev/null
+++ b/playbookconfig/src/playbooks/roles/openbao/prepare_env/tasks/main.yml
@@ -0,0 +1,135 @@
+---
+#
+# Copyright (c) 2025 Wind River Systems, Inc.
+#
+# SPDX-License-Identifier: Apache-2.0
+#
+
+- name: Set default target where backup tarball inspection takes place
+  set_fact:
+    inspection_target: "{{ inventory_hostname }}"
+
+# Set inspection target to Ansible control machine if the backup tarball
+# is off-box.
+- name: Update target if backup data are off-box
+  set_fact:
+    inspection_target: localhost
+  when: not on_box_data | bool
+
+- name: Set initial_backup_dir if running on target and no value was supplied
+  set_fact:
+    initial_backup_dir: /opt/platform-backup
+  when:
+    - initial_backup_dir is not defined or initial_backup_dir is none
+    - on_box_data | bool
+
+# The directory is checked on whichever host holds the backup data.
+- name: Validate initial_backup_dir exists
+  stat:
+    path: "{{ initial_backup_dir }}"
+  delegate_to: "{{ inspection_target }}"
+  register: initial_backup_dir_exists
+
+- name: Fail if initial_backup_dir does not exist
+  fail:
+    msg: "Directory initial_backup_dir: {{ initial_backup_dir }} does not exist"
+  when: not initial_backup_dir_exists.stat.exists
+
+- name: Set openbao backup directory fact for on box
+  set_fact:
+    openbao_backup_dir: "{{ initial_backup_dir }}/openbao"
+  when: on_box_data | bool
+
+- name: Set openbao backup directory fact for off box
+  set_fact:
+    openbao_backup_dir: "{{ target_backup_dir }}/openbao"
+  when: not on_box_data | bool
+
+- name: Fail if passphrase is omitted
+  fail:
+    msg: >
+      A passphrase is required for encryption; set variable override
+      backup_encryption_passphrase. To disable encryption set
+      override backup_encryption_enabled=false
+  when:
+    - openbao_encrypt | bool
+    - encrypt_openbao_secret | length == 0
+
+# Backup mode: the application must already be in the applied state.
+- name: Check openbao apply for backup
+  block:
+    - name: Check if openbao is applied
+      shell: |
+        source /etc/platform/openrc
+        system application-show openbao --format value --column status
+      register: openbao_applied_exists
+      # read-only query
+      changed_when: false
+
+    - name: Fail if openbao is not applied
+      fail:
+        msg: "Openbao application is not applied"
+      when: openbao_applied_exists.stdout != "applied"
+  when: openbao_mode == "backup"
+
+# Restore mode: locate the tarball on the target, then run the validation
+# script.
+- name: Validate openbao health for restore.
+  block:
+    - name: Transfer backup tarball to {{ target_backup_dir }} on the target
+      copy:
+        src: "{{ initial_backup_dir }}/{{ backup_filename }}"
+        dest: "{{ target_backup_dir }}"
+        owner: root
+        group: root
+        # quoted so the YAML parser cannot reinterpret the octal mode
+        mode: "0755"
+      become: true
+      when: not on_box_data | bool
+
+    - name: Set backup file path for on box
+      set_fact:
+        backup_filepath: "{{ initial_backup_dir }}/{{ backup_filename }}"
+      when: on_box_data | bool
+
+    - name: Set backup file path for off box
+      set_fact:
+        backup_filepath: "{{ target_backup_dir }}/{{ backup_filename }}"
+      when: not on_box_data | bool
+
+    - name: Find backup tarball
+      shell: |
+        ls {{ backup_filepath }}
+      register: backup_tarball
+      # ls returns non-zero when the tarball is missing; do not fail here so
+      # that the next task can report the missing tarball with a clear message
+      failed_when: false
+      changed_when: false
+
+    - name: Fail if openbao backup tarball not found
+      fail:
+        msg: "Openbao snapshot tarball: {{ backup_filename }} was not found"
+      when: backup_tarball.stdout | length == 0
+
+    - name: Run application validation
+      block:
+        - name: Validate if openbao application is ready to be restored
+          script: validate_recover_openbao.sh
+          environment:
+            KUBECONFIG: /etc/kubernetes/admin.conf
+          register: validate_openbao_result
+          failed_when: validate_openbao_result.rc != 0
+      always:
+        # shown on success and on failure of the validation script
+        - name: Display openbao validation script output if it exists
+          debug:
+            msg: "{{ validate_openbao_result.stdout }}"
+          when: validate_openbao_result is defined
+  when: openbao_mode == "restore"
+
+# The pipeline ends in cut, so an absent pod yields empty stdout rather than
+# a task failure; the following task turns that into an explicit error.
+- name: Find openbao manager pod
+  shell: >-
+    kubectl get pods -n openbao | grep "openbao-manager" | cut -d " " -f 1
+  register: openbao_manager_pod_name
+  environment:
+    KUBECONFIG: /etc/kubernetes/admin.conf
+  changed_when: false
+
+- name: Fail if openbao manager pod is not found
+  fail:
+    msg: "Openbao manager pod is not found"
+  when: openbao_manager_pod_name.stdout | length == 0
+
+# openbao_backup_dir is under initial_backup_dir for on-box data and under
+# target_backup_dir for off-box data.
+- name: Create openbao backup subdirectory
+  file:
+    path: "{{ openbao_backup_dir }}"
+    state: directory
+    mode: "0755"
+  become: true