(fix) Check sync of only active rack controllers
- The import resources job waits for rack controllers to sync the imported resources. In an environment where rack pods have been rescheduled away from a node, a dead rack controller is left behind in the MAAS database. We cannot gate on dead controllers syncing because rackd is no longer running on them, so now only rack controllers with a running rackd are checked. Change-Id: I5ca16a0c97ed201a08844ca7c82c2cbb7d059aa7
This commit is contained in:
parent
0b95ba4d41
commit
c84a5b64de
@ -22,83 +22,121 @@ TRY_LIMIT=${TRY_LIMIT:-1}
|
||||
JOB_TIMEOUT=${JOB_TIMEOUT:-900}
|
||||
RETRY_TIMER=${RETRY_TIMER:-30}
|
||||
|
||||
function start_import {
|
||||
check_for_download
|
||||
function timer {
|
||||
retry_wait=$1
|
||||
shift
|
||||
|
||||
if [[ $? -eq 0 ]]
|
||||
while [[ ${JOB_TIMEOUT} -gt 0 ]]
|
||||
do
|
||||
"$@"
|
||||
rc=$?
|
||||
if [ $rc -eq 0 ]
|
||||
then
|
||||
echo "Already have images, skipping import."
|
||||
return 0
|
||||
return $rc
|
||||
else
|
||||
JOB_TIMEOUT=$(($JOB_TIMEOUT - $retry_wait))
|
||||
sleep $retry_wait
|
||||
fi
|
||||
done
|
||||
|
||||
while [[ ${import_tries} -lt $TRY_LIMIT ]]
|
||||
do
|
||||
import_tries=$(($import_tries + 1))
|
||||
echo "Starting image import try ${import_tries}..."
|
||||
maas ${ADMIN_USERNAME} boot-resources import
|
||||
sleep 30 # Seems MAAS needs time to sync up
|
||||
check_for_download
|
||||
if [[ $? -eq 0 ]]
|
||||
then
|
||||
echo "Image import success!"
|
||||
return 0
|
||||
fi
|
||||
done
|
||||
return 1
|
||||
return 124
|
||||
}
|
||||
|
||||
# Make one attempt at ensuring the boot resources are downloaded.
#
# Runs check_for_download first; only if that fails is an import requested
# from MAAS, followed by a fixed pause and a second check.
#
# Globals:  ADMIN_USERNAME (read)  - MAAS CLI profile name
#           import_tries (written) - attempt counter shown in the log line
# Returns:  the status of the last check_for_download call
function import_resources {
    # Keep the status local so it cannot clobber a caller's variable of the
    # same name.
    local rc

    check_for_download
    rc=$?

    if [[ ${rc} -ne 0 ]]
    then
        # Count attempts here so the log line reports a meaningful number on
        # every retry.
        import_tries=$(( ${import_tries:-0} + 1 ))
        echo "Starting image import try ${import_tries}..."
        maas "${ADMIN_USERNAME}" boot-resources import
        # Pause before re-checking so the import request has time to register.
        sleep 30
        check_for_download
        rc=$?
    fi

    return "${rc}"
}
|
||||
|
||||
# Drive the boot-resource import: hands import_resources to timer, using
# RETRY_TIMER as the wait between attempts. Returns whatever timer returns.
function start_import {
    timer "${RETRY_TIMER}" import_resources
}
|
||||
|
||||
function check_for_download {
|
||||
|
||||
while [[ ${JOB_TIMEOUT} -gt 0 ]]; do
|
||||
if maas ${ADMIN_USERNAME} boot-resources is-importing | grep -q 'true';
|
||||
then
|
||||
echo -e '\nBoot resources currently importing\n'
|
||||
let JOB_TIMEOUT-=${RETRY_TIMER}
|
||||
sleep ${RETRY_TIMER}
|
||||
else
|
||||
synced_imgs=$(maas ${ADMIN_USERNAME} boot-resources read | tail -n +1 | jq ".[] | select( .type | contains(\"Synced\")) | .name " | grep -c $MAAS_DEFAULT_DISTRO)
|
||||
if [[ $synced_imgs -gt 0 ]]
|
||||
then
|
||||
echo 'Boot resources have completed importing'
|
||||
return 0
|
||||
else
|
||||
echo 'Import failed!'
|
||||
return 1
|
||||
fi
|
||||
fi
|
||||
done
|
||||
echo "Timeout waiting for import!"
|
||||
if maas ${ADMIN_USERNAME} boot-resources is-importing | grep -q 'true';
|
||||
then
|
||||
echo -e '\nBoot resources currently importing\n'
|
||||
return 1
|
||||
else
|
||||
synced_imgs=$(maas ${ADMIN_USERNAME} boot-resources read | tail -n +1 | jq ".[] | select( .type | contains(\"Synced\")) | .name " | grep -c $MAAS_DEFAULT_DISTRO)
|
||||
if [[ $synced_imgs -gt 0 ]]
|
||||
then
|
||||
echo 'Boot resources have completed importing'
|
||||
return 0
|
||||
else
|
||||
echo 'Import failed!'
|
||||
return 1
|
||||
fi
|
||||
fi
|
||||
}
|
||||
|
||||
# Make one attempt at bringing a MAAS configuration option to a desired value.
#
# Reads the current value with `maas ... get-config`, compares it with the
# desired one (double quotes are stripped from both sides), and calls
# `maas ... set-config` only when they differ.
#
# Globals:    ADMIN_USERNAME (read) - MAAS CLI profile name
# Arguments:  $1 - option name
#             $2 - desired value
# Returns:    0 if the option already has the desired value, otherwise the
#             exit status of `maas ... set-config`
function check_then_set_single {
    local option="$1"
    local value="$2"
    local cur_val desired_val

    # Only the last line of the get-config output is treated as the value.
    cur_val=$(maas "${ADMIN_USERNAME}" maas get-config "name=${option}" | tail -n 1 | tr -d '"')
    # Strip quotes with parameter expansion so the value is never word-split
    # or glob-expanded on the way through.
    desired_val=${value//\"/}

    # Quote the right-hand side: unquoted, [[ ]] would treat it as a glob
    # pattern rather than a literal string.
    if [[ "${cur_val}" != "${desired_val}" ]]
    then
        echo "Setting MAAS option ${option} to ${desired_val}"
        maas "${ADMIN_USERNAME}" maas set-config "name=${option}" "value=${desired_val}"
        return $?
    else
        echo "MAAS option ${option} already set to ${cur_val}"
        return 0
    fi
}
|
||||
|
||||
function check_then_set {
|
||||
option=$1
|
||||
value=$2
|
||||
|
||||
while [[ ${JOB_TIMEOUT} -gt 0 ]]
|
||||
do
|
||||
cur_val=$(maas ${ADMIN_USERNAME} maas get-config name=${option} | tail -1 | tr -d '"')
|
||||
desired_val=$(echo ${value} | tr -d '"')
|
||||
timer "$RETRY_TIMER" check_then_set_single "$option" "$value"
|
||||
}
|
||||
|
||||
if [[ $cur_val != $desired_val ]]
|
||||
then
|
||||
echo "Setting MAAS option ${option} to ${desired_val}"
|
||||
maas ${ADMIN_USERNAME} maas set-config name=${option} value=${desired_val}
|
||||
if [[ $? -gt 0 ]]
|
||||
then
|
||||
let JOB_TIMEOUT-=${RETRY_TIMER}
|
||||
sleep ${RETRY_TIMER}
|
||||
else
|
||||
return $?
|
||||
fi
|
||||
else
|
||||
echo "MAAS option ${option} already set to ${cur_val}"
|
||||
return 0
|
||||
fi
|
||||
# Get rack controllers reporting a healthy rackd.
#
# Reads all rack controllers from MAAS and prints the system_id of each one
# whose "rackd" entry in service_set has status "running", one per line.
#
# Globals:  ADMIN_USERNAME (read) - MAAS CLI profile name
# Outputs:  system_ids on stdout
# Returns:  the exit status of jq (last command of the pipeline)
function get_active_rack_controllers {
    # Pair every controller with its rackd service entry, keep the pairs whose
    # rackd is running, and emit just the ids.
    local running_rackd_filter='map({"system_id":.system_id,"service_set":(.service_set[] | select(.name=="rackd"))}) | map(select(.service_set.status == "running")) | .[] | .system_id'

    maas "${ADMIN_USERNAME}" rack-controllers read | jq -r "${running_rackd_filter}"
}
|
||||
|
||||
# Make one pass over the active rack controllers and check that each has
# synced boot images for the default distro.
#
# A controller counts as synced when its boot-image listing reports status
# "synced" AND contains at least one image whose name includes
# MAAS_DEFAULT_DISTRO. For every controller that does not, an image import is
# requested on that controller.
#
# Globals:  ADMIN_USERNAME (read)      - MAAS CLI profile name
#           MAAS_DEFAULT_DISTRO (read) - substring to look for in image names
# Returns:  0 if there is at least one active controller and all are synced,
#           1 otherwise (including when no active controller is found)
function check_for_rack_sync_single {
    local rack_list rack_id boot_images selected_imgs synced_ctlr
    local checked=0
    local pending=0

    rack_list=$(get_active_rack_controllers)

    # Unquoted on purpose: the list is whitespace-separated system_ids.
    for rack_id in ${rack_list}
    do
        checked=$((checked + 1))

        # Query the controller once and derive both facts from that listing.
        boot_images=$(maas "${ADMIN_USERNAME}" rack-controller list-boot-images "${rack_id}")
        # Pass the distro as a jq variable instead of splicing it into the
        # filter text.
        selected_imgs=$(jq --arg distro "${MAAS_DEFAULT_DISTRO}" \
            '.images[] | select(.name | contains($distro)) | .name' <<<"${boot_images}")
        synced_ctlr=$(jq '.status == "synced"' <<<"${boot_images}")

        if [[ "${synced_ctlr}" != "true" || -z "${selected_imgs}" ]]
        then
            pending=$((pending + 1))
            maas "${ADMIN_USERNAME}" rack-controller import-boot-images "${rack_id}"
        fi
    done

    if [[ ${checked} -gt 0 && ${pending} -eq 0 ]]
    then
        return 0
    fi

    return 1
}
|
||||
|
||||
# Wait for rack controller image sync: hands check_for_rack_sync_single to
# timer, using RETRY_TIMER as the wait between attempts. Returns whatever
# timer returns.
function check_for_rack_sync {
    timer "${RETRY_TIMER}" check_for_rack_sync_single
}
|
||||
|
||||
function configure_proxy {
|
||||
check_then_set enable_http_proxy ${MAAS_PROXY_ENABLED}
|
||||
check_then_set use_peer_proxy ${MAAS_PEER_PROXY_ENABLED}
|
||||
@ -117,8 +155,9 @@ function configure_dns {
|
||||
}
|
||||
|
||||
function configure_images {
|
||||
check_for_rack_sync
|
||||
|
||||
if [[ $? -eq 1 ]]
|
||||
if [[ $? -eq 124 ]]
|
||||
then
|
||||
echo "Timed out waiting for rack controller sync."
|
||||
return 1
|
||||
@ -147,8 +186,17 @@ function configure_boot_sources {
|
||||
fi
|
||||
}
|
||||
|
||||
KEY=$(maas-region apikey --username=${ADMIN_USERNAME})
|
||||
maas login ${ADMIN_USERNAME} ${MAAS_ENDPOINT} $KEY
|
||||
# Make one attempt at logging the MAAS CLI in as the admin user.
#
# Fetches the admin API key with `maas-region apikey` and uses it to create
# the CLI profile via `maas login`.
#
# Globals:  ADMIN_USERNAME (read) - user whose key is fetched; also the
#                                   CLI profile name
#           MAAS_ENDPOINT (read)  - URL passed to `maas login`
#           KEY (written)         - the API key that was fetched
# Returns:  1 if the key could not be obtained (command failed or printed
#           nothing), otherwise the exit status of `maas login`
function maas_login {
    # A failing apikey command must not be followed by a login attempt, even
    # if it printed something.
    KEY=$(maas-region apikey --username="${ADMIN_USERNAME}") || return 1

    if [[ -z "${KEY}" ]]
    then
        return 1
    fi

    maas login "${ADMIN_USERNAME}" "${MAAS_ENDPOINT}" "${KEY}"
    return $?
}
|
||||
|
||||
timer "$RETRY_TIMER" maas_login
|
||||
|
||||
configure_proxy
|
||||
configure_ntp
|
||||
@ -157,6 +205,7 @@ configure_dns
|
||||
# make call to import images
|
||||
configure_boot_sources
|
||||
start_import
|
||||
|
||||
if [[ $? -eq 0 ]]
|
||||
then
|
||||
configure_images
|
||||
|
Loading…
x
Reference in New Issue
Block a user