From eee6b51cb382bd02720472af0d70492df33e0bbc Mon Sep 17 00:00:00 2001
From: Chinasubbareddy Mallavarapu
Date: Thu, 26 Sep 2019 13:07:35 -0500
Subject: [PATCH] [ceph-osd] Retry CRUSH map creation for an OSD while the
 ceph-mon service is down

This updates the logic to retry creating the CRUSH map entry for an OSD
if the ceph-mon service is down for a while.

Change-Id: Idffb189f0749a68a348cc0451daca5dec6796716
---
 ceph-osd/templates/bin/osd/_common.sh.tpl | 20 ++++++++++++++------
 1 file changed, 14 insertions(+), 6 deletions(-)

diff --git a/ceph-osd/templates/bin/osd/_common.sh.tpl b/ceph-osd/templates/bin/osd/_common.sh.tpl
index 308edeed9e..bcf77f2e49 100644
--- a/ceph-osd/templates/bin/osd/_common.sh.tpl
+++ b/ceph-osd/templates/bin/osd/_common.sh.tpl
@@ -61,10 +61,18 @@ function is_available {
   command -v $@ &>/dev/null
 }
 
+function ceph_cmd_retry() {
+  cnt=0
+  until "ceph" "$@" || [ $cnt -ge 6 ]; do
+    sleep 10
+    ((cnt++))
+  done
+}
+
 function crush_create_or_move {
   local crush_location=${1}
-  ceph --cluster "${CLUSTER}" --name="osd.${OSD_ID}" --keyring="${OSD_KEYRING}" \
-    osd crush create-or-move -- "${OSD_ID}" "${OSD_WEIGHT}" ${crush_location} || true
+  ceph_cmd_retry --cluster "${CLUSTER}" --name="osd.${OSD_ID}" --keyring="${OSD_KEYRING}" \
+    osd crush create-or-move -- "${OSD_ID}" "${OSD_WEIGHT}" ${crush_location}
 }
 
 function crush_add_and_move {
@@ -72,15 +80,15 @@ function crush_add_and_move {
   local crush_failure_domain_name=${2}
   local crush_location=$(echo "root=default ${crush_failure_domain_type}=${crush_failure_domain_name} host=${HOSTNAME}")
   crush_create_or_move "${crush_location}"
-  local crush_failure_domain_location_check=$(ceph --cluster "${CLUSTER}" --name="osd.${OSD_ID}" --keyring="${OSD_KEYRING}" osd find ${OSD_ID} | grep "${crush_failure_domain_type}" | awk -F '"' '{print $4}')
+  local crush_failure_domain_location_check=$(ceph_cmd_retry --cluster "${CLUSTER}" --name="osd.${OSD_ID}" --keyring="${OSD_KEYRING}" osd find ${OSD_ID} | grep "${crush_failure_domain_type}" | awk -F '"' '{print $4}')
   if [ "x${crush_failure_domain_location_check}" != "x${crush_failure_domain_name}" ]; then
     # NOTE(supamatt): Manually move the buckets for previously configured CRUSH configurations
     # as create-or-move may not appropiately move them.
-    ceph --cluster "${CLUSTER}" --name="osd.${OSD_ID}" --keyring="${OSD_KEYRING}" \
+    ceph_cmd_retry --cluster "${CLUSTER}" --name="osd.${OSD_ID}" --keyring="${OSD_KEYRING}" \
       osd crush add-bucket "${crush_failure_domain_name}" "${crush_failure_domain_type}" || true
-    ceph --cluster "${CLUSTER}" --name="osd.${OSD_ID}" --keyring="${OSD_KEYRING}" \
+    ceph_cmd_retry --cluster "${CLUSTER}" --name="osd.${OSD_ID}" --keyring="${OSD_KEYRING}" \
       osd crush move "${crush_failure_domain_name}" root=default || true
-    ceph --cluster "${CLUSTER}" --name="osd.${OSD_ID}" --keyring="${OSD_KEYRING}" \
+    ceph_cmd_retry --cluster "${CLUSTER}" --name="osd.${OSD_ID}" --keyring="${OSD_KEYRING}" \
       osd crush move "${HOSTNAME}" "${crush_failure_domain_type}=${crush_failure_domain_name}" || true
   fi
 }
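
Note (not part of the patch): a minimal, self-contained sketch of the retry
pattern ceph_cmd_retry uses, runnable without a Ceph cluster. The fake_ceph
function and the /tmp/mon_up marker file are hypothetical stand-ins for a
ceph invocation against a monitor that recovers after a short outage.

#!/bin/bash
# Stand-in for "ceph": succeeds only once a marker file exists,
# mimicking a mon that comes back up. Purely illustrative.
fake_ceph() {
  [ -f /tmp/mon_up ]
}

retry_cmd() {
  local cnt=0
  # Same shape as ceph_cmd_retry in the patch: retry up to 6 more times,
  # 10 seconds apart, so a mon outage of roughly a minute is tolerated.
  until fake_ceph "$@" || [ $cnt -ge 6 ]; do
    sleep 10
    ((cnt++))
  done
}

touch /tmp/mon_up   # simulate the mon recovering before the first attempt
retry_cmd osd crush create-or-move -- 0 1.0 root=default && echo "succeeded"

One design consequence worth noting: crush_create_or-move previously swallowed
failures with "|| true", while after this patch a create-or-move that still
fails once the retries are exhausted will propagate its exit status to the
caller; the add-bucket and move calls keep their "|| true" guards.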