From cf7d665e793349f331b3eb76aea5c6d07c2b033a Mon Sep 17 00:00:00 2001
From: Stephen Taylor
Date: Wed, 10 Feb 2021 10:43:42 -0700
Subject: [PATCH] [ceph-client] Separate pool quotas from pg_num calculations

Currently pool quotas and pg_num calculations are both based on
percent_total_data values. This is problematic when the amount of data
allowed in a pool does not match the percentage of the cluster's data
expected to be stored in that pool. It is also more intuitive to define
absolute quotas for pools.

This change adds an optional pool_quota value that defines an explicit
per-pool quota in bytes. If pool_quota is omitted for a given pool, that
pool's quota is set to 0 (no quota).

A check_pool_quota_target() Helm test has also been added to verify that
the sum of all pool quotas does not exceed the target quota defined for
the cluster, if one is present.
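
For example (illustrative numbers, not chart defaults): with target.osd=5,
target.final_osd=10, target.quota=80, and 10TiB of raw capacity reported by
ceph df at deployment time, the pool init job computes

  target_quota = 10TiB * (10 / 5) * (80 / 100) = 16TiB

and a pool declared with pool_quota: "1TiB" and replication: 3 contributes
1TiB * 3 = 3TiB toward that limit. Pool setup aborts if the replicated sum
of quotas across all pools exceeds the 16TiB target.
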
Change-Id: I959fb9e95d8f1e03c36e44aba57c552a315867d0
---
 ceph-client/Chart.yaml                      |  2 +-
 ceph-client/templates/bin/pool/_init.sh.tpl | 51 +++++++++++++++++----
 ceph-client/values.yaml                     | 12 ++++-
 releasenotes/notes/ceph-client.yaml         |  3 ++
 4 files changed, 56 insertions(+), 12 deletions(-)

diff --git a/ceph-client/Chart.yaml b/ceph-client/Chart.yaml
index 63ba09339..ab237d0a3 100644
--- a/ceph-client/Chart.yaml
+++ b/ceph-client/Chart.yaml
@@ -15,6 +15,6 @@ apiVersion: v1
 appVersion: v1.0.0
 description: OpenStack-Helm Ceph Client
 name: ceph-client
-version: 0.1.9
+version: 0.1.10
 home: https://github.com/ceph/ceph-client
 ...
diff --git a/ceph-client/templates/bin/pool/_init.sh.tpl b/ceph-client/templates/bin/pool/_init.sh.tpl
index 0c3c66d6b..bfa3fa2f5 100644
--- a/ceph-client/templates/bin/pool/_init.sh.tpl
+++ b/ceph-client/templates/bin/pool/_init.sh.tpl
@@ -243,42 +243,73 @@ function manage_pool () {
   TOTAL_DATA_PERCENT=$4
   TARGET_PG_PER_OSD=$5
   POOL_CRUSH_RULE=$6
-  TARGET_QUOTA=$7
+  POOL_QUOTA=$7
   POOL_PROTECTION=$8
   CLUSTER_CAPACITY=$9
   TOTAL_OSDS={{.Values.conf.pool.target.osd}}
   POOL_PLACEMENT_GROUPS=$(python3 /tmp/pool-calc.py ${POOL_REPLICATION} ${TOTAL_OSDS} ${TOTAL_DATA_PERCENT} ${TARGET_PG_PER_OSD})
   create_pool "${POOL_APPLICATION}" "${POOL_NAME}" "${POOL_REPLICATION}" "${POOL_PLACEMENT_GROUPS}" "${POOL_CRUSH_RULE}" "${POOL_PROTECTION}"
   POOL_REPLICAS=$(ceph --cluster "${CLUSTER}" osd pool get "${POOL_NAME}" size | awk '{print $2}')
-  POOL_QUOTA=$(python3 -c "print(int($CLUSTER_CAPACITY * $TOTAL_DATA_PERCENT * $TARGET_QUOTA / $POOL_REPLICAS / 100 / 100))")
   ceph --cluster "${CLUSTER}" osd pool set-quota "${POOL_NAME}" max_bytes $POOL_QUOTA
 }

+# Helper to convert TiB, TB, GiB, GB, MiB, MB, KiB, KB, or bytes to bytes
+function convert_to_bytes() {
+  value=${1}
+  value="$(echo "${value}" | sed 's/TiB/ \* 1024GiB/g')"
+  value="$(echo "${value}" | sed 's/TB/ \* 1000GB/g')"
+  value="$(echo "${value}" | sed 's/GiB/ \* 1024MiB/g')"
+  value="$(echo "${value}" | sed 's/GB/ \* 1000MB/g')"
+  value="$(echo "${value}" | sed 's/MiB/ \* 1024KiB/g')"
+  value="$(echo "${value}" | sed 's/MB/ \* 1000KB/g')"
+  value="$(echo "${value}" | sed 's/KiB/ \* 1024/g')"
+  value="$(echo "${value}" | sed 's/KB/ \* 1000/g')"
+  python3 -c "print(int(${value}))"
+}
+
 set_cluster_flags
 unset_cluster_flags
 reweight_osds

+{{ $targetOSDCount := .Values.conf.pool.target.osd }}
+{{ $targetFinalOSDCount := .Values.conf.pool.target.final_osd }}
 {{ $targetPGperOSD := .Values.conf.pool.target.pg_per_osd }}
 {{ $crushRuleDefault := .Values.conf.pool.default.crush_rule }}
 {{ $targetQuota := .Values.conf.pool.target.quota | default 100 }}
 {{ $targetProtection := .Values.conf.pool.target.protected | default "false" | quote | lower }}
-cluster_capacity=0
-if [[ $(ceph -v | awk '/version/{print $3}' | cut -d. -f1) -ge 14 ]]; then
-  cluster_capacity=$(ceph --cluster "${CLUSTER}" df | grep "TOTAL" | awk '{print $2 substr($3, 1, 1)}' | numfmt --from=iec)
-else
-  cluster_capacity=$(ceph --cluster "${CLUSTER}" df | head -n3 | tail -n1 | awk '{print $1 substr($2, 1, 1)}' | numfmt --from=iec)
-fi
+cluster_capacity=$(ceph --cluster "${CLUSTER}" df -f json-pretty | grep '"total_bytes":' | head -n1 | awk '{print $2}' | tr -d ',')

 if [[ $(ceph mgr versions | awk '/version/{print $3}' | cut -d. -f1) -eq 14 ]]; then
   enable_or_disable_autoscaling
 fi

+# Check to make sure pool quotas don't exceed the expected cluster capacity in its final state
+target_quota=$(python3 -c "print(int(${cluster_capacity} * {{ $targetFinalOSDCount }} / {{ $targetOSDCount }} * {{ $targetQuota }} / 100))")
+quota_sum=0
+
 {{- range $pool := .Values.conf.pool.spec -}}
 {{- with $pool }}
+# Read the pool quota from the pool spec (no quota if absent)
+# Set pool_quota to 0 if target_quota is 0
+[[ ${target_quota} -eq 0 ]] && pool_quota=0 || pool_quota="$(convert_to_bytes {{ .pool_quota | default 0 }})"
+quota_sum=$(python3 -c "print(int(${quota_sum} + (${pool_quota} * {{ .replication }})))")
+{{- end }}
+{{- end }}
+
+if [[ ${quota_sum} -gt ${target_quota} ]]; then
+  echo "The sum of all pool quotas exceeds the target quota for the cluster"
+  exit 1
+fi
+
+{{- range $pool := .Values.conf.pool.spec -}}
+{{- with $pool }}
+# Read the pool quota from the pool spec (no quota if absent)
+# Set pool_quota to 0 if target_quota is 0
+[[ ${target_quota} -eq 0 ]] && pool_quota=0 || pool_quota="$(convert_to_bytes {{ .pool_quota | default 0 }})"
 {{- if .crush_rule }}
-manage_pool {{ .application }} {{ .name }} {{ .replication }} {{ .percent_total_data }} {{ $targetPGperOSD }} {{ .crush_rule }} {{ $targetQuota }} {{ $targetProtection }} ${cluster_capacity}
+manage_pool {{ .application }} {{ .name }} {{ .replication }} {{ .percent_total_data }} {{ $targetPGperOSD }} {{ .crush_rule }} $pool_quota {{ $targetProtection }} ${cluster_capacity}
 {{ else }}
-manage_pool {{ .application }} {{ .name }} {{ .replication }} {{ .percent_total_data }} {{ $targetPGperOSD }} {{ $crushRuleDefault }} {{ $targetQuota }} {{ $targetProtection }} ${cluster_capacity}
+manage_pool {{ .application }} {{ .name }} {{ .replication }} {{ .percent_total_data }} {{ $targetPGperOSD }} {{ $crushRuleDefault }} $pool_quota {{ $targetProtection }} ${cluster_capacity}
 {{- end }}
 {{- end }}
 {{- end }}
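
To make the sed chain in convert_to_bytes() concrete, here is a small
standalone illustration (not part of the patch; it re-declares the helper
exactly as added above and assumes only bash, sed, and python3):

#!/usr/bin/env bash
# Copy of the convert_to_bytes helper from the hunk above, for local testing.
convert_to_bytes() {
  value=${1}
  value="$(echo "${value}" | sed 's/TiB/ \* 1024GiB/g')"
  value="$(echo "${value}" | sed 's/TB/ \* 1000GB/g')"
  value="$(echo "${value}" | sed 's/GiB/ \* 1024MiB/g')"
  value="$(echo "${value}" | sed 's/GB/ \* 1000MB/g')"
  value="$(echo "${value}" | sed 's/MiB/ \* 1024KiB/g')"
  value="$(echo "${value}" | sed 's/MB/ \* 1000KB/g')"
  value="$(echo "${value}" | sed 's/KiB/ \* 1024/g')"
  value="$(echo "${value}" | sed 's/KB/ \* 1000/g')"
  python3 -c "print(int(${value}))"
}

convert_to_bytes "100GiB"  # "100 * 1024 * 1024 * 1024" -> 107374182400
convert_to_bytes "1TB"     # "1 * 1000 * 1000 * 1000 * 1000" -> 1000000000000
convert_to_bytes 0         # bare byte counts pass through unchanged -> 0

Each substitution rewrites the unit produced by the previous one, so both the
binary (TiB/GiB/MiB/KiB) and decimal (TB/GB/MB/KB) suffixes reduce to a plain
arithmetic expression that python3 then evaluates to bytes.
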
diff --git a/ceph-client/values.yaml b/ceph-client/values.yaml
index 9d341acf0..8d9cfd241 100644
--- a/ceph-client/values.yaml
+++ b/ceph-client/values.yaml
@@ -272,8 +272,13 @@ conf:
       tunables: null
     target:
       # NOTE(portdirect): arbitrarily we set the default number of expected OSD's to 5
-      # to match the number of nodes in the OSH gate (used only for helm tests).
+      # to match the number of nodes in the OSH gate.
       osd: 5
+      # This is the number of OSDs expected in the final state. It allows the target
+      # above to be smaller initially, in the event of a partial deployment, so that
+      # helm tests can still pass at deployment time while pool quotas are set based
+      # on the expected final state (actual target quota = final_osd / osd * quota).
+      final_osd: 5
       # This is just for helm tests to proceed the deployment if we have mentioned % of
       # osds are up and running.
       required_percent_of_osds: 75
@@ -282,6 +287,7 @@ conf:
       # NOTE(st053q): target quota should be set to the overall cluster full percentage
       # to be tolerated as a quota (percent full to allow in order to tolerate some
       # level of failure)
+      # Set target quota to "0" (must be quoted) to remove quotas for all pools
       quota: 100
     default:
       # NOTE(supamatt): Accepted values are taken from `crush_rules` list.
@@ -336,6 +342,10 @@ conf:
         application: rbd
         replication: 3
         percent_total_data: 40
+        # Example of 100 GiB pool_quota for rbd pool (no pool quota if absent)
+        # May be specified in TiB, TB, GiB, GB, MiB, MB, KiB, KB, or bytes
+        # NOTE: This should always be a string value to avoid Helm issues with large integers
+        # pool_quota: "100GiB"
         # NOTE(supamatt): By default the crush rules used to create each pool will be
         # taken from the pool default `crush_rule` unless a pool specific `crush_rule`
         # is specified. The rule MUST exist for it to be defined here.
diff --git a/releasenotes/notes/ceph-client.yaml b/releasenotes/notes/ceph-client.yaml
index 30b522e87..65264ee17 100644
--- a/releasenotes/notes/ceph-client.yaml
+++ b/releasenotes/notes/ceph-client.yaml
@@ -8,4 +8,7 @@ ceph-client:
   - 0.1.5 Fix Helm test check_pgs() check for inactive PGs
   - 0.1.6 Uplift from Nautilus to Octopus release
   - 0.1.7 Don't wait for premerge PGs in the rbd pool job
+  - 0.1.8 enhance logic to enable the autoscaler for Octopus
+  - 0.1.9 Revert "[ceph-client] enhance logic to enable the autoscaler for Octopus"
+  - 0.1.10 Separate pool quotas from pg_num calculations
 ...
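
As a closing illustration (not part of the patch), this is roughly how a
deployment might exercise the new settings. The keys mirror values.yaml
above; the sizes, file path, chart path, release name, and namespace below
are hypothetical examples:

#!/usr/bin/env bash
# Write a hypothetical overrides file for the ceph-client chart.
# NOTE: Helm replaces lists wholesale, so a real override of conf.pool.spec
# must restate every pool, not just the single pool shown here.
tee /tmp/ceph-client-overrides.yaml <<EOF
conf:
  pool:
    target:
      osd: 5          # OSDs present at deployment time
      final_osd: 10   # OSDs expected in the cluster's final state
      quota: 80       # allow quotas (times replication) up to 80% of final raw capacity
    spec:
      - name: rbd
        application: rbd
        replication: 3
        percent_total_data: 40
        pool_quota: "100GiB"   # quoted string, converted to bytes by convert_to_bytes()
EOF

# Apply to the ceph-client release (deployment specifics will vary).
helm upgrade --install ceph-client ./ceph-client \
  --namespace ceph --values /tmp/ceph-client-overrides.yaml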