From cf7d665e793349f331b3eb76aea5c6d07c2b033a Mon Sep 17 00:00:00 2001
From: Stephen Taylor
Date: Wed, 10 Feb 2021 10:43:42 -0700
Subject: [PATCH] [ceph-client] Separate pool quotas from pg_num calculations

Currently pool quotas and pg_num calculations are both based on
percent_total_data values. This is problematic when the amount of data
allowed in a pool does not match the percentage of the cluster's data
expected to be stored in that pool. It is also more intuitive to define
absolute quotas for pools.

This change adds an optional pool_quota value that defines an explicit
per-pool quota in bytes. If pool_quota is omitted for a given pool, that
pool's quota is set to 0 (no quota).

A check_pool_quota_target() Helm test has also been added to verify that
the sum of all pool quotas does not exceed the target quota defined for
the cluster, if one is present.
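
For example (illustrative numbers, not chart defaults): with target.osd=5,
target.final_osd=10, target.quota=80, and 10TiB of raw capacity reported by
ceph df at deployment time, the pool init job computes

  target_quota = 10TiB * (10 / 5) * (80 / 100) = 16TiB

and a pool declared with pool_quota: "1TiB" and replication: 3 contributes
1TiB * 3 = 3TiB toward that limit. Pool setup aborts if the replicated sum
of quotas across all pools exceeds the 16TiB target.
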
Change-Id: I959fb9e95d8f1e03c36e44aba57c552a315867d0
---
 ceph-client/Chart.yaml                      |  2 +-
 ceph-client/templates/bin/pool/_init.sh.tpl | 51 +++++++++++++++++----
 ceph-client/values.yaml                     | 12 ++++-
 releasenotes/notes/ceph-client.yaml         |  3 ++
 4 files changed, 56 insertions(+), 12 deletions(-)

diff --git a/ceph-client/Chart.yaml b/ceph-client/Chart.yaml
index 63ba09339..ab237d0a3 100644
--- a/ceph-client/Chart.yaml
+++ b/ceph-client/Chart.yaml
@@ -15,6 +15,6 @@ apiVersion: v1
 appVersion: v1.0.0
 description: OpenStack-Helm Ceph Client
 name: ceph-client
-version: 0.1.9
+version: 0.1.10
 home: https://github.com/ceph/ceph-client
 ...
diff --git a/ceph-client/templates/bin/pool/_init.sh.tpl b/ceph-client/templates/bin/pool/_init.sh.tpl
index 0c3c66d6b..bfa3fa2f5 100644
--- a/ceph-client/templates/bin/pool/_init.sh.tpl
+++ b/ceph-client/templates/bin/pool/_init.sh.tpl
@@ -243,42 +243,73 @@ function manage_pool () {
   TOTAL_DATA_PERCENT=$4
   TARGET_PG_PER_OSD=$5
   POOL_CRUSH_RULE=$6
-  TARGET_QUOTA=$7
+  POOL_QUOTA=$7
   POOL_PROTECTION=$8
   CLUSTER_CAPACITY=$9
   TOTAL_OSDS={{.Values.conf.pool.target.osd}}
   POOL_PLACEMENT_GROUPS=$(python3 /tmp/pool-calc.py ${POOL_REPLICATION} ${TOTAL_OSDS} ${TOTAL_DATA_PERCENT} ${TARGET_PG_PER_OSD})
   create_pool "${POOL_APPLICATION}" "${POOL_NAME}" "${POOL_REPLICATION}" "${POOL_PLACEMENT_GROUPS}" "${POOL_CRUSH_RULE}" "${POOL_PROTECTION}"
   POOL_REPLICAS=$(ceph --cluster "${CLUSTER}" osd pool get "${POOL_NAME}" size | awk '{print $2}')
-  POOL_QUOTA=$(python3 -c "print(int($CLUSTER_CAPACITY * $TOTAL_DATA_PERCENT * $TARGET_QUOTA / $POOL_REPLICAS / 100 / 100))")
   ceph --cluster "${CLUSTER}" osd pool set-quota "${POOL_NAME}" max_bytes $POOL_QUOTA
 }

+# Helper to convert TiB, TB, GiB, GB, MiB, MB, KiB, KB, or bytes to bytes
+function convert_to_bytes() {
+  value=${1}
+  value="$(echo "${value}" | sed 's/TiB/ \* 1024GiB/g')"
+  value="$(echo "${value}" | sed 's/TB/ \* 1000GB/g')"
+  value="$(echo "${value}" | sed 's/GiB/ \* 1024MiB/g')"
+  value="$(echo "${value}" | sed 's/GB/ \* 1000MB/g')"
+  value="$(echo "${value}" | sed 's/MiB/ \* 1024KiB/g')"
+  value="$(echo "${value}" | sed 's/MB/ \* 1000KB/g')"
+  value="$(echo "${value}" | sed 's/KiB/ \* 1024/g')"
+  value="$(echo "${value}" | sed 's/KB/ \* 1000/g')"
+  python3 -c "print(int(${value}))"
+}
+
 set_cluster_flags
 unset_cluster_flags
 reweight_osds

+{{ $targetOSDCount := .Values.conf.pool.target.osd }}
+{{ $targetFinalOSDCount := .Values.conf.pool.target.final_osd }}
 {{ $targetPGperOSD := .Values.conf.pool.target.pg_per_osd }}
 {{ $crushRuleDefault := .Values.conf.pool.default.crush_rule }}
 {{ $targetQuota := .Values.conf.pool.target.quota | default 100 }}
 {{ $targetProtection := .Values.conf.pool.target.protected | default "false" | quote | lower }}
-cluster_capacity=0
-if [[ $(ceph -v | awk '/version/{print $3}' | cut -d. -f1) -ge 14 ]]; then
-  cluster_capacity=$(ceph --cluster "${CLUSTER}" df | grep "TOTAL" | awk '{print $2 substr($3, 1, 1)}' | numfmt --from=iec)
-else
-  cluster_capacity=$(ceph --cluster "${CLUSTER}" df | head -n3 | tail -n1 | awk '{print $1 substr($2, 1, 1)}' | numfmt --from=iec)
-fi
+cluster_capacity=$(ceph --cluster "${CLUSTER}" df -f json-pretty | grep '"total_bytes":' | head -n1 | awk '{print $2}' | tr -d ',')

 if [[ $(ceph mgr versions | awk '/version/{print $3}' | cut -d. -f1) -eq 14 ]]; then
   enable_or_disable_autoscaling
 fi

+# Check to make sure pool quotas don't exceed the expected cluster capacity in its final state
+target_quota=$(python3 -c "print(int(${cluster_capacity} * {{ $targetFinalOSDCount }} / {{ $targetOSDCount }} * {{ $targetQuota }} / 100))")
+quota_sum=0
+
 {{- range $pool := .Values.conf.pool.spec -}}
 {{- with $pool }}
+# Read the pool quota from the pool spec (no quota if absent)
+# Set pool_quota to 0 if target_quota is 0
+[[ ${target_quota} -eq 0 ]] && pool_quota=0 || pool_quota="$(convert_to_bytes {{ .pool_quota | default 0 }})"
+quota_sum=$(python3 -c "print(int(${quota_sum} + (${pool_quota} * {{ .replication }})))")
+{{- end }}
+{{- end }}
+
+if [[ ${quota_sum} -gt ${target_quota} ]]; then
+  echo "The sum of all pool quotas exceeds the target quota for the cluster"
+  exit 1
+fi
+
+{{- range $pool := .Values.conf.pool.spec -}}
+{{- with $pool }}
+# Read the pool quota from the pool spec (no quota if absent)
+# Set pool_quota to 0 if target_quota is 0
+[[ ${target_quota} -eq 0 ]] && pool_quota=0 || pool_quota="$(convert_to_bytes {{ .pool_quota | default 0 }})"
 {{- if .crush_rule }}
-manage_pool {{ .application }} {{ .name }} {{ .replication }} {{ .percent_total_data }} {{ $targetPGperOSD }} {{ .crush_rule }} {{ $targetQuota }} {{ $targetProtection }} ${cluster_capacity}
+manage_pool {{ .application }} {{ .name }} {{ .replication }} {{ .percent_total_data }} {{ $targetPGperOSD }} {{ .crush_rule }} $pool_quota {{ $targetProtection }} ${cluster_capacity}
 {{ else }}
-manage_pool {{ .application }} {{ .name }} {{ .replication }} {{ .percent_total_data }} {{ $targetPGperOSD }} {{ $crushRuleDefault }} {{ $targetQuota }} {{ $targetProtection }} ${cluster_capacity}
+manage_pool {{ .application }} {{ .name }} {{ .replication }} {{ .percent_total_data }} {{ $targetPGperOSD }} {{ $crushRuleDefault }} $pool_quota {{ $targetProtection }} ${cluster_capacity}
 {{- end }}
 {{- end }}
 {{- end }}
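
To make the sed chain in convert_to_bytes() concrete, here is a small
standalone illustration (not part of the patch; it re-declares the helper
exactly as added above and assumes only bash, sed, and python3):

#!/usr/bin/env bash
# Copy of the convert_to_bytes helper from the hunk above, for local testing.
convert_to_bytes() {
  value=${1}
  value="$(echo "${value}" | sed 's/TiB/ \* 1024GiB/g')"
  value="$(echo "${value}" | sed 's/TB/ \* 1000GB/g')"
  value="$(echo "${value}" | sed 's/GiB/ \* 1024MiB/g')"
  value="$(echo "${value}" | sed 's/GB/ \* 1000MB/g')"
  value="$(echo "${value}" | sed 's/MiB/ \* 1024KiB/g')"
  value="$(echo "${value}" | sed 's/MB/ \* 1000KB/g')"
  value="$(echo "${value}" | sed 's/KiB/ \* 1024/g')"
  value="$(echo "${value}" | sed 's/KB/ \* 1000/g')"
  python3 -c "print(int(${value}))"
}

convert_to_bytes "100GiB"  # "100 * 1024 * 1024 * 1024" -> 107374182400
convert_to_bytes "1TB"     # "1 * 1000 * 1000 * 1000 * 1000" -> 1000000000000
convert_to_bytes 0         # bare byte counts pass through unchanged -> 0

Each substitution rewrites the unit produced by the previous one, so both the
binary (TiB/GiB/MiB/KiB) and decimal (TB/GB/MB/KB) suffixes reduce to a plain
arithmetic expression that python3 then evaluates to bytes.
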
diff --git a/ceph-client/values.yaml b/ceph-client/values.yaml
index 9d341acf0..8d9cfd241 100644
--- a/ceph-client/values.yaml
+++ b/ceph-client/values.yaml
@@ -272,8 +272,13 @@ conf:
       tunables: null
     target:
       # NOTE(portdirect): arbitrarily we set the default number of expected OSD's to 5
-      # to match the number of nodes in the OSH gate (used only for helm tests).
+      # to match the number of nodes in the OSH gate.
       osd: 5
+      # This is the number of OSDs expected in the final state. It allows the target
+      # above to be smaller initially, in the event of a partial deployment, so that
+      # helm tests can still pass at deployment time while pool quotas are set based
+      # on the expected final state (actual target quota = final_osd / osd * quota).
+      final_osd: 5
       # This is just for helm tests to proceed the deployment if we have mentioned % of
       # osds are up and running.
       required_percent_of_osds: 75
@@ -282,6 +287,7 @@ conf:
       # NOTE(st053q): target quota should be set to the overall cluster full percentage
       # to be tolerated as a quota (percent full to allow in order to tolerate some
       # level of failure)
+      # Set target quota to "0" (must be quoted) to remove quotas for all pools
       quota: 100
     default:
       # NOTE(supamatt): Accepted values are taken from `crush_rules` list.
@@ -336,6 +342,10 @@ conf:
         application: rbd
         replication: 3
         percent_total_data: 40
+        # Example of 100 GiB pool_quota for rbd pool (no pool quota if absent)
+        # May be specified in TiB, TB, GiB, GB, MiB, MB, KiB, KB, or bytes
+        # NOTE: This should always be a string value to avoid Helm issues with large integers
+        # pool_quota: "100GiB"
         # NOTE(supamatt): By default the crush rules used to create each pool will be
         # taken from the pool default `crush_rule` unless a pool specific `crush_rule`
         # is specified. The rule MUST exist for it to be defined here.
diff --git a/releasenotes/notes/ceph-client.yaml b/releasenotes/notes/ceph-client.yaml
index 30b522e87..65264ee17 100644
--- a/releasenotes/notes/ceph-client.yaml
+++ b/releasenotes/notes/ceph-client.yaml
@@ -8,4 +8,7 @@ ceph-client:
   - 0.1.5 Fix Helm test check_pgs() check for inactive PGs
   - 0.1.6 Uplift from Nautilus to Octopus release
   - 0.1.7 Don't wait for premerge PGs in the rbd pool job
+  - 0.1.8 enhance logic to enable the autoscaler for Octopus
+  - 0.1.9 Revert "[ceph-client] enhance logic to enable the autoscaler for Octopus"
+  - 0.1.10 Separate pool quotas from pg_num calculations
 ...
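
As a closing illustration (not part of the patch), this is roughly how a
deployment might exercise the new settings. The keys mirror values.yaml
above; the sizes, file path, chart path, release name, and namespace below
are hypothetical examples:

#!/usr/bin/env bash
# Write a hypothetical overrides file for the ceph-client chart.
# NOTE: Helm replaces lists wholesale, so a real override of conf.pool.spec
# must restate every pool, not just the single pool shown here.
tee /tmp/ceph-client-overrides.yaml <<EOF
conf:
  pool:
    target:
      osd: 5          # OSDs present at deployment time
      final_osd: 10   # OSDs expected in the cluster's final state
      quota: 80       # allow quotas (times replication) up to 80% of final raw capacity
    spec:
      - name: rbd
        application: rbd
        replication: 3
        percent_total_data: 40
        pool_quota: "100GiB"   # quoted string, converted to bytes by convert_to_bytes()
EOF

# Apply to the ceph-client release (deployment specifics will vary).
helm upgrade --install ceph-client ./ceph-client \
  --namespace ceph --values /tmp/ceph-client-overrides.yaml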