Make aux etcd more conservative
- Currently, the auxiliary etcd instances remove themselves as soon as a single non-genesis member joins the cluster. This leaves the cluster susceptible to unrecoverable disruption until a third member joins. This change makes the auxiliary control script wait for a configurable number of non-auxiliary members to join before removing the auxiliary members. Change-Id: Ib4968b533e8433e3c40a845d086c7078e807c3e2
This commit is contained in:
parent
fe60268244
commit
8649fbd3f5
@ -32,6 +32,8 @@ data:
|
||||
- --v=3
|
||||
armada:
|
||||
target_manifest: cluster-bootstrap
|
||||
etcd:
|
||||
auxiliary_threshold: 3
|
||||
labels:
|
||||
dynamic:
|
||||
- calico-etcd=enabled
|
||||
|
@ -13,6 +13,8 @@ data:
|
||||
external_ip: 192.168.77.10
|
||||
armada:
|
||||
target_manifest: cluster-bootstrap
|
||||
etcd:
|
||||
auxiliary_threshold: 3
|
||||
labels:
|
||||
dynamic:
|
||||
- calico-etcd=enabled
|
||||
|
@ -32,6 +32,8 @@ data:
|
||||
- --v=3
|
||||
armada:
|
||||
target_manifest: cluster-bootstrap
|
||||
etcd:
|
||||
auxiliary_threshold: 3
|
||||
labels:
|
||||
dynamic:
|
||||
- calico-etcd=enabled
|
||||
@ -73,4 +75,4 @@ data:
|
||||
- type: Server
|
||||
qps: 1000
|
||||
burst: 10000
|
||||
...
|
||||
...
|
||||
|
@ -91,6 +91,15 @@ data:
|
||||
additionalProperties: true
|
||||
additionalProperties: false
|
||||
|
||||
etcd:
|
||||
type: object
|
||||
properties:
|
||||
# Number of non-auxiliary etcd members that must have joined
|
||||
# before the auxiliary members will self-terminate
|
||||
auxiliary_threshold:
|
||||
type: integer
|
||||
additionalProperties: false
|
||||
|
||||
files:
|
||||
type: array
|
||||
items:
|
||||
|
@ -30,7 +30,7 @@ spec:
|
||||
function external_member_count() {
|
||||
etcdctl member list \
|
||||
| grep '\bstarted\b' \
|
||||
| grep -Ev "\\b({{ config['Genesis:hostname'] }}|auxiliary-0|auxiliary-1)\\b" \
|
||||
| grep -Ev "\\b(auxiliary-0|auxiliary-1)\\b" \
|
||||
| wc -l
|
||||
}
|
||||
|
||||
@ -42,10 +42,11 @@ spec:
|
||||
fi
|
||||
}
|
||||
|
||||
# NOTE(mark-burnett): If there are any non-genesis members, then we are ready to
|
||||
# remove the auxiliary members. Otherwise, wait.
|
||||
while [ ! "$(external_member_count)" -gt 0 ]; do
|
||||
sleep 10
|
||||
auxiliary_threshold="{{ config.get_first('Genesis:etcd.auxiliary_threshold', default=3) }}"
|
||||
# NOTE(sh8121att): If there are enough (a fully resilient contingent) non-auxiliary members,
|
||||
# then we are ready to remove the auxiliary members. Otherwise, wait.
|
||||
while [ ! "$(external_member_count)" -ge "$auxiliary_threshold" ]; do
|
||||
sleep 30
|
||||
done
|
||||
|
||||
# NOTE(mark-burnett): Failures beyond this point are unexpected, but
|
||||
|
Loading…
x
Reference in New Issue
Block a user