Ceph rook gates improvement
This patchset fixes the instability of the ceph-rook gates by adding extra nodes to the cluster. It also improves monitoring of the Ceph deployment process. Change-Id: I405e501afc15f3974a047475a2b463e7f254da66
This commit is contained in:
parent
fe95c4d1cf
commit
711ef3f735
@ -394,10 +394,10 @@ cephClusterSpec:
|
||||
continueUpgradeAfterChecksEvenIfNotHealthy: false
|
||||
waitTimeoutForHealthyOSDInMinutes: 10
|
||||
mon:
|
||||
count: 1
|
||||
count: 3
|
||||
allowMultiplePerNode: false
|
||||
mgr:
|
||||
count: 1
|
||||
count: 3
|
||||
allowMultiplePerNode: false
|
||||
modules:
|
||||
- name: pg_autoscaler
|
||||
@ -636,6 +636,28 @@ EOF
|
||||
|
||||
helm upgrade --install --create-namespace --namespace ceph rook-ceph-cluster --set operatorNamespace=rook-ceph rook-release/rook-ceph-cluster --version ${ROOK_RELEASE} -f /tmp/ceph.yaml
|
||||
|
||||
TOOLS_POD=$(kubectl get pods \
|
||||
--namespace=ceph \
|
||||
--selector="app=rook-ceph-tools" \
|
||||
--no-headers | awk '{ print $1; exit }')
|
||||
|
||||
helm osh wait-for-pods rook-ceph
|
||||
|
||||
kubectl wait --namespace=ceph --for=condition=ready pod --selector=app=rook-ceph-tools --timeout=600s
|
||||
|
||||
# Wait for all monitor pods to be ready
|
||||
MON_PODS=$(kubectl get pods --namespace=ceph --selector=app=rook-ceph-mon --no-headers | awk '{ print $1 }')
|
||||
for MON_POD in $MON_PODS; do
|
||||
if kubectl get pod --namespace=ceph "$MON_POD" > /dev/null 2>&1; then
|
||||
kubectl wait --namespace=ceph --for=condition=ready "pod/$MON_POD" --timeout=600s
|
||||
else
|
||||
echo "Pod $MON_POD not found, skipping..."
|
||||
fi
|
||||
done
|
||||
|
||||
echo "=========== CEPH K8S PODS LIST ============"
|
||||
kubectl get pods -n rook-ceph -o wide
|
||||
kubectl get pods -n ceph -o wide
|
||||
#NOTE: Wait for deploy
|
||||
RGW_POD=$(kubectl get pods \
|
||||
--namespace=ceph \
|
||||
@ -644,6 +666,12 @@ RGW_POD=$(kubectl get pods \
|
||||
while [[ -z "${RGW_POD}" ]]
|
||||
do
|
||||
sleep 5
|
||||
echo "=========== CEPH STATUS ============"
|
||||
kubectl exec -n ceph ${TOOLS_POD} -- ceph -s
|
||||
echo "=========== CEPH OSD POOL LIST ============"
|
||||
kubectl exec -n ceph ${TOOLS_POD} -- ceph osd pool ls
|
||||
echo "=========== CEPH K8S PODS LIST ============"
|
||||
kubectl get pods -n ceph -o wide
|
||||
RGW_POD=$(kubectl get pods \
|
||||
--namespace=ceph \
|
||||
--selector="app=rook-ceph-rgw" \
|
||||
@ -652,8 +680,4 @@ done
|
||||
helm osh wait-for-pods ceph
|
||||
|
||||
#NOTE: Validate deploy
|
||||
TOOLS_POD=$(kubectl get pods \
|
||||
--namespace=ceph \
|
||||
--selector="app=rook-ceph-tools" \
|
||||
--no-headers | awk '{ print $1; exit }')
|
||||
kubectl exec -n ceph ${TOOLS_POD} -- ceph -s
|
||||
|
7
tools/deployment/common/sleep.sh
Executable file
7
tools/deployment/common/sleep.sh
Executable file
@ -0,0 +1,7 @@
|
||||
#!/bin/bash

# Keeps the calling job alive indefinitely, logging a heartbeat message
# every 100 seconds. The loop never exits on its own; it has to be stopped
# externally (signal or job timeout).
# NOTE(review): presumably used to hold a gate node for debugging - confirm.

set -ex

while true; do
  echo "Sleeping for 100 seconds..."
  # Actually pause between iterations. Without this the loop busy-spins,
  # pinning a CPU core and flooding the job log with the message above.
  sleep 100
done
|
@ -120,7 +120,7 @@
|
||||
- job:
|
||||
name: openstack-helm-infra-logging
|
||||
parent: openstack-helm-infra-deploy
|
||||
nodeset: openstack-helm-3nodes-ubuntu_jammy
|
||||
nodeset: openstack-helm-5nodes-ubuntu_jammy
|
||||
vars:
|
||||
osh_params:
|
||||
openstack_release: "2024.1"
|
||||
@ -353,8 +353,9 @@
|
||||
name: openstack-helm-infra-cinder-2024-1-ubuntu_jammy
|
||||
description: |
|
||||
This job uses Rook for managing Ceph cluster.
|
||||
The job is run on 3 nodes.
|
||||
The job is run on 5 nodes.
|
||||
parent: openstack-helm-cinder-2024-1-ubuntu_jammy
|
||||
nodeset: openstack-helm-5nodes-ubuntu_jammy
|
||||
files:
|
||||
- ^helm-toolkit/.*
|
||||
- ^roles/.*
|
||||
|
Loading…
x
Reference in New Issue
Block a user