Ceph rook gates improvement

This patchset fixes the instability of the ceph-rook gates by adding extra
nodes to the cluster and improves monitoring of the Ceph deployment process.

Change-Id: I405e501afc15f3974a047475a2b463e7f254da66
parent fe95c4d1cf
commit 711ef3f735
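For the "extra nodes" half of this change, the cinder and logging gate jobs below move from a 3-node to a 5-node nodeset, which gives Ceph more hosts to place monitors and OSDs on. A minimal sketch of how that spread could be confirmed during a gate run (illustration only, not part of the patch; the toolbox-pod lookup mirrors the one added in the deployment script, and ceph osd tree is a standard Ceph command):

# List the Kubernetes nodes the job received, then show how Ceph placed
# its OSDs across them, via the rook-ceph toolbox pod.
kubectl get nodes -o wide
TOOLS_POD=$(kubectl get pods --namespace=ceph \
  --selector="app=rook-ceph-tools" --no-headers | awk '{ print $1; exit }')
kubectl exec -n ceph "${TOOLS_POD}" -- ceph osd tree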
@@ -394,10 +394,10 @@ cephClusterSpec:
   continueUpgradeAfterChecksEvenIfNotHealthy: false
   waitTimeoutForHealthyOSDInMinutes: 10
   mon:
-    count: 1
+    count: 3
     allowMultiplePerNode: false
   mgr:
-    count: 1
+    count: 3
     allowMultiplePerNode: false
     modules:
       - name: pg_autoscaler
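With mon count: 3 (and mgr count: 3), the monitor cluster keeps quorum when a single mon pod is lost, since two of three monitors still agree; with a single mon, any trouble with that one pod stalls the whole deployment. A hedged sketch of checking quorum from the toolbox pod (ceph mon stat and ceph quorum_status are standard Ceph commands; the TOOLS_POD lookup is the same one the script change below introduces):

# Show the current monitor map and quorum membership.
kubectl exec -n ceph "${TOOLS_POD}" -- ceph mon stat
kubectl exec -n ceph "${TOOLS_POD}" -- ceph quorum_status --format json-pretty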
@@ -636,6 +636,28 @@ EOF

 helm upgrade --install --create-namespace --namespace ceph rook-ceph-cluster --set operatorNamespace=rook-ceph rook-release/rook-ceph-cluster --version ${ROOK_RELEASE} -f /tmp/ceph.yaml

+TOOLS_POD=$(kubectl get pods \
+  --namespace=ceph \
+  --selector="app=rook-ceph-tools" \
+  --no-headers | awk '{ print $1; exit }')
+
+helm osh wait-for-pods rook-ceph
+
+kubectl wait --namespace=ceph --for=condition=ready pod --selector=app=rook-ceph-tools --timeout=600s
+
+# Wait for all monitor pods to be ready
+MON_PODS=$(kubectl get pods --namespace=ceph --selector=app=rook-ceph-mon --no-headers | awk '{ print $1 }')
+for MON_POD in $MON_PODS; do
+  if kubectl get pod --namespace=ceph "$MON_POD" > /dev/null 2>&1; then
+    kubectl wait --namespace=ceph --for=condition=ready "pod/$MON_POD" --timeout=600s
+  else
+    echo "Pod $MON_POD not found, skipping..."
+  fi
+done
+
+echo "=========== CEPH K8S PODS LIST ============"
+kubectl get pods -n rook-ceph -o wide
+kubectl get pods -n ceph -o wide
 #NOTE: Wait for deploy
 RGW_POD=$(kubectl get pods \
   --namespace=ceph \
@@ -644,6 +666,12 @@ RGW_POD=$(kubectl get pods \
 while [[ -z "${RGW_POD}" ]]
 do
   sleep 5
+  echo "=========== CEPH STATUS ============"
+  kubectl exec -n ceph ${TOOLS_POD} -- ceph -s
+  echo "=========== CEPH OSD POOL LIST ============"
+  kubectl exec -n ceph ${TOOLS_POD} -- ceph osd pool ls
+  echo "=========== CEPH K8S PODS LIST ============"
+  kubectl get pods -n ceph -o wide
   RGW_POD=$(kubectl get pods \
     --namespace=ceph \
     --selector="app=rook-ceph-rgw" \
@@ -652,8 +680,4 @@ done
 helm osh wait-for-pods ceph

 #NOTE: Validate deploy
-TOOLS_POD=$(kubectl get pods \
-  --namespace=ceph \
-  --selector="app=rook-ceph-tools" \
-  --no-headers | awk '{ print $1; exit }')
 kubectl exec -n ceph ${TOOLS_POD} -- ceph -s
tools/deployment/common/sleep.sh (new executable file, 7 lines)
@@ -0,0 +1,7 @@
+#!/bin/bash
+
+set -ex
+
+while true; do
+  echo "Sleeping for 100 seconds..."
+done
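As committed, the loop body only prints the message, so the script spins without pausing; if the intent matches the message text, a sleep belongs inside the loop. A hedged sketch of that variant (the 100-second figure comes from the echo string and is otherwise an assumption):

#!/bin/bash
set -ex
while true; do
  echo "Sleeping for 100 seconds..."
  sleep 100  # pause so the loop does not busy-wait
done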
@@ -120,7 +120,7 @@
 - job:
     name: openstack-helm-infra-logging
     parent: openstack-helm-infra-deploy
-    nodeset: openstack-helm-3nodes-ubuntu_jammy
+    nodeset: openstack-helm-5nodes-ubuntu_jammy
     vars:
       osh_params:
         openstack_release: "2024.1"
@@ -353,8 +353,9 @@
     name: openstack-helm-infra-cinder-2024-1-ubuntu_jammy
     description: |
       This job uses Rook for managing Ceph cluster.
-      The job is run on 3 nodes.
+      The job is run on 5 nodes.
     parent: openstack-helm-cinder-2024-1-ubuntu_jammy
+    nodeset: openstack-helm-5nodes-ubuntu_jammy
     files:
       - ^helm-toolkit/.*
       - ^roles/.*