Merge "Check cephfs recovery commands during rook-ceph restore"

2025-04-08 15:01:43 +00:00 · 2025-04-08 15:01:43 +00:00 · 72f2833e89
commit 72f2833e89
parent f02abb8772 4d6f0d3c8e
1 changed file with 9 additions and 6 deletions
--- a/playbookconfig/src/playbooks/roles/recover-rook-ceph-data/files/recover_rook_ceph.py
+++ b/playbookconfig/src/playbooks/roles/recover-rook-ceph-data/files/recover_rook_ceph.py
@@ -415,12 +415,15 @@ data:

        # Try to recover from some common errors
        # The timeout command was used because depending on the status of the cluster, it can get stuck
-        # on "cephfs-journal-tool" commands. But this will not cause any problems in recovery.
-        timeout 180 cephfs-journal-tool --rank=${FS_NAME}:0 event recover_dentries summary
-        timeout 180 cephfs-journal-tool --rank=${FS_NAME}:0 journal reset
-        cephfs-table-tool ${FS_NAME}:0 reset session
-        cephfs-table-tool ${FS_NAME}:0 reset snap
-        cephfs-table-tool ${FS_NAME}:0 reset inode
+        # on "cephfs" commands. But this will not cause any problems in recovery.
+        CEPHFS_CMD_TIMEOUT=180
+        timeout ${CEPHFS_CMD_TIMEOUT} cephfs-journal-tool --rank=${FS_NAME}:0 event recover_dentries summary
+        if [ $? -eq 0 ]; then
+          timeout ${CEPHFS_CMD_TIMEOUT} cephfs-journal-tool --rank=${FS_NAME}:0 journal reset
+          timeout ${CEPHFS_CMD_TIMEOUT} cephfs-table-tool ${FS_NAME}:0 reset session
+          timeout ${CEPHFS_CMD_TIMEOUT} cephfs-table-tool ${FS_NAME}:0 reset snap
+          timeout ${CEPHFS_CMD_TIMEOUT} cephfs-table-tool ${FS_NAME}:0 reset inode
+        fi
    fi

    kubectl -n rook-ceph scale deployment -l app=rook-ceph-osd --replicas 1