From 49ebb729600781fdc7fff091ddb88b1cd219ab30 Mon Sep 17 00:00:00 2001
From: jmarchel
Date: Thu, 1 Feb 2024 11:05:01 +0100
Subject: [PATCH] Improve the CI check for pod liveness to fail on pods that
 are repeatedly restarting

Change-Id: Ic7d638c090c108efc70a8a9d5f417fbf0ca84795
---
 playbooks/helm/run.yaml                  | 17 +++++++++++++++++
 roles/check-pod-restarts/tasks/main.yaml | 26 ++++++++++++++++++++++++++
 zuul.d/jobs.yaml                         |  1 +
 3 files changed, 44 insertions(+)
 create mode 100644 playbooks/helm/run.yaml
 create mode 100644 roles/check-pod-restarts/tasks/main.yaml

diff --git a/playbooks/helm/run.yaml b/playbooks/helm/run.yaml
new file mode 100644
index 0000000..2686879
--- /dev/null
+++ b/playbooks/helm/run.yaml
@@ -0,0 +1,17 @@
+# Installs every chart listed in `helm_charts` (release name -> chart), then
+# runs the check-pod-restarts role to catch pods that keep restarting.
+- hosts: all
+  tasks:
+    - name: Install helm charts
+      include_role:
+        name: helm-template
+      vars:
+        helm_release_name: "{{ zj_item.key }}"
+        helm_chart: "{{ zj_item.value }}"
+      loop: "{{ helm_charts | dict2items }}"
+      loop_control:
+        loop_var: 'zj_item'
+
+    - name: Check pod restarts after helm chart installations
+      include_role:
+        name: check-pod-restarts
diff --git a/roles/check-pod-restarts/tasks/main.yaml b/roles/check-pod-restarts/tasks/main.yaml
new file mode 100644
index 0000000..eccb586
--- /dev/null
+++ b/roles/check-pod-restarts/tasks/main.yaml
@@ -0,0 +1,26 @@
+# Fails the play when any pod returned by `kubectl get pods` has a container
+# that restarted `pod_restart_threshold` (default 3) or more times.
+
+- name: Get pods and their restart counts
+  # Emits one "<pod>:<count> <count> ..." line per pod; a pod with several
+  # containers yields several space-separated counts.
+  shell: >
+    kubectl get pods -o=jsonpath="{range .items[*]}{.metadata.name}:{.status.containerStatuses[*].restartCount}{'\n'}{end}"
+  register: pods_restart_counts
+  # Read-only query, so never report the task as changed.
+  changed_when: false
+
+- name: Check for pods that have restarted more than the allowed threshold
+  set_fact:
+    unstable_pods: "{{ unstable_pods | default([]) + [item.split(':')[0]] }}"
+  loop: "{{ pods_restart_counts.stdout_lines }}"
+  # Compare the highest per-container count; the trailing [0] keeps `max`
+  # defined for pods that have no container statuses yet.
+  when: >-
+    (item.split(':')[1].split() | map('int') | list + [0]) | max
+    >= (pod_restart_threshold | default(3) | int)
+
+- name: Report if any pod has restarted too many times
+  fail:
+    msg: "There were some unstable pods: {{ unstable_pods }}"
+  when: unstable_pods | default([]) | length > 0
diff --git a/zuul.d/jobs.yaml b/zuul.d/jobs.yaml
index 8a8d5b7..8eb3872 100644
--- a/zuul.d/jobs.yaml
+++ b/zuul.d/jobs.yaml
@@ -22,6 +22,7 @@
     roles:
       - zuul: zuul/zuul-jobs
     pre-run: playbooks/helm/pre.yaml
+    run: playbooks/helm/run.yaml
     post-run: playbooks/kubernetes/post.yaml
     vars:
       helm_version: *helm_version