diff --git a/playbooks/service-bridge.yaml b/playbooks/service-bridge.yaml index 164d924773..7f84396435 100644 --- a/playbooks/service-bridge.yaml +++ b/playbooks/service-bridge.yaml @@ -39,3 +39,23 @@ state: directory owner: root mode: 0755 + + - name: Automated Zuul cluster reboots and updates + # Note this is run via cron because a zuul job can't run this playbook + # as the playbook relies on all jobs ending for graceful stops on the + # executors. + cron: + name: "Zuul cluster restart" + # Start Saturdays at 00:01 UTC (cron weekday 6 is Saturday). + # Estimated completion time Saturday at 18:00 UTC. + minute: 1 + hour: 0 + weekday: 6 + job: "flock -n /var/run/ansible/zuul_reboot.lock /usr/local/bin/ansible-playbook -f 20 /home/zuul/src/opendev.org/opendev/system-config/playbooks/zuul_reboot.yaml > /var/log/ansible/zuul_reboot.log 2>&1" + + - name: Rotate Zuul restart logs + include_role: + name: logrotate + vars: + logrotate_file_name: /var/log/ansible/zuul_reboot.log + logrotate_frequency: weekly diff --git a/playbooks/zuul_reboot.yaml b/playbooks/zuul_reboot.yaml index 1cb00b5a58..e0b7fed147 100644 --- a/playbooks/zuul_reboot.yaml +++ b/playbooks/zuul_reboot.yaml @@ -1,4 +1,5 @@ -# TODO We need to add a locking/failsafe check mechanism +# This relies on flock -n /var/run/ansible/zuul_reboot.lock to ensure +# we don't run multiple copies of this playbook concurrently. # TODO: stop pulling in the hourly job if we do this - name: "Ensure we are going to restart/reboot on the same image"