Disable o-c-c on start-up after reimage

os-collect-config (o-c-c) was running multiple times on boot after a
re-image. This caused services to die during start-up, breaking
services like neutron. This change disables o-c-c after it
has done the key restoration. We then start it when we want it.

Change-Id: I2dd21f115c3425e68fe37e2235ab6a8ff56fa43a
This commit is contained in:
Cian O'Driscoll 2014-11-27 11:52:30 +00:00 committed by Julia Kreger
parent 1398f892f5
commit 74ea04fad8
4 changed files with 44 additions and 48 deletions

View File

@ -20,6 +20,9 @@
# Stop os-collect-config and mark the service as not enabled.
- name: "Disable os-collect-config service"
  sudo: yes
  service: name=os-collect-config enabled=no state=stopped
# Per the task name, this removes the file that allows upstart to start
# os-collect-config; the matching "enable" call is made separately when
# the service is wanted again.
- name: "Remove file that allows os-collect-config start via upstart"
  shell: os-svc-enable-upstart os-collect-config disable
  sudo: yes
# Make sure the local-data directory exists, owned by root:root with
# mode 0700.
- name: "Ensure os-collect-config local-data folder is present"
  sudo: yes
  file: path=/var/lib/os-collect-config/local-data state=directory owner=root group=root mode=0700

View File

@ -0,0 +1,21 @@
# Copyright (c) 2014 Hewlett-Packard Development Company, L.P.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
---
# Counterpart of the "os-svc-enable-upstart os-collect-config disable"
# step: re-allow os-collect-config under upstart, then start it.
- name: "Create file that allows os-collect-config start via upstart"
  shell: os-svc-enable-upstart os-collect-config enable
  sudo: yes
# Only state=started is requested here; the service module is not asked
# to set enabled=yes. Enabling is left to the os-svc-enable-upstart call
# in the task above.
- name: "Enable os-collect-config via upstart and start the service"
  service: name=os-collect-config state=started
  sudo: yes

View File

@ -13,20 +13,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
---
- name: "Setting default fact to run os-collect-config"
set_fact: test_bypass_os_collect_config="False"
- name: "Evaluate if os-collect-config needs to be run"
command: grep -q "(os-refresh-config) [INFO] Completed phase migration" /var/log/upstart/os-collect-config.log
register: test_did_os_collect_config_complete
ignore_errors: yes
when: online_upgrade is not defined
- name: "Setting fact to bypass os-collect-config if applicable"
set_fact: test_bypass_os_collect_config="True"
when: online_upgrade is not defined and test_did_os_collect_config_complete.rc == 0
- name: Remove os-collect-config disable sentinel file
file: path=/mnt/state/disable-os-collect-config state=absent
sudo: yes
- name: "Execute os-collect-config"
command: os-collect-config --force --one
when: test_bypass_os_collect_config != "True"
register: test_occ_results
until: test_occ_results.rc == 0
retries: 2
delay: 15

View File

@ -39,6 +39,7 @@
- service: name={{ item }} enabled=no state=stopped
with_items: undercloud_services
when: helion is not defined and instance_status == "ACTIVE" and item in existing_services
- include: disable_os_collect_config.yml
- hosts: nova-compute
name: Disable Overcloud Compute
tags: shutdown-cloud
@ -46,6 +47,7 @@
gather_facts: no
max_fail_percentage: 0
tasks:
- include: disable_os_collect_config.yml
- fail: "FAILURE: Cannot perform an online upgrade on nodes that are not in ACTIVE state"
when: instance_status != "ACTIVE" and online_upgrade is defined
- include: stop_vms.yml
@ -69,6 +71,7 @@
gather_facts: no
max_fail_percentage: 0
tasks:
- include: disable_os_collect_config.yml
- fail: "Fail if online_upgrade is defined - online upgrades are not supported on swift instances."
when: online_upgrade is defined
- service_facts:
@ -86,6 +89,7 @@
gather_facts: no
max_fail_percentage: 0
tasks:
- include: disable_os_collect_config.yml
- fail: "Fail if online_upgrade is defined - online upgrades are not supported on vsa instances."
when: online_upgrade is defined
- include: stop_vms.yml
@ -171,6 +175,7 @@
- name: "Waiting for rabbitmq-server to stop"
wait_for: port=5672 state=stopped timeout=60 delay=10
when: instance_status == "ACTIVE"
- include: disable_os_collect_config.yml
- hosts: controller-bootstrap
name: Stop MySQL/RabbitMQ on Overcloud Controller Bootstrap node
tags: shutdown-cloud
@ -191,6 +196,7 @@
- name: "Waiting for rabbitmq-server to stop"
wait_for: port=5672 state=stopped timeout=60 delay=10
when: instance_status == "ACTIVE"
- include: disable_os_collect_config.yml
- hosts: all:!unknown
tags: shutdown-cloud
gather_facts: no
@ -224,7 +230,6 @@
sudo: yes
max_fail_percentage: 0
tasks:
- include: disable_os_collect_config.yml
- service_facts:
when: instance_status == "ACTIVE"
- include: step_reset_mnt_state_permissions.yml
@ -237,13 +242,13 @@
# https://bugs.launchpad.net/ironic/+bug/1382698
- include: step_undercloud_ironic_release_reservations.yml
- include: step_run_occ.yml
- service: name=os-collect-config state=started
- service: name={{ item }} enabled=yes state=started
with_items: helion_undercloud_services
when: helion is defined and item in existing_services
- service: name={{ item }} enabled=yes state=started
with_items: undercloud_services
when: helion is not defined and item in existing_services
- include: enable_start_os_collect_config.yml
- hosts: controller-bootstrap
name: Rebuild and Refresh controller-bootstrap
gather_facts: no
@ -274,14 +279,8 @@
- include: mysql_init_fix.yml
- include: rabbitmq_occ_disable.yml
- include: refresh_config.yml
- name: Stop os-collect-config to avoid collission
service: name=os-collect-config state=stopped
- name: "Take a 30 second pause in case os-collect-config had intiiated a process"
pause: seconds=30 prompt="Pausing for 30 seconds to give time for any running os-collect-config sub processes to exit"
- name: "Work around apache2 starting up at boot w/o config..."
service: name=apache2 enabled=no state=stopped
- name: "Remove os-collect-config disable sentinel file"
file: path=/mnt/state/disable-os-collect-config state=absent
- name: "Run os-collect-config"
command: os-collect-config --force --one
- include: step_reset_mnt_state_permissions.yml
@ -309,12 +308,9 @@
when: single_controller is not defined
- include: step_create_databases.yml
- include: start_rabbitmq.yml
- name: "Run os-collect-config"
command: os-collect-config --force --one
- include: step_run_occ.yml
- name: Wait for Rabbit to listen on its usual port
wait_for: port=5672 state=started timeout=90 delay=10
- name: Restart os-collect-config
service: name=os-collect-config state=started
- include: mysql_access_fix.yml
- hosts: controller
name: Rebuild and Refresh Controller
@ -358,14 +354,8 @@
- include: rabbitmq_occ_disable.yml
- include: step_reset_mnt_state_permissions.yml
- include: refresh_config.yml
- name: Stop os-collect-config to avoid collission
service: name=os-collect-config state=stopped
- name: "Take a 30 second pause in case os-collect-config had intiiated a process"
pause: seconds=30 prompt="Pausing for 30 seconds to give time for any running os-collect-config sub processes to exit"
- name: "Work around apache2 starting up at boot w/o config..."
service: name=apache2 enabled=no state=stopped
- name: "Remove os-collect-config disable sentinel file"
file: path=/mnt/state/disable-os-collect-config state=absent
# Directly call os-apply-config to write out configuration files in case
# os-collect-config has failed to reach that step.
- include: step_os-apply-config.yml
@ -393,8 +383,6 @@
- include: start_rabbitmq.yml
- include: rabbitmq_rejoin_cluster.yml
- include: step_run_occ.yml
- name: "Restart os-collect-config"
service: name=os-collect-config state=started
- name: Wait for Rabbit to listen on its usual port
wait_for: port=5672 state=started timeout=120 delay=10
- hosts: controller:controller-bootstrap
@ -412,18 +400,17 @@
sudo: yes
max_fail_percentage: 0
tasks:
- service: name=os-collect-config state=started
- service_facts:
when: instance_status == "ACTIVE"
- service: name={{ item }} enabled=yes state=started
with_items: helion_overcloud_bootstrap_controller_service
when: helion is defined and item in existing_services
- include: enable_start_os_collect_config.yml
- hosts: controller
name: Enable Overcloud Controller
sudo: yes
max_fail_percentage: 0
tasks:
- service: name=os-collect-config state=started
- service_facts:
when: instance_status == "ACTIVE"
- service: name={{ item }} enabled=yes state=started
@ -432,6 +419,7 @@
- service: name={{ item }} enabled=yes state=started
with_items: overcloud_controller_services
when: helion is not defined and item in existing_services
- include: enable_start_os_collect_config.yml
- hosts: swift-storage
name: Rebuild and Refresh swift-storage
gather_facts: no
@ -454,9 +442,7 @@
sudo: yes
max_fail_percentage: 0
tasks:
- include: disable_os_collect_config.yml
- include: step_run_occ.yml
- service: name=os-collect-config state=started
sudo: yes
- service_facts:
when: instance_status == "ACTIVE"
@ -468,6 +454,7 @@
with_items: overcloud_swift_services
sudo: yes
when: helion is not defined and item in existing_services
- include: enable_start_os_collect_config.yml
- hosts: vsa
name: Rebuild and Refresh vsa
gather_facts: no
@ -490,14 +477,13 @@
sudo: yes
max_fail_percentage: 0
tasks:
- include: disable_os_collect_config.yml
- include: step_run_occ.yml
- service: name=os-collect-config state=started
- service_facts:
when: instance_status == "ACTIVE"
- service: name={{ item }} enabled=yes state=started
with_items: helion_overcloud_vsa_services
when: helion is defined and item in existing_services
- include: enable_start_os_collect_config.yml
- hosts: nova-compute
name: "Download image from glance if online upgrade is being invoked"
gather_facts: no
@ -537,24 +523,21 @@
max_fail_percentage: 0
tasks:
- include: step_os-apply-config.yml
- pause: seconds=60 prompt="Giving the compute node sixty seconds to complete existing processes"
# Give the compute node time to finish in-flight work before continuing.
# The prompt text states the same duration as the seconds= value so the
# operator is not told "sixty seconds" for a 45 second pause.
- pause: seconds=45 prompt="Giving the compute node forty-five seconds to complete existing processes"
  when: online_upgrade is not defined
# Write out config files in as we might be getting in while the
# system is starting up.
- include: step_cloud_init.yml
when: online_upgrade is defined
# Note: The wait_for lines below may not, likely are not valid for
# online_upgrade mode of compute nodes.
- name: Wait for cloud-init to Complete
wait_for: path=/run/cloud-init/result.json state=present
- name: Wait for ovs-vswitchd to be started
wait_for: path=/var/run/openvswitch/ovs-vswitchd.pid state=present timeout=60
wait_for: path=/var/run/openvswitch/ovs-vswitchd.pid state=present
when: online_upgrade is not defined
- name: Wait for ovs-vswitchd to config during start-up
pause: minutes=1
when: online_upgrade is not defined
- include: disable_os_collect_config.yml
when: online_upgrade is not defined
- include: step_run_occ.yml
- service: name=os-collect-config state=started
# The pause module takes its message via `prompt`; `msg` is not one of its
# parameters, so the text would not be shown as intended.
- pause: seconds=30 prompt="Pausing for 30 seconds to allow services to complete start-up."
- service_facts:
when: instance_status == "ACTIVE"
@ -568,4 +551,5 @@
# nova-compute should already be started, however this step explicitly sets
# the service to start upon boot.
- service: name=nova-compute state=started enabled=yes
- include: enable_start_os_collect_config.yml
- include: step_post_hook.yml