Improve reset_all_compute_nodes method
faults/ha tests use this method to reboot compute nodes, either sequentially or in parallel. In order to verify that the reboot was completed, this method executed a 'hostname' command on the rebooted computes. This didn't really guarantee that the reboot had been completed. Replacing this 'hostname' command by the existing RebootHostOperation.wait_for_operation method that obtains the output from the 'uptime' command and compares it from the previous value: the reboot is done when the uptime value has been updated. Change-Id: I4f6a68eabec509b36e915fec7f4dfbd763c343d4
This commit is contained in:
parent
5d2a5574ea
commit
a86c116782
@ -434,33 +434,30 @@ def network_undisrupt_controllers_non_main_vip():
|
||||
def reset_all_compute_nodes(hard_reset=False, sequentially=False):
|
||||
|
||||
# reboot all computes and wait for ssh Up on them
|
||||
def _check_compute(ssh_client):
|
||||
compute_checked = sh.execute(
|
||||
"hostname", ssh_client=ssh_client, expect_exit_status=None).stdout
|
||||
LOG.info('{} is up '.format(compute_checked))
|
||||
|
||||
if hard_reset:
|
||||
reset_method = sh.hard_reset_method
|
||||
else:
|
||||
reset_method = sh.soft_reset_method
|
||||
|
||||
nodes = topology.list_openstack_nodes(group='compute')
|
||||
compute_reboot_operation_list = []
|
||||
for compute in nodes:
|
||||
# using ssh_client.connect we use a fire and forget reboot method
|
||||
sh.reboot_host(ssh_client=compute.ssh_client, wait=False,
|
||||
method=reset_method)
|
||||
# if sequentially, then wait and check uptime has changed
|
||||
# else, do not wait (uptime will be checked later)
|
||||
reboot_operation = sh.reboot_host(ssh_client=compute.ssh_client,
|
||||
wait=sequentially,
|
||||
method=reset_method)
|
||||
compute_reboot_operation_list.append(reboot_operation)
|
||||
LOG.info('reboot exec: {} on server: {}'.format(reset_method,
|
||||
compute.name))
|
||||
tobiko.cleanup_fixture(compute.ssh_client)
|
||||
if sequentially:
|
||||
# without the sleep, the command succeeds right after the reboot
|
||||
# - i.e. the node had not been stopped yet
|
||||
time.sleep(5)
|
||||
_check_compute(compute.ssh_client)
|
||||
|
||||
if not sequentially:
|
||||
for compute in nodes:
|
||||
_check_compute(compute.ssh_client)
|
||||
for reboot_operation in compute_reboot_operation_list:
|
||||
# checking uptime on each compute - it should have been updated
|
||||
# after the reboot is done
|
||||
reboot_operation.wait_for_operation()
|
||||
LOG.info(f'{reboot_operation.hostname} is up')
|
||||
|
||||
|
||||
def reset_ovndb_pcs_master_resource():
|
||||
|
Loading…
x
Reference in New Issue
Block a user