Improve reset_all_compute_nodes method

faults/ha tests use this method to reboot compute nodes, either
sequentially or in parallel.
In order to verify that the reboot was completed, this method executed
a 'hostname' command on the rebooted computes.
This didn't really guarantee that the reboot had been completed.
Replacing this 'hostname' command by the existing
RebootHostOperation.wait_for_operation method that obtains the output
from the 'uptime' command and compares it from the previous value: the
reboot is done when the uptime value has been updated.

Change-Id: I4f6a68eabec509b36e915fec7f4dfbd763c343d4
This commit is contained in:
Eduardo Olivares 2023-07-13 10:44:10 +02:00
parent 5d2a5574ea
commit a86c116782

View File

@ -434,33 +434,30 @@ def network_undisrupt_controllers_non_main_vip():
def reset_all_compute_nodes(hard_reset=False, sequentially=False):
# reboot all computes and wait for ssh Up on them
def _check_compute(ssh_client):
compute_checked = sh.execute(
"hostname", ssh_client=ssh_client, expect_exit_status=None).stdout
LOG.info('{} is up '.format(compute_checked))
if hard_reset:
reset_method = sh.hard_reset_method
else:
reset_method = sh.soft_reset_method
nodes = topology.list_openstack_nodes(group='compute')
compute_reboot_operation_list = []
for compute in nodes:
# using ssh_client.connect we use a fire and forget reboot method
sh.reboot_host(ssh_client=compute.ssh_client, wait=False,
method=reset_method)
# if sequentially, then wait and check uptime has changed
# else, do not wait (uptime will be checked later)
reboot_operation = sh.reboot_host(ssh_client=compute.ssh_client,
wait=sequentially,
method=reset_method)
compute_reboot_operation_list.append(reboot_operation)
LOG.info('reboot exec: {} on server: {}'.format(reset_method,
compute.name))
tobiko.cleanup_fixture(compute.ssh_client)
if sequentially:
# without the sleep, the command succeeds right after the reboot
# - i.e. the node had not been stopped yet
time.sleep(5)
_check_compute(compute.ssh_client)
if not sequentially:
for compute in nodes:
_check_compute(compute.ssh_client)
for reboot_operation in compute_reboot_operation_list:
# checking uptime on each compute - it should have been updated
# after the reboot is done
reboot_operation.wait_for_operation()
LOG.info(f'{reboot_operation.hostname} is up')
def reset_ovndb_pcs_master_resource():