Merge "Add first health checks to faults podified ha tests"
commit 8a3b7ea932

Changed areas: tobiko/openstack/nova, tobiko/podified, tobiko/shell/sh,
tobiko/tests/faults, tobiko/tripleo
tobiko/openstack/nova/__init__.py
@@ -13,6 +13,7 @@
 # under the License.
 from __future__ import absolute_import
 
+from tobiko.openstack.nova import _checks
 from tobiko.openstack.nova import _client
 from tobiko.openstack.nova import _cloud_init
 from tobiko.openstack.nova import _hypervisor
@@ -85,5 +86,12 @@ find_server_ip_address = _server.find_server_ip_address
 HasServerMixin = _server.HasServerMixin
 get_server_id = _server.get_server_id
 list_server_ip_addresses = _server.list_server_ip_addresses
+action_on_all_instances = _server.action_on_all_instances
 
 wait_for_services_up = _service.wait_for_services_up
+
+check_nova_services_health = _checks.check_nova_services_health
+check_virsh_domains_running = _checks.check_virsh_domains_running
+wait_for_all_instances_status = _checks.wait_for_all_instances_status
+check_vms_ping = _checks.check_vms_ping
+check_vm_evacuations = _checks.check_vm_evacuations
tobiko/openstack/nova/_checks.py (new file, 147 lines)
@@ -0,0 +1,147 @@
+# Copyright (c) 2025 Red Hat, Inc.
+#
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+from oslo_log import log
+
+import tobiko
+from tobiko.openstack.nova import _client
+from tobiko.openstack.nova import _server
+from tobiko.openstack.nova import _service
+from tobiko.openstack import topology
+from tobiko.shell import ping
+from tobiko.shell import sh
+
+
+LOG = log.getLogger(__name__)
+
+
+def check_nova_services_health(timeout=600., interval=2.):
+    retry = tobiko.retry(timeout=timeout, interval=interval)
+    _service.wait_for_services_up(retry=retry)
+
+
+def check_virsh_domains_running():
+    """check all vms are running via virsh list command"""
+    for compute in topology.list_openstack_nodes(group='compute'):
+        hostname = sh.get_hostname(ssh_client=compute.ssh_client,
+                                   fqdn=True)
+        param = {'OS-EXT-SRV-ATTR:hypervisor_hostname': hostname}
+        vm_list_per_compute = _client.list_servers(**param)
+        for vm in vm_list_per_compute:
+            for attempt in tobiko.retry(timeout=120, interval=5):
+                if check_vm_running_via_virsh(compute, vm.id):
+                    LOG.info(f"{vm.id} is running ok on {hostname}")
+                    break
+                else:
+                    msg = f"{vm.id} is not in running state on {hostname}"
+                    if attempt.is_last:
+                        tobiko.fail("timeout!! " + msg)
+                    LOG.error(f"{vm.id} is not in running state on "
+                              f"{hostname} ... Retrying")
+
+
+def check_vms_ping(vm_list):
+    for vm in vm_list:
+        fip = _server.list_server_ip_addresses(vm,
+                                               address_type='floating').first
+        ping.ping_until_received(fip).assert_replied()
+
+
+def check_vm_evacuations(vms_old=None, compute_host=None, timeout=600,
+                         interval=2, check_no_evacuation=False):
+    """check evacuation of vms
+    input: old vm status and expected new compute"""
+
+    for attempt in tobiko.retry(timeout=timeout, interval=interval):
+        failures = []
+        param = ({} if compute_host is None
+                 else {'OS-EXT-SRV-ATTR:hypervisor_hostname': compute_host})
+        vms_new = _client.list_servers(**param)
+        for vm_old in vms_old or []:
+            old_bm_host = vm_old._info[  # pylint: disable=W0212
+                'OS-EXT-SRV-ATTR:hypervisor_hostname']
+            new_vm_host = vms_new.with_attributes(  # pylint: disable=W0212
+                id=vm_old.id).uniq._info[
+                'OS-EXT-SRV-ATTR:hypervisor_hostname']
+
+            if check_no_evacuation:
+                cond = bool(old_bm_host != new_vm_host)
+            else:
+                cond = bool(old_bm_host == new_vm_host)
+
+            if cond:
+                failures.append(
+                    'Failed vm evacuations: {}\n\n'.format(vm_old))
+        if not failures:
+            LOG.debug(vms_old.to_string())
+            LOG.debug('All vms were evacuated!')
+            return
+
+        if attempt.is_last:
+            tobiko.fail(
+                'Timeout checking VM evacuations:\n{!s}', '\n'.join(failures))
+        else:
+            LOG.error('Failed nova evacuation:\n {}'.format(failures))
+            LOG.error('Retrying...')
+
+
+def check_vm_running_via_virsh(topology_compute, vm_id):
+    """check that a vm is in running state via virsh command,
+    return false if not"""
+    if vm_id in get_vm_uuid_list_running_via_virsh(topology_compute):
+        return True
+    else:
+        return False
+
+
+def get_vm_uuid_list_running_via_virsh(topology_compute):
+    from tobiko import podified
+    from tobiko.tripleo import containers
+    from tobiko.tripleo import overcloud
+
+    get_uuid_loop = ("for i in `virsh list --name --state-running`; do "
+                     "virsh domuuid $i; done")
+    containerized_libvirt_cmd = \
+        "{container_runtime} exec -u root {nova_libvirt} sh -c '{get_uuids}'"
+
+    if podified.has_podified_cp():
+        command = containerized_libvirt_cmd.format(
+            container_runtime=podified.CONTAINER_RUNTIME,
+            nova_libvirt=podified.NOVA_LIBVIRT_CONTAINER,
+            get_uuids=get_uuid_loop)
+    elif overcloud.has_overcloud():
+        command = containerized_libvirt_cmd.format(
+            container_runtime=containers.get_container_runtime_name(),
+            nova_libvirt=containers.get_libvirt_container_name(),
+            get_uuids=get_uuid_loop)
+    else:
+        command = get_uuid_loop
+
+    return sh.execute(command,
+                      ssh_client=topology_compute.ssh_client,
+                      sudo=True).stdout.split()
+
+
+def wait_for_all_instances_status(status, timeout=None):
+    """wait for all instances for a certain status or raise an exception"""
+    for instance in _client.list_servers():
+        _client.wait_for_server_status(server=instance.id, status=status,
+                                       timeout=timeout)
+        instance_info = 'instance {nova_instance} is {state} on {host}'.format(
+            nova_instance=instance.name,
+            state=status,
+            host=instance._info[  # pylint: disable=W0212
+                'OS-EXT-SRV-ATTR:hypervisor_hostname'])
+        LOG.info(instance_info)
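The helpers above are re-exported through tobiko.openstack.nova (see the
__init__.py hunk earlier), so a test can call them directly. A minimal,
hypothetical usage sketch, not part of this change; it assumes a reachable
cloud, a configured tobiko environment, and that every server has a floating
IP (check_vms_ping takes the first floating address of each server):

    from tobiko.openstack import nova

    def test_cloud_is_healthy():
        # fail if nova services stay down for longer than 10 minutes
        nova.check_nova_services_health(timeout=600., interval=2.)
        # cross-check every server against `virsh list` on its compute node
        nova.check_virsh_domains_running()
        # ping the floating IP of every existing server
        nova.check_vms_ping(nova.list_servers())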
tobiko/openstack/nova/_server.py
@@ -136,3 +136,24 @@ def get_server_id(server: _client.ServerType) -> str:
         return server
     else:
         return server.id
+
+
+def action_on_all_instances(action):
+    """try to start/stop all instances"""
+    if action not in ('active', 'shutoff'):
+        tobiko.fail(f'Wrong action on VM instances: {action}')
+
+    client_action_method = (_client.activate_server if action == 'active'
+                            else _client.shutoff_server)
+    expected_vm_status = 'ACTIVE' if action == 'active' else 'SHUTOFF'
+
+    for instance in _client.list_servers():
+        activated_instance = client_action_method(instance)
+        instance_info = 'instance {nova_instance} is {state} on {host}'.format(
+            nova_instance=activated_instance.name,
+            state=activated_instance.status,
+            host=activated_instance._info[  # pylint: disable=W0212
+                'OS-EXT-SRV-ATTR:hypervisor_hostname'])
+        LOG.info(instance_info)
+        if activated_instance.status != expected_vm_status:
+            tobiko.fail(instance_info)
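For context, a hedged sketch of how this single helper replaces the old
start_all_instances()/stop_all_instances() pair removed from
tobiko/tripleo/nova.py; any action other than 'active' or 'shutoff' fails
immediately via tobiko.fail(). The snippet is illustrative only and assumes
the usual tobiko OpenStack credentials:

    from tobiko.openstack import nova

    nova.action_on_all_instances('shutoff')  # stop every server; each must reach SHUTOFF
    nova.action_on_all_instances('active')   # start them again; each must reach ACTIVE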
tobiko/podified/__init__.py
@@ -18,6 +18,9 @@ from tobiko.podified import _openshift
 from tobiko.podified import containers
 
 
+NOVA_LIBVIRT_CONTAINER = 'nova_compute'
+CONTAINER_RUNTIME = 'podman'
+
 EDPM_NODE = _topology.EDPM_NODE
 OCP_WORKER = _topology.OCP_WORKER
 EDPM_COMPUTE_GROUP = _openshift.EDPM_COMPUTE_GROUP
tobiko/shell/sh (hostname helpers)
@@ -31,10 +31,14 @@ class HostnameError(tobiko.TobikoException):
 
 HOSTNAMES_CACHE: typing.MutableMapping[typing.Optional[ssh.SSHClientFixture],
                                        str] = weakref.WeakKeyDictionary()
+HOSTNAMES_FQDN_CACHE: \
+    typing.MutableMapping[typing.Optional[ssh.SSHClientFixture],
+                          str] = weakref.WeakKeyDictionary()
 
 
 def get_hostname(ssh_client: ssh.SSHClientType = None,
                  cached=True,
+                 fqdn=False,
                  **execute_params) -> str:
     ssh_client = ssh.ssh_client_fixture(ssh_client)
     if ssh_client is None:
@@ -42,25 +46,36 @@ def get_hostname(ssh_client: ssh.SSHClientType = None,
 
     if cached:
         try:
-            hostname = HOSTNAMES_CACHE[ssh_client]
+            if not fqdn:
+                hostname = HOSTNAMES_CACHE[ssh_client]
+            else:
+                hostname = HOSTNAMES_FQDN_CACHE[ssh_client]
         except KeyError:
             pass
         else:
            return hostname
 
     hostname = ssh_hostname(ssh_client=ssh_client,
+                            fqdn=fqdn,
                             **execute_params)
     if cached:
-        HOSTNAMES_CACHE[ssh_client] = hostname
+        if not fqdn:
+            HOSTNAMES_CACHE[ssh_client] = hostname
+        else:
+            HOSTNAMES_FQDN_CACHE[ssh_client] = hostname
     return hostname
 
 
 def ssh_hostname(ssh_client: ssh.SSHClientFixture,
+                 fqdn=False,
                  **execute_params) \
         -> str:
     tobiko.check_valid_type(ssh_client, ssh.SSHClientFixture)
+    command = 'hostname'
+    if fqdn:
+        command += ' -f'
     try:
-        result = _execute.execute('hostname',
+        result = _execute.execute(command,
                                   ssh_client=ssh_client,
                                   **execute_params)
     except _exception.ShellCommandFailed as ex:
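The separate FQDN cache matters because both name forms can now be requested
for the same SSH client without one overwriting the other. A small,
hypothetical sketch (ssh_client stands for any tobiko SSH client fixture, and
the hostnames shown are made up):

    from tobiko.shell import sh

    short = sh.get_hostname(ssh_client=ssh_client)             # runs `hostname`, e.g. 'compute-0'
    fqdn = sh.get_hostname(ssh_client=ssh_client, fqdn=True)   # runs `hostname -f`, e.g. 'compute-0.example.com'
    # repeated calls are answered from HOSTNAMES_CACHE / HOSTNAMES_FQDN_CACHE,
    # so no extra remote command is executed here
    assert fqdn == sh.get_hostname(ssh_client=ssh_client, fqdn=True)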
tobiko/tests/faults/ha/cloud_disruptions.py
@@ -31,6 +31,7 @@ from tobiko import config
 from tobiko.openstack import glance
 from tobiko.openstack import keystone
 from tobiko.openstack import neutron
+from tobiko.openstack import nova
 from tobiko.openstack import stacks
 from tobiko.openstack import tests
 from tobiko.openstack import topology
@@ -38,7 +39,6 @@ from tobiko.tests.faults.ha import test_cloud_recovery
 from tobiko.shell import ping
 from tobiko.shell import sh
 from tobiko.tripleo import containers
-from tobiko.tripleo import nova
 from tobiko.tripleo import pacemaker
 from tobiko.tripleo import topology as tripleo_topology
 from tobiko import tripleo
@@ -760,6 +760,15 @@ def evac_failover_compute(compute_host, failover_type=sh.hard_reset_method):
     disrupt_node(compute_host, disrupt_method=failover_type)
 
 
+def get_random_compute_with_vms():
+    for compute in nova.list_hypervisors():
+        param = {'OS-EXT-SRV-ATTR:hypervisor_hostname':
+                 compute.hypervisor_hostname}
+        vm_list_per_compute = nova.list_servers(**param)
+        if len(vm_list_per_compute) > 0:
+            return compute.hypervisor_hostname
+
+
 def check_iha_evacuation(failover_type=None, vm_type=None):
     """check vms on compute host,disrupt compute host,
     check all vms evacuated and pingable"""
@@ -767,36 +776,37 @@ def check_iha_evacuation(failover_type=None, vm_type=None):
         LOG.info(f'Begin IHA tests iteration {iteration}')
         LOG.info('create 2 vms')
         tests.test_servers_creation(number_of_servers=2)
-        compute_host = nova.get_random_compute_with_vms_name()
-        vms_starting_state_df = nova.get_compute_vms_df(compute_host)
+        compute_host = get_random_compute_with_vms()
+        vms_starting_state = nova.list_servers(
+            **{'OS-EXT-SRV-ATTR:hypervisor_hostname': compute_host})
         if vm_type == 'shutoff':
-            nova.stop_all_instances()
+            nova.action_on_all_instances('shutoff')
         if vm_type == 'evac_image_vm':
             evac_vm_stack = tests.test_evacuable_server_creation()
-            evac_vm_id = nova.get_stack_server_id(evac_vm_stack)
-            org_nova_evac_df = nova.vm_df(evac_vm_id, nova.get_vms_table())
+            evac_vm_id = evac_vm_stack.server_details.id
+            old_nova_evac = nova.get_server(server_id=evac_vm_id)
         if not vm_type == 'shutoff':
-            nova.check_df_vms_ping(vms_starting_state_df)
+            nova.check_vms_ping(vms_starting_state)
         LOG.info(f'perform a failover on {compute_host}')
         evac_failover_compute(compute_host, failover_type=failover_type)
         test_cloud_recovery.overcloud_health_checks(passive_checks_only=True)
         if vm_type == 'evac_image_vm':
-            nova.check_vm_evacuations(vms_df_old=org_nova_evac_df,
+            nova.check_vm_evacuations(vms_old=old_nova_evac,
                                       compute_host=compute_host,
                                       timeout=600,
                                       check_no_evacuation=True)
             # delete evacuable tagged image because it prevents
             # non tagged evacuations if exists
             delete_evacuable_tagged_image()
-            new_nova_evac_df = nova.vm_df(evac_vm_id, nova.get_vms_table())
-            nova.check_vm_evacuations(org_nova_evac_df, new_nova_evac_df)
+            new_nova_evac = nova.get_server(server_id=evac_vm_id)
+            nova.check_vm_evacuations(old_nova_evac, new_nova_evac)
         else:
-            nova.check_vm_evacuations(vms_df_old=vms_starting_state_df,
+            nova.check_vm_evacuations(vms_old=vms_starting_state,
                                       compute_host=compute_host,
                                       timeout=600)
         LOG.info('check evac is Done')
         if not vm_type == 'shutoff':
-            nova.check_df_vms_ping(vms_starting_state_df)
+            nova.check_vms_ping(vms_starting_state)
 
 
 def check_iha_evacuation_evac_image_vm():
tobiko/tests/faults/ha/test_cloud_recovery.py
@@ -50,16 +50,15 @@ has_external_lb = CONF.tobiko.rhosp.has_external_load_balancer
 
 def overcloud_health_checks(passive_checks_only=False,
                             skip_mac_table_size_test=False):
-    # this method will be changed in future commit
     check_pacemaker_resources_health()
     check_overcloud_processes_health()
-    nova.check_nova_services_health()
+    nova_osp.check_nova_services_health()
     tests.test_alive_agents_are_consistent_along_time()
     if not passive_checks_only:
-        # create a uniq stack
+        # create a unique stack that will be cleaned up at the end of each test
        check_vm_create()
-        nova.start_all_instances()
-        nova.check_computes_vms_running_via_virsh()
+        nova_osp.action_on_all_instances('active')
+        nova_osp.check_virsh_domains_running()
     containers.list_node_containers.cache_clear()
     containers.assert_all_tripleo_containers_running()
     containers.assert_equal_containers_state()
@@ -231,10 +230,10 @@ class DisruptTripleoNodesTest(testtools.TestCase):
                                       hard_reset=False,
                                       sequentially=sequentially)
             # verify VM status is updated after reboot
-            nova.wait_for_all_instances_status('SHUTOFF')
+            nova_osp.wait_for_all_instances_status('SHUTOFF')
             # start all VM instance
            # otherwise sidecar containers will not run after computes reboot
-            nova.start_all_instances()
+            nova_osp.action_on_all_instances('active')
            OvercloudHealthCheck.run_after(passive_checks_only=True)
 
        _run_test()
@@ -247,7 +246,7 @@ class DisruptTripleoNodesTest(testtools.TestCase):
        # nova.wait_for_all_instances_status('SHUTOFF')
        # # start all VM instance
        # # otherwise sidecar containers will not run after computes reboot
-       # nova.start_all_instances()
+       # nova_osp.action_on_all_instances('active')
        # OvercloudHealthCheck.run_after(passive_checks_only=True)
 
    @testtools.skipIf(has_external_lb, SKIP_MESSAGE_EXTLB)
tobiko/tests/faults/podified/ha/ (podified HA recovery tests)
@@ -14,11 +14,37 @@
 # License for the specific language governing permissions and limitations
 from __future__ import absolute_import
 
+from oslo_log import log
 import testtools
-
 
+from tobiko.tests.faults.ha import test_cloud_recovery
 from tobiko.tests.faults.podified.ha import cloud_disruptions
+from tobiko.openstack import tests
 from tobiko import podified
+from tobiko.openstack import nova
+
+
+LOG = log.getLogger(__name__)
+
+
+def podified_health_checks():
+    nova.check_nova_services_health()
+    tests.test_alive_agents_are_consistent_along_time()
+    # create a unique stack that will be cleaned up at the end of each test
+    # TODO(eolivare) add tests.test_server_creation_no_fip() when BGP is
+    # configured with expose_tenant_networks
+    tests.test_server_creation()
+    nova.action_on_all_instances('active')
+    nova.check_virsh_domains_running()
+    test_cloud_recovery.octavia_health_checks()
+
+
+class PodifiedCloudHealthCheck(test_cloud_recovery.OvercloudHealthCheck):
+    def setup_fixture(self):
+        # run validations
+        LOG.info("Start executing Podified health checks.")
+        podified_health_checks()
+        LOG.info("Podified health checks successfully executed.")
 
 
 @podified.skip_if_not_podified
@@ -27,17 +53,20 @@ class DisruptPodifiedNodesTest(testtools.TestCase):
     disruptive_action: a function that runs some
     disruptive scenario on a node"""
 
+    def test_0vercloud_health_check(self):
+        PodifiedCloudHealthCheck.run_before()
+
     def test_kill_all_galera_services(self):
-        # HealthCheck.run_before()
+        PodifiedCloudHealthCheck.run_before()
         cloud_disruptions.kill_all_galera_services()
-        # HealthCheck.run_after()
+        PodifiedCloudHealthCheck.run_after()
 
     def test_remove_all_grastate_galera(self):
-        # HealthCheck.run_before()
+        PodifiedCloudHealthCheck.run_before()
         cloud_disruptions.remove_all_grastate_galera()
-        # HealthCheck.run_before()
+        PodifiedCloudHealthCheck.run_after()
 
     def test_remove_one_grastate_galera(self):
-        # HealthCheck.run_before()
+        PodifiedCloudHealthCheck.run_before()
         cloud_disruptions.remove_one_grastate_galera()
-        # HealthCheck.run_after()
+        PodifiedCloudHealthCheck.run_after()
tobiko/tripleo/nova.py
@@ -1,13 +1,24 @@
+# Copyright (c) 2025 Red Hat, Inc.
+#
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
 from __future__ import absolute_import
 
 import time
 import typing  # noqa
 from functools import wraps
 
 
 import netaddr
 from oslo_log import log
 import pandas
 
 import tobiko
 from tobiko.tripleo import overcloud
@@ -15,217 +26,6 @@ from tobiko.shell import iperf3
 from tobiko.shell import ping
 from tobiko.shell import sh
 from tobiko.shell import ssh
 from tobiko.openstack import nova
 from tobiko.openstack import topology
 from tobiko.tripleo import containers
 
 
 LOG = log.getLogger(__name__)
 
 
-def check_nova_services_health(timeout=600., interval=2.):
-    retry = tobiko.retry(timeout=timeout, interval=interval)
-    nova.wait_for_services_up(retry=retry)
-
-
-def start_all_instances():
-    """try to start all stopped overcloud instances"""
-    for instance in nova.list_servers():
-        activated_instance = nova.activate_server(instance)
-        time.sleep(3)
-        instance_info = 'instance {nova_instance} is {state} on {host}'.format(
-            nova_instance=activated_instance.name,
-            state=activated_instance.status,
-            host=activated_instance._info[  # pylint: disable=W0212
-                'OS-EXT-SRV-ATTR:hypervisor_hostname'])
-        LOG.info(instance_info)
-        if activated_instance.status != 'ACTIVE':
-            tobiko.fail(instance_info)
-
-
-def stop_all_instances():
-    """try to start all stopped overcloud instances"""
-    for instance in nova.list_servers():
-        activated_instance = nova.shutoff_server(instance)
-        time.sleep(3)
-        instance_info = 'instance {nova_instance} is {state} on {host}'.format(
-            nova_instance=activated_instance.name,
-            state=activated_instance.status,
-            host=activated_instance._info[  # pylint: disable=W0212
-                'OS-EXT-SRV-ATTR:hypervisor_hostname'])
-        LOG.info(instance_info)
-        if activated_instance.status != 'SHUTOFF':
-            tobiko.fail(instance_info)
-
-
-def wait_for_all_instances_status(status, timeout=None):
-    """wait for all instances for a certain status or raise an exception"""
-    for instance in nova.list_servers():
-        nova.wait_for_server_status(server=instance.id, status=status,
-                                    timeout=timeout)
-        instance_info = 'instance {nova_instance} is {state} on {host}'.format(
-            nova_instance=instance.name,
-            state=status,
-            host=instance._info[  # pylint: disable=W0212
-                'OS-EXT-SRV-ATTR:hypervisor_hostname'])
-        LOG.info(instance_info)
-
-
-def get_vms_table():
-    """populate a dataframe with vm host,id,status"""
-    vms_data = [(vm._info[  # pylint: disable=W0212
-        'OS-EXT-SRV-ATTR:hypervisor_hostname'], vm.id,
-        vm.status) for vm in nova.list_servers()]
-    vms_df = pandas.DataFrame(vms_data, columns=['vm_host', 'vm_id',
-                                                 'vm_state'])
-    return vms_df
-
-
-def list_computes():
-    """list compute host names"""
-    return [compute.hypervisor_hostname for compute in nova.list_hypervisors()]
-
-
-def get_compute_vms_df(compute_host):
-    """input: compute hostname (can be short)
-    output: dataframe with vms of that host"""
-    return get_vms_table().query(f"vm_host=='{compute_host}'")
-
-
-def get_random_compute_with_vms_name():
-    """get a randomcompute holding vm/s"""
-    for compute in list_computes():
-        if not get_compute_vms_df(compute).empty:
-            return compute
-
-
-def vm_info(vm_id, vms_df):
-    """input: vm and a vms df
-    output: host string"""
-    return vms_df.query(f"vm_id == '{vm_id}'").to_string()
-
-
-def vm_df(vm_id, vms_df):
-    """input: vm and a vms df
-    output: host string"""
-    return vms_df.query(f"vm_id == '{vm_id}'")
-
-
-def vm_floating_ip(vm_id):
-    """input: vm_id
-    output it's floating ip"""
-
-    vm = nova.get_server(vm_id)
-    floating_ip = nova.list_server_ip_addresses(
-        vm, address_type='floating').first
-    return floating_ip
-
-
-def check_ping_vm_fip(fip):
-    ping.ping_until_received(fip).assert_replied()
-
-
-def check_df_vms_ping(df):
-    """input: dataframe with vms_ids
-    try to ping all vms in df"""
-
-    for vm_id in df.vm_id.to_list():
-        check_ping_vm_fip(vm_floating_ip(vm_id))
-
-
-def vm_location(vm_id, vms_df):
-    """input: vm and a vms df
-    output: host string"""
-    return vms_df.query(f"vm_id == '{vm_id}'")['vm_host'].to_string(
-        index=False)
-
-
-def check_vm_evacuations(vms_df_old=None, compute_host=None, timeout=600,
-                         interval=2, check_no_evacuation=False):
-    """check evacuation of vms
-    input: old and new vms_state_tables dfs"""
-    failures = []
-    start = time.time()
-
-    while time.time() - start < timeout:
-        failures = []
-        vms_df_new = get_compute_vms_df(compute_host)
-        for vm_id in vms_df_old.vm_id.to_list():
-            old_bm_host = vm_location(vm_id, vms_df_old)
-            new_vm_host = vm_location(vm_id, vms_df_new)
-
-            if check_no_evacuation:
-                cond = bool(old_bm_host != new_vm_host)
-            else:
-                cond = bool(old_bm_host == new_vm_host)
-
-            if cond:
-                failures.append(
-                    'failed vm evacuations: {}\n\n'.format(vm_info(vm_id,
-                                                                   vms_df_old)))
-        if failures:
-            LOG.info('Failed nova evacuation:\n {}'.format(failures))
-            LOG.info('Not all nova vms evacuated ..')
-            LOG.info('Retrying , timeout at: {}'
-                     .format(timeout-(time.time() - start)))
-            time.sleep(interval)
-        else:
-            LOG.info(vms_df_old.to_string())
-            LOG.info('All vms were evacuated!')
-            return
-    # exhausted all retries
-    if failures:
-        tobiko.fail(
-            'failed vm evacuations:\n{!s}', '\n'.join(failures))
-
-
-def get_stack_server_id(stack):
-    return stack.server_details.id
-
-
-def get_fqdn_from_topology_node(topology_node):
-    return sh.execute("hostname -f", ssh_client=topology_node.ssh_client,
-                      expect_exit_status=None).stdout.strip()
-
-
-def check_vm_running_via_virsh(topology_compute, vm_id):
-    """check that a vm is in running state via virsh command,
-    return false if not"""
-    if vm_id in get_vm_uuid_list_running_via_virsh(topology_compute):
-        return True
-    else:
-        return False
-
-
-def get_vm_uuid_list_running_via_virsh(topology_compute):
-    if overcloud.has_overcloud():
-        container_runtime = containers.get_container_runtime_name()
-        nova_libvirt = containers.get_libvirt_container_name()
-        command = f"sudo {container_runtime} exec {nova_libvirt} " \
-                  f"sh -c 'for i in `virsh list --name --state-running` " \
-                  f";do virsh domuuid $i;done'"
-    else:
-        command = "for i in `sudo virsh list --name --state-running` " \
-                  ";do virsh domuuid $i;done'"
-    return sh.execute(command,
-                      ssh_client=topology_compute.ssh_client).stdout.split()
-
-
-def check_computes_vms_running_via_virsh():
-    """check all vms are running via virsh list command"""
-    for compute in topology.list_openstack_nodes(group='compute'):
-        hostname = get_fqdn_from_topology_node(compute)
-        retry = tobiko.retry(timeout=120, interval=5)
-        vms_df = get_compute_vms_df(hostname)
-        for vm_id in vms_df.vm_id.to_list():
-            for _ in retry:
-                if check_vm_running_via_virsh(compute, vm_id):
-                    LOG.info(f"{vm_id} is running ok on "
-                             f"{compute.hostname}")
-                    break
-                else:
-                    LOG.info(f"{vm_id} is not in running state on "
-                             f"{compute.hostname}")
 
 
 # Test is inteded for D/S env