From 19be771767d716b105873eadbdbc87fdec59a3f3 Mon Sep 17 00:00:00 2001
From: Eduardo Olivares
Date: Mon, 30 Dec 2024 16:07:49 +0100
Subject: [PATCH] Add first health checks to faults podified ha tests

This patch adds mostly black-box health checks: they use the OpenStack
component APIs (Neutron, Nova), create a workload and check connectivity
to it.

To be implemented in follow-up patches:
- Health checks using the OpenShift client (pod status)
- Connect to EDPM nodes and verify they are healthy

Change-Id: I412cb9933c4cdd2662561fafcfc1c18fceccc0c3
---
 tobiko/openstack/nova/__init__.py             |   8 +
 tobiko/openstack/nova/_checks.py              | 147 +++++++++++
 tobiko/openstack/nova/_server.py              |  21 ++
 tobiko/podified/__init__.py                   |   3 +
 tobiko/shell/sh/_hostname.py                  |  21 +-
 tobiko/tests/faults/ha/cloud_disruptions.py   |  34 ++-
 tobiko/tests/faults/ha/test_cloud_recovery.py |  15 +-
 .../faults/podified/ha/test_cloud_recovery.py |  43 +++-
 tobiko/tripleo/nova.py                        | 230 ++----------------
 9 files changed, 277 insertions(+), 245 deletions(-)
 create mode 100644 tobiko/openstack/nova/_checks.py

diff --git a/tobiko/openstack/nova/__init__.py b/tobiko/openstack/nova/__init__.py
index 0ab197bd9..5e37df98e 100644
--- a/tobiko/openstack/nova/__init__.py
+++ b/tobiko/openstack/nova/__init__.py
@@ -13,6 +13,7 @@
# under the License.
from __future__ import absolute_import

+from tobiko.openstack.nova import _checks
from tobiko.openstack.nova import _client
from tobiko.openstack.nova import _cloud_init
from tobiko.openstack.nova import _hypervisor
@@ -85,5 +86,12 @@ find_server_ip_address = _server.find_server_ip_address
HasServerMixin = _server.HasServerMixin
get_server_id = _server.get_server_id
list_server_ip_addresses = _server.list_server_ip_addresses
+action_on_all_instances = _server.action_on_all_instances

wait_for_services_up = _service.wait_for_services_up
+
+check_nova_services_health = _checks.check_nova_services_health
+check_virsh_domains_running = _checks.check_virsh_domains_running
+wait_for_all_instances_status = _checks.wait_for_all_instances_status
+check_vms_ping = _checks.check_vms_ping
+check_vm_evacuations = _checks.check_vm_evacuations

diff --git a/tobiko/openstack/nova/_checks.py b/tobiko/openstack/nova/_checks.py
new file mode 100644
index 000000000..347e048ca
--- /dev/null
+++ b/tobiko/openstack/nova/_checks.py
@@ -0,0 +1,147 @@
+# Copyright (c) 2025 Red Hat, Inc.
+#
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
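+#
+# Illustrative usage sketch (assumes only the re-exports added to
+# tobiko/openstack/nova/__init__.py above): a test would typically drive
+# these checks through the package namespace, roughly like:
+#
+#     from tobiko.openstack import nova
+#
+#     nova.check_nova_services_health()        # nova services report up
+#     nova.action_on_all_instances('active')   # start every VM instance
+#     nova.check_virsh_domains_running()       # virsh agrees on each compute
+#     nova.wait_for_all_instances_status('ACTIVE')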
+from oslo_log import log

import tobiko
from tobiko.openstack.nova import _client
from tobiko.openstack.nova import _server
from tobiko.openstack.nova import _service
from tobiko.openstack import topology
from tobiko.shell import ping
from tobiko.shell import sh


LOG = log.getLogger(__name__)


def check_nova_services_health(timeout=600., interval=2.):
    retry = tobiko.retry(timeout=timeout, interval=interval)
    _service.wait_for_services_up(retry=retry)


def check_virsh_domains_running():
    """check that all vms are running via the virsh list command"""
    for compute in topology.list_openstack_nodes(group='compute'):
        hostname = sh.get_hostname(ssh_client=compute.ssh_client,
                                   fqdn=True)
        param = {'OS-EXT-SRV-ATTR:hypervisor_hostname': hostname}
        vm_list_per_compute = _client.list_servers(**param)
        for vm in vm_list_per_compute:
            for attempt in tobiko.retry(timeout=120, interval=5):
                if check_vm_running_via_virsh(compute, vm.id):
                    LOG.info(f"{vm.id} is running ok on {hostname}")
                    break
                msg = f"{vm.id} is not in running state on {hostname}"
                if attempt.is_last:
                    tobiko.fail("timeout!! " + msg)
                LOG.error(msg + " ... Retrying")


def check_vms_ping(vm_list):
    """ping the floating ip of every vm in the list until it replies"""
    for vm in vm_list:
        fip = _server.list_server_ip_addresses(vm,
                                               address_type='floating').first
        ping.ping_until_received(fip).assert_replied()


def check_vm_evacuations(vms_old=None, compute_host=None, timeout=600,
                         interval=2, check_no_evacuation=False):
    """check evacuation of vms
    input: servers as they were before the disruption and, optionally,
    the compute host the vms are expected to have been evacuated from"""

    for attempt in tobiko.retry(timeout=timeout, interval=interval):
        failures = []
        param = ({} if compute_host is None
                 else {'OS-EXT-SRV-ATTR:hypervisor_hostname': compute_host})
        vms_new = _client.list_servers(**param)
        for vm_old in vms_old or []:
            old_bm_host = vm_old._info[  # pylint: disable=W0212
                'OS-EXT-SRV-ATTR:hypervisor_hostname']
            new_vm_host = vms_new.with_attributes(  # pylint: disable=W0212
                id=vm_old.id).uniq._info[
                'OS-EXT-SRV-ATTR:hypervisor_hostname']

            if check_no_evacuation:
                cond = bool(old_bm_host != new_vm_host)
            else:
                cond = bool(old_bm_host == new_vm_host)

            if cond:
                failures.append(
                    'Failed vm evacuations: {}\n\n'.format(vm_old))
        if not failures:
            LOG.debug('All vms were evacuated: %s', vms_old)
            return

        if attempt.is_last:
            tobiko.fail(
                'Timeout checking VM evacuations:\n{!s}', '\n'.join(failures))
        else:
            LOG.error('Failed nova evacuation:\n {}'.format(failures))
            LOG.error('Retrying...')


def check_vm_running_via_virsh(topology_compute, vm_id):
    """check whether a vm is in running state according to virsh,
    return False if it is not"""
    return vm_id in get_vm_uuid_list_running_via_virsh(topology_compute)


def get_vm_uuid_list_running_via_virsh(topology_compute):
    from tobiko import podified
    from tobiko.tripleo import containers
    from tobiko.tripleo import overcloud

    get_uuid_loop = ("for i in `virsh list --name --state-running`; do "
                     "virsh domuuid $i; done")
    # on podified deployments the command below expands to something like:
    #   podman exec -u root nova_compute sh -c \
    #       'for i in `virsh list --name --state-running`; do virsh domuuid $i; done'
    containerized_libvirt_cmd = \
        "{container_runtime} exec -u root {nova_libvirt} sh -c '{get_uuids}'"

    if podified.has_podified_cp():
        command = containerized_libvirt_cmd.format(
            container_runtime=podified.CONTAINER_RUNTIME,
            nova_libvirt=podified.NOVA_LIBVIRT_CONTAINER,
            get_uuids=get_uuid_loop)
    elif overcloud.has_overcloud():
        command = containerized_libvirt_cmd.format(
            container_runtime=containers.get_container_runtime_name(),
            nova_libvirt=containers.get_libvirt_container_name(),
            get_uuids=get_uuid_loop)
    else:
        command = get_uuid_loop

    return sh.execute(command,
                      ssh_client=topology_compute.ssh_client,
                      sudo=True).stdout.split()


def wait_for_all_instances_status(status, timeout=None):
    """wait for all instances to reach a certain status or raise an exception"""
    for instance in _client.list_servers():
        _client.wait_for_server_status(server=instance.id, status=status,
                                       timeout=timeout)
        instance_info = 'instance {nova_instance} is {state} on {host}'.format(
            nova_instance=instance.name,
            state=status,
            host=instance._info[  # pylint: disable=W0212
                'OS-EXT-SRV-ATTR:hypervisor_hostname'])
        LOG.info(instance_info)
diff --git a/tobiko/openstack/nova/_server.py b/tobiko/openstack/nova/_server.py
index 4aa4283b4..061e8eaec 100644
--- a/tobiko/openstack/nova/_server.py
+++ b/tobiko/openstack/nova/_server.py
@@ -136,3 +136,24 @@ def get_server_id(server: _client.ServerType) -> str:
        return server
    else:
        return server.id
+
+
+def action_on_all_instances(action):
+    """try to start/stop all instances"""
+    if action not in ('active', 'shutoff'):
+        tobiko.fail(f'Wrong action on VM instances: {action}')
+
+    client_action_method = (_client.activate_server if action == 'active'
+                            else _client.shutoff_server)
+    expected_vm_status = 'ACTIVE' if action == 'active' else 'SHUTOFF'
+
+    for instance in _client.list_servers():
+        updated_instance = client_action_method(instance)
+        instance_info = 'instance {nova_instance} is {state} on {host}'.format(
+            nova_instance=updated_instance.name,
+            state=updated_instance.status,
+            host=updated_instance._info[  # pylint: disable=W0212
+                'OS-EXT-SRV-ATTR:hypervisor_hostname'])
+        LOG.info(instance_info)
+        if updated_instance.status != expected_vm_status:
+            tobiko.fail(instance_info)
diff --git a/tobiko/podified/__init__.py b/tobiko/podified/__init__.py
index c2141545e..585d9303a 100644
--- a/tobiko/podified/__init__.py
+++ b/tobiko/podified/__init__.py
@@ -18,6 +18,9 @@ from tobiko.podified import _openshift
from tobiko.podified import containers

+NOVA_LIBVIRT_CONTAINER = 'nova_compute'
+CONTAINER_RUNTIME = 'podman'
+
EDPM_NODE = _topology.EDPM_NODE
OCP_WORKER = _topology.OCP_WORKER
EDPM_COMPUTE_GROUP = _openshift.EDPM_COMPUTE_GROUP
diff --git a/tobiko/shell/sh/_hostname.py b/tobiko/shell/sh/_hostname.py
index 9fb9b6c3f..200ca6834 100644
--- a/tobiko/shell/sh/_hostname.py
+++ b/tobiko/shell/sh/_hostname.py
@@ -31,10 +31,14 @@ class HostnameError(tobiko.TobikoException):
HOSTNAMES_CACHE: typing.MutableMapping[typing.Optional[ssh.SSHClientFixture],
                                       str] = weakref.WeakKeyDictionary()
+HOSTNAMES_FQDN_CACHE: \
+    typing.MutableMapping[typing.Optional[ssh.SSHClientFixture],
+                          str] = weakref.WeakKeyDictionary()


def get_hostname(ssh_client: ssh.SSHClientType = None,
                 cached=True,
+                fqdn=False,
                 **execute_params) -> str:
    ssh_client = ssh.ssh_client_fixture(ssh_client)
    if ssh_client is None:
@@ -42,25 +46,36 @@ def get_hostname(ssh_client: ssh.SSHClientType = None,
    if cached:
        try:
-            hostname = HOSTNAMES_CACHE[ssh_client]
+            if not fqdn:
+                hostname = HOSTNAMES_CACHE[ssh_client]
+            else:
+                hostname = HOSTNAMES_FQDN_CACHE[ssh_client]
        except KeyError:
            pass
        else:
            return hostname

    hostname = ssh_hostname(ssh_client=ssh_client,
+                            fqdn=fqdn,
                            **execute_params)
    if cached:
-        HOSTNAMES_CACHE[ssh_client] = hostname
+        if not fqdn:
+            HOSTNAMES_CACHE[ssh_client] = hostname
+        else:
+            HOSTNAMES_FQDN_CACHE[ssh_client] = hostname

    return hostname
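
# Behaviour sketch (hypothetical host names): short and FQDN hostnames are
# kept in separate per-SSH-client caches, so each variant runs the remote
# command at most once:
#
#     get_hostname(ssh_client=client)             # e.g. 'compute-0'
#     get_hostname(ssh_client=client, fqdn=True)  # e.g. 'compute-0.example.com'
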
def ssh_hostname(ssh_client: ssh.SSHClientFixture,
+                 fqdn=False,
                 **execute_params) \
        -> str:
    tobiko.check_valid_type(ssh_client, ssh.SSHClientFixture)
+    command = 'hostname'
+    if fqdn:
+        command += ' -f'
    try:
-        result = _execute.execute('hostname',
+        result = _execute.execute(command,
                                   ssh_client=ssh_client,
                                   **execute_params)
    except _exception.ShellCommandFailed as ex:
diff --git a/tobiko/tests/faults/ha/cloud_disruptions.py b/tobiko/tests/faults/ha/cloud_disruptions.py
index dd82d5b84..271b6262d 100644
--- a/tobiko/tests/faults/ha/cloud_disruptions.py
+++ b/tobiko/tests/faults/ha/cloud_disruptions.py
@@ -31,6 +31,7 @@ from tobiko import config
from tobiko.openstack import glance
from tobiko.openstack import keystone
from tobiko.openstack import neutron
+from tobiko.openstack import nova
from tobiko.openstack import stacks
from tobiko.openstack import tests
from tobiko.openstack import topology
@@ -38,7 +39,6 @@ from tobiko.tests.faults.ha import test_cloud_recovery
from tobiko.shell import ping
from tobiko.shell import sh
from tobiko.tripleo import containers
-from tobiko.tripleo import nova
from tobiko.tripleo import pacemaker
from tobiko.tripleo import topology as tripleo_topology
from tobiko import tripleo
@@ -760,6 +760,15 @@ def evac_failover_compute(compute_host, failover_type=sh.hard_reset_method):
    disrupt_node(compute_host, disrupt_method=failover_type)


+def get_random_compute_with_vms():
+    for compute in nova.list_hypervisors():
+        param = {'OS-EXT-SRV-ATTR:hypervisor_hostname':
+                 compute.hypervisor_hostname}
+        vm_list_per_compute = nova.list_servers(**param)
+        if len(vm_list_per_compute) > 0:
+            return compute.hypervisor_hostname
+
+
 def check_iha_evacuation(failover_type=None, vm_type=None):
    """check vms on compute host,disrupt compute host,
    check all vms evacuated and pingable"""
    LOG.info(f'Begin IHA tests iteration {iteration}')
    LOG.info('create 2 vms')
    tests.test_servers_creation(number_of_servers=2)
-    compute_host = nova.get_random_compute_with_vms_name()
-    vms_starting_state_df = nova.get_compute_vms_df(compute_host)
+    compute_host = get_random_compute_with_vms()
+    vms_starting_state = nova.list_servers(
+        **{'OS-EXT-SRV-ATTR:hypervisor_hostname': compute_host})
    if vm_type == 'shutoff':
-        nova.stop_all_instances()
+        nova.action_on_all_instances('shutoff')
    if vm_type == 'evac_image_vm':
        evac_vm_stack = tests.test_evacuable_server_creation()
-        evac_vm_id = nova.get_stack_server_id(evac_vm_stack)
-        org_nova_evac_df = nova.vm_df(evac_vm_id, nova.get_vms_table())
+        evac_vm_id = evac_vm_stack.server_details.id
+        old_nova_evac = nova.get_server(server_id=evac_vm_id)
    if not vm_type == 'shutoff':
-        nova.check_df_vms_ping(vms_starting_state_df)
+        nova.check_vms_ping(vms_starting_state)
    LOG.info(f'perform a failover on {compute_host}')
    evac_failover_compute(compute_host, failover_type=failover_type)
    test_cloud_recovery.overcloud_health_checks(passive_checks_only=True)
    if vm_type == 'evac_image_vm':
-        nova.check_vm_evacuations(vms_df_old=org_nova_evac_df,
+        nova.check_vm_evacuations(vms_old=[old_nova_evac],
                                  compute_host=compute_host,
                                  timeout=600,
                                  check_no_evacuation=True)
        # delete evacuable tagged image because it prevents
        # non tagged evacuations if exists
        delete_evacuable_tagged_image()
-        new_nova_evac_df = nova.vm_df(evac_vm_id, nova.get_vms_table())
-        nova.check_vm_evacuations(org_nova_evac_df, new_nova_evac_df)
+        nova.check_vm_evacuations(vms_old=[old_nova_evac])
    else:
-        nova.check_vm_evacuations(vms_df_old=vms_starting_state_df,
+        nova.check_vm_evacuations(vms_old=vms_starting_state,
                                  compute_host=compute_host,
                                  timeout=600)
    LOG.info('check evac is Done')
    if not vm_type == 'shutoff':
-        nova.check_df_vms_ping(vms_starting_state_df)
+        nova.check_vms_ping(vms_starting_state)


def check_iha_evacuation_evac_image_vm():
diff --git a/tobiko/tests/faults/ha/test_cloud_recovery.py b/tobiko/tests/faults/ha/test_cloud_recovery.py
index 768138ed8..d57117b8b 100644
--- a/tobiko/tests/faults/ha/test_cloud_recovery.py
+++ b/tobiko/tests/faults/ha/test_cloud_recovery.py
@@ -50,16 +50,15 @@ has_external_lb = CONF.tobiko.rhosp.has_external_load_balancer
def overcloud_health_checks(passive_checks_only=False,
                            skip_mac_table_size_test=False):
-    # this method will be changed in future commit
    check_pacemaker_resources_health()
    check_overcloud_processes_health()
-    nova.check_nova_services_health()
+    nova_osp.check_nova_services_health()
    tests.test_alive_agents_are_consistent_along_time()
    if not passive_checks_only:
-        # create a uniq stack
+        # create a unique stack that will be cleaned up at the end of each test
        check_vm_create()
-        nova.start_all_instances()
-        nova.check_computes_vms_running_via_virsh()
+        nova_osp.action_on_all_instances('active')
+        nova_osp.check_virsh_domains_running()
        containers.list_node_containers.cache_clear()
        containers.assert_all_tripleo_containers_running()
        containers.assert_equal_containers_state()
@@ -231,10 +230,10 @@ class DisruptTripleoNodesTest(testtools.TestCase):
                                      hard_reset=False, sequentially=sequentially)
        # verify VM status is updated after reboot
-        nova.wait_for_all_instances_status('SHUTOFF')
+        nova_osp.wait_for_all_instances_status('SHUTOFF')
        # start all VM instance
        # otherwise sidecar containers will not run after computes reboot
-        nova.start_all_instances()
+        nova_osp.action_on_all_instances('active')
        OvercloudHealthCheck.run_after(passive_checks_only=True)
    _run_test()
@@ -247,7 +246,7 @@ class DisruptTripleoNodesTest(testtools.TestCase):
    #     nova.wait_for_all_instances_status('SHUTOFF')
    #     # start all VM instance
    #     # otherwise sidecar containers will not run after computes reboot
-    #     nova.start_all_instances()
+    #     nova_osp.action_on_all_instances('active')
    #     OvercloudHealthCheck.run_after(passive_checks_only=True)

    @testtools.skipIf(has_external_lb, SKIP_MESSAGE_EXTLB)
diff --git a/tobiko/tests/faults/podified/ha/test_cloud_recovery.py b/tobiko/tests/faults/podified/ha/test_cloud_recovery.py
index b81fb6b9b..d215127c6 100644
--- a/tobiko/tests/faults/podified/ha/test_cloud_recovery.py
+++ b/tobiko/tests/faults/podified/ha/test_cloud_recovery.py
@@ -14,11 +14,37 @@
# License for the specific language governing permissions and limitations
from __future__ import absolute_import

+from oslo_log import log
import testtools
-
+from tobiko.tests.faults.ha import test_cloud_recovery
from tobiko.tests.faults.podified.ha import cloud_disruptions
+from tobiko.openstack import tests
from tobiko import podified
+from tobiko.openstack import nova
+
+
+LOG = log.getLogger(__name__)
+
+
+def podified_health_checks():
+    nova.check_nova_services_health()
+    tests.test_alive_agents_are_consistent_along_time()
+    # create a unique stack that will be cleaned up at the end of each test
+    # TODO(eolivare) add tests.test_server_creation_no_fip() when BGP is
+    # configured with expose_tenant_networks
+    tests.test_server_creation()
+    nova.action_on_all_instances('active')
+    nova.check_virsh_domains_running()
test_cloud_recovery.octavia_health_checks() + + +class PodifiedCloudHealthCheck(test_cloud_recovery.OvercloudHealthCheck): + def setup_fixture(self): + # run validations + LOG.info("Start executing Podified health checks.") + podified_health_checks() + LOG.info("Podified health checks successfully executed.") @podified.skip_if_not_podified @@ -27,17 +53,20 @@ class DisruptPodifiedNodesTest(testtools.TestCase): disruptive_action: a function that runs some disruptive scenario on a node""" + def test_0vercloud_health_check(self): + PodifiedCloudHealthCheck.run_before() + def test_kill_all_galera_services(self): - # HealthCheck.run_before() + PodifiedCloudHealthCheck.run_before() cloud_disruptions.kill_all_galera_services() - # HealthCheck.run_after() + PodifiedCloudHealthCheck.run_after() def test_remove_all_grastate_galera(self): - # HealthCheck.run_before() + PodifiedCloudHealthCheck.run_before() cloud_disruptions.remove_all_grastate_galera() - # HealthCheck.run_before() + PodifiedCloudHealthCheck.run_after() def test_remove_one_grastate_galera(self): - # HealthCheck.run_before() + PodifiedCloudHealthCheck.run_before() cloud_disruptions.remove_one_grastate_galera() - # HealthCheck.run_after() + PodifiedCloudHealthCheck.run_after() diff --git a/tobiko/tripleo/nova.py b/tobiko/tripleo/nova.py index e25d278c3..ee49ec2fb 100644 --- a/tobiko/tripleo/nova.py +++ b/tobiko/tripleo/nova.py @@ -1,13 +1,24 @@ +# Copyright (c) 2025 Red Hat, Inc. +# +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
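+#
+# Note: the generic VM health-check helpers that used to live in this module
+# (start/stop all instances, virsh domain checks, evacuation checks) have
+# moved to tobiko.openstack.nova._checks and are re-exported from
+# tobiko.openstack.nova.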
from __future__ import absolute_import -import time import typing # noqa from functools import wraps - import netaddr -from oslo_log import log -import pandas import tobiko from tobiko.tripleo import overcloud @@ -15,217 +26,6 @@ from tobiko.shell import iperf3 from tobiko.shell import ping from tobiko.shell import sh from tobiko.shell import ssh -from tobiko.openstack import nova -from tobiko.openstack import topology -from tobiko.tripleo import containers - - -LOG = log.getLogger(__name__) - - -def check_nova_services_health(timeout=600., interval=2.): - retry = tobiko.retry(timeout=timeout, interval=interval) - nova.wait_for_services_up(retry=retry) - - -def start_all_instances(): - """try to start all stopped overcloud instances""" - for instance in nova.list_servers(): - activated_instance = nova.activate_server(instance) - time.sleep(3) - instance_info = 'instance {nova_instance} is {state} on {host}'.format( - nova_instance=activated_instance.name, - state=activated_instance.status, - host=activated_instance._info[ # pylint: disable=W0212 - 'OS-EXT-SRV-ATTR:hypervisor_hostname']) - LOG.info(instance_info) - if activated_instance.status != 'ACTIVE': - tobiko.fail(instance_info) - - -def stop_all_instances(): - """try to start all stopped overcloud instances""" - for instance in nova.list_servers(): - activated_instance = nova.shutoff_server(instance) - time.sleep(3) - instance_info = 'instance {nova_instance} is {state} on {host}'.format( - nova_instance=activated_instance.name, - state=activated_instance.status, - host=activated_instance._info[ # pylint: disable=W0212 - 'OS-EXT-SRV-ATTR:hypervisor_hostname']) - LOG.info(instance_info) - if activated_instance.status != 'SHUTOFF': - tobiko.fail(instance_info) - - -def wait_for_all_instances_status(status, timeout=None): - """wait for all instances for a certain status or raise an exception""" - for instance in nova.list_servers(): - nova.wait_for_server_status(server=instance.id, status=status, - timeout=timeout) - instance_info = 'instance {nova_instance} is {state} on {host}'.format( - nova_instance=instance.name, - state=status, - host=instance._info[ # pylint: disable=W0212 - 'OS-EXT-SRV-ATTR:hypervisor_hostname']) - LOG.info(instance_info) - - -def get_vms_table(): - """populate a dataframe with vm host,id,status""" - vms_data = [(vm._info[ # pylint: disable=W0212 - 'OS-EXT-SRV-ATTR:hypervisor_hostname'], vm.id, - vm.status) for vm in nova.list_servers()] - vms_df = pandas.DataFrame(vms_data, columns=['vm_host', 'vm_id', - 'vm_state']) - return vms_df - - -def list_computes(): - """list compute host names""" - return [compute.hypervisor_hostname for compute in nova.list_hypervisors()] - - -def get_compute_vms_df(compute_host): - """input: compute hostname (can be short) - output: dataframe with vms of that host""" - return get_vms_table().query(f"vm_host=='{compute_host}'") - - -def get_random_compute_with_vms_name(): - """get a randomcompute holding vm/s""" - for compute in list_computes(): - if not get_compute_vms_df(compute).empty: - return compute - - -def vm_info(vm_id, vms_df): - """input: vm and a vms df - output: host string""" - return vms_df.query(f"vm_id == '{vm_id}'").to_string() - - -def vm_df(vm_id, vms_df): - """input: vm and a vms df - output: host string""" - return vms_df.query(f"vm_id == '{vm_id}'") - - -def vm_floating_ip(vm_id): - """input: vm_id - output it's floating ip""" - - vm = nova.get_server(vm_id) - floating_ip = nova.list_server_ip_addresses( - vm, address_type='floating').first - return floating_ip 
- - -def check_ping_vm_fip(fip): - ping.ping_until_received(fip).assert_replied() - - -def check_df_vms_ping(df): - """input: dataframe with vms_ids - try to ping all vms in df""" - - for vm_id in df.vm_id.to_list(): - check_ping_vm_fip(vm_floating_ip(vm_id)) - - -def vm_location(vm_id, vms_df): - """input: vm and a vms df - output: host string""" - return vms_df.query(f"vm_id == '{vm_id}'")['vm_host'].to_string( - index=False) - - -def check_vm_evacuations(vms_df_old=None, compute_host=None, timeout=600, - interval=2, check_no_evacuation=False): - """check evacuation of vms - input: old and new vms_state_tables dfs""" - failures = [] - start = time.time() - - while time.time() - start < timeout: - failures = [] - vms_df_new = get_compute_vms_df(compute_host) - for vm_id in vms_df_old.vm_id.to_list(): - old_bm_host = vm_location(vm_id, vms_df_old) - new_vm_host = vm_location(vm_id, vms_df_new) - - if check_no_evacuation: - cond = bool(old_bm_host != new_vm_host) - else: - cond = bool(old_bm_host == new_vm_host) - - if cond: - failures.append( - 'failed vm evacuations: {}\n\n'.format(vm_info(vm_id, - vms_df_old))) - if failures: - LOG.info('Failed nova evacuation:\n {}'.format(failures)) - LOG.info('Not all nova vms evacuated ..') - LOG.info('Retrying , timeout at: {}' - .format(timeout-(time.time() - start))) - time.sleep(interval) - else: - LOG.info(vms_df_old.to_string()) - LOG.info('All vms were evacuated!') - return - # exhausted all retries - if failures: - tobiko.fail( - 'failed vm evacuations:\n{!s}', '\n'.join(failures)) - - -def get_stack_server_id(stack): - return stack.server_details.id - - -def get_fqdn_from_topology_node(topology_node): - return sh.execute("hostname -f", ssh_client=topology_node.ssh_client, - expect_exit_status=None).stdout.strip() - - -def check_vm_running_via_virsh(topology_compute, vm_id): - """check that a vm is in running state via virsh command, - return false if not""" - if vm_id in get_vm_uuid_list_running_via_virsh(topology_compute): - return True - else: - return False - - -def get_vm_uuid_list_running_via_virsh(topology_compute): - if overcloud.has_overcloud(): - container_runtime = containers.get_container_runtime_name() - nova_libvirt = containers.get_libvirt_container_name() - command = f"sudo {container_runtime} exec {nova_libvirt} " \ - f"sh -c 'for i in `virsh list --name --state-running` " \ - f";do virsh domuuid $i;done'" - else: - command = "for i in `sudo virsh list --name --state-running` " \ - ";do virsh domuuid $i;done'" - return sh.execute(command, - ssh_client=topology_compute.ssh_client).stdout.split() - - -def check_computes_vms_running_via_virsh(): - """check all vms are running via virsh list command""" - for compute in topology.list_openstack_nodes(group='compute'): - hostname = get_fqdn_from_topology_node(compute) - retry = tobiko.retry(timeout=120, interval=5) - vms_df = get_compute_vms_df(hostname) - for vm_id in vms_df.vm_id.to_list(): - for _ in retry: - if check_vm_running_via_virsh(compute, vm_id): - LOG.info(f"{vm_id} is running ok on " - f"{compute.hostname}") - break - else: - LOG.info(f"{vm_id} is not in running state on " - f"{compute.hostname}") # Test is inteded for D/S env
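
Helper migration summary (a sketch derived from the diff above, for callers
that still use tobiko.tripleo.nova; note that the new check_vm_evacuations
takes a list of Nova server objects instead of a pandas DataFrame):

    # old helper (tobiko.tripleo.nova)          new helper (tobiko.openstack.nova)
    start_all_instances()                    -> action_on_all_instances('active')
    stop_all_instances()                     -> action_on_all_instances('shutoff')
    check_computes_vms_running_via_virsh()   -> check_virsh_domains_running()
    check_df_vms_ping(vms_df)                -> check_vms_ping(vm_list)
    check_vm_evacuations(vms_df_old=vms_df)  -> check_vm_evacuations(vms_old=vm_list)
    wait_for_all_instances_status(status)    -> wait_for_all_instances_status(status)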