Add test case to reboot controller hosting Galera VIP

This test case adds automation coverage for the bug
rhbz#2124877 (OSP-20635). The test first finds the Galera
VIP, then finds the controller hosting that VIP, and then
reboots that controller while a stack of 10 VMs is being
created.
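
The disruption flow added below boils down to the following condensed
sketch (function and fixture names are taken from the diff in this
commit; this is an outline for reviewers, not the code itself, and
reboot_node() is a helper in cloud_disruptions.py used as shown
further down):

    # Condensed outline of the new disrupt_controller_galera_main_vip()
    import tobiko
    from tobiko import tripleo
    from tobiko.openstack import neutron, stacks
    from tobiko.shell import sh
    from tobiko.tripleo import pacemaker

    session = tripleo.undercloud_keystone_session()
    uc_neutron_client = neutron.get_neutron_client(session=session)

    # 1. Find the Galera VIP on the internal_api network
    port = neutron.find_port(client=uc_neutron_client, unique=False,
                             name='internal_api_virtual_ip')
    galera_vip_address = port['fixed_ips'][0]['ip_address']

    # 2. Find the controller running the "ip-<address>" pacemaker resource
    controller = pacemaker.get_overcloud_nodes_running_pcs_resource(
        resource='ip-' + galera_vip_address)[0]

    # 3. Start creating the 10-VM stack, then reboot that controller
    tobiko.use_fixture(stacks.MultiIPCirrosServerStackFixture)
    reboot_node(controller, wait=True, reboot_method=sh.soft_reset_method)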

Change-Id: I0b109918fd2061bf4342856bef929088b088c9d1
Bharath M V 2023-01-04 16:29:59 +05:30
parent 03bfa589bc
commit 4e737b8a46
6 changed files with 188 additions and 5 deletions


@@ -126,6 +126,7 @@ class HeatStackFixture(tobiko.SharedFixture):
parameters: typing.Optional['HeatStackParametersFixture'] = None
project: typing.Optional[str] = None
user: typing.Optional[str] = None
output_needs_stack_complete: bool = True
def __init__(
self,
@@ -670,8 +671,8 @@ class HeatStackOutputsFixture(HeatStackNamespaceFixture):
return frozenset(template.outputs or [])
def get_values(self):
# Can't get output values before stack creation is complete
self.stack.wait_for_create_complete()
if self.stack.output_needs_stack_complete:
self.stack.wait_for_create_complete()
outputs = self.stack.get_stack(resolve_outputs=True).outputs
return {o['output_key']: o['output_value']
for o in outputs}
@@ -739,7 +740,13 @@ class HeatStackResourceFixture(HeatStackNamespaceFixture):
return frozenset(template.resources or [])
def get_values(self):
self.stack.wait_for_create_complete()
# Setting output_needs_stack_complete to False may be necessary
# in some cases, such as the faults test that covers RHBZ#2124877.
# Some VMs may be in ERROR state for that test case, but that is
# fine; checking VM status is not the aim of this test.
if self.stack.output_needs_stack_complete:
self.stack.wait_for_create_complete()
client = self.stack.client
resources = client.resources.list(self.stack.stack_id)
return {r.resource_name: r for r in resources}
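
The net effect of the new output_needs_stack_complete flag is that a
stack fixture can opt out of blocking on CREATE_COMPLETE before its
outputs and resources are resolved. A minimal illustration (the class
name here is made up; the real opt-out added by this commit is
MultiIPCirrosServerStackFixture further down):

    # Illustrative only: a fixture whose outputs/resources may be read
    # while stack creation is still in progress
    from tobiko.openstack.stacks import _cirros

    class PartialOutputsStackFixture(_cirros.CirrosServerStackFixture):
        output_needs_stack_complete = False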


@@ -49,6 +49,8 @@ EvacuableCirrosImageFixture = _cirros.EvacuableCirrosImageFixture
EvacuableServerStackFixture = _cirros.EvacuableServerStackFixture
ExtraDhcpOptsCirrosServerStackFixture = (
_cirros.ExtraDhcpOptsCirrosServerStackFixture)
MultiIPCirrosServerStackFixture = (
_cirros.MultiIPCirrosServerStackFixture)
DesignateZoneStackFixture = _designate.DesignateZoneStackFixture


@@ -21,11 +21,12 @@ from paramiko import sftp_file
from tobiko import config
from tobiko.openstack import glance
from tobiko.openstack import neutron
from tobiko.openstack import heat
from tobiko.openstack.stacks import _nova
from tobiko.shell import sh
from tobiko.shell import ssh
import tobiko.tripleo
from tobiko.openstack.stacks import _hot
CONF = config.CONF
@@ -155,3 +156,13 @@ class EvacuableServerStackFixture(CirrosServerStackFixture):
class ExtraDhcpOptsCirrosServerStackFixture(CirrosServerStackFixture):
use_extra_dhcp_opts = True
class MultiIPCirrosServerStackFixture(CirrosServerStackFixture):
template = _hot.heat_template_file('nova/multi_ip_test_stack.yaml')
expected_creted_status = {heat.CREATE_IN_PROGRESS, heat.CREATE_COMPLETE}
output_needs_stack_complete = False
def cleanup_stack(self):
self.delete_stack()
# this is skipped for this stack: self.wait_until_stack_deleted()


@@ -0,0 +1,47 @@
heat_template_version: newton
description: |
Create a group of servers connected to the same tenant network.
parameters:
key_name:
type: string
description: Name of keypair to assign to server
constraints:
- custom_constraint: nova.keypair
flavor:
type: string
description: Flavor to use for server
constraints:
- custom_constraint: nova.flavor
image:
type: string
description: Name of image to use for server
network:
type: string
description: ID of the network to which the server gets connected
constraints:
- custom_constraint: neutron.network
resources:
group_of_vms:
type: OS::Heat::ResourceGroup
properties:
count: 10
resource_def:
type: OS::Nova::Server
properties:
name: group_of_vms_%index%
key_name: {get_param: key_name}
image: {get_param: image}
flavor: {get_param: flavor}
networks:
- network: {get_param: network}
outputs:
list_of_server_ids:
description: list of server IDs from this group
value: {get_attr: [group_of_vms, refs]}
vms_detailed_info:
description: detailed info from the group of VMs
value: {get_attr: [group_of_vms, show]}
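
The vms_detailed_info output is what get_vms_detailed_info() and
check_no_duplicate_ips() iterate over in the next file. A hedged
illustration of the shape those functions expect, with made-up values:

    # Illustration only: `get_attr: [group_of_vms, show]` yields one
    # server record per group member; the test code reads the 'id',
    # 'status' and 'addresses' keys of each record (an entry may also
    # be None while a member has not been created yet).
    vms_detailed_info = [
        {
            'id': '0b4a21c8-0000-0000-0000-000000000000',
            'status': 'ACTIVE',  # BUILD/ACTIVE/ERROR, polled until no BUILD remains
            'addresses': {
                'some-network': [              # network name is arbitrary here
                    {'addr': '192.168.0.10'},  # one IPv4 address ...
                    {'addr': '2001:db8::10'},  # ... and one IPv6 address
                ],
            },
        },
        # ... nine more entries for the remaining group members
    ]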


@@ -29,6 +29,7 @@ from oslo_log import log
import tobiko
from tobiko.openstack import glance
from tobiko.openstack import keystone
from tobiko.openstack import neutron
from tobiko.openstack import stacks
from tobiko.openstack import tests
from tobiko.openstack import topology
@@ -39,7 +40,7 @@ from tobiko.tripleo import containers
from tobiko.tripleo import nova
from tobiko.tripleo import pacemaker
from tobiko.tripleo import topology as tripleo_topology
from tobiko import tripleo
LOG = log.getLogger(__name__)
@@ -281,6 +282,95 @@ def disrupt_controller_main_vip(disrupt_method=sh.hard_reset_method,
disrupt_node(main_vip_controller, disrupt_method=disrupt_method)
def disrupt_controller_galera_main_vip(disrupt_method=sh.soft_reset_method):
# This case reboots a controller while VM creation is in progress
# Please refer to RHBZ#2124877 for more info
# Find the Galera VIP (port name : internal_api_virtual_ip)
try:
session = tripleo.undercloud_keystone_session()
uc_neutron_client = neutron.get_neutron_client(session=session)
new_port = neutron.find_port(client=uc_neutron_client, unique=False,
name='internal_api_virtual_ip')
galera_vip_address = new_port['fixed_ips'][0]['ip_address']
LOG.info("The Galera VIP address is: %r", galera_vip_address)
except sh.ShellCommandFailed as no_internal_api:
raise tobiko.SkipException(
'This OSP environment does not have an internal_api '
'network, so this test cannot be executed') from no_internal_api
# Find the controller hosting VIP resource
galera_vip_resource = "ip-"+galera_vip_address
galera_vip_controller = pacemaker.get_overcloud_nodes_running_pcs_resource(
resource=galera_vip_resource)[0]
ports_before_stack_creation = neutron.list_ports(
device_owner="compute:nova")
multi_ip_test_fixture = tobiko.get_fixture(
stacks.MultiIPCirrosServerStackFixture)
tobiko.use_fixture(multi_ip_test_fixture)
time.sleep(10) # wait until some of the VMs have been created
# Reboot that controller
reboot_node(galera_vip_controller, wait=True,
reboot_method=disrupt_method)
return multi_ip_test_fixture, ports_before_stack_creation
def get_vms_detailed_info(multi_ip_test_fixture):
for attempt in tobiko.retry(timeout=240, interval=10):
# dynamically obtain the status of the VMs
vms_detailed_info = multi_ip_test_fixture.vms_detailed_info
vm_status_list = [
vm.get('status') for vm in vms_detailed_info if vm is not None]
if 'BUILD' not in vm_status_list:
LOG.debug("All VMs reached a final status")
break
if attempt.is_last:
LOG.warning("Still some VMs are in BUILD status - the test continues...")
break
return vms_detailed_info
def check_no_duplicate_ips(vms_detailed_info, ports_before_stack_creation):
test_case = tobiko.get_test_case()
ports_after_reboot = neutron.list_ports(device_owner="compute:nova")
# check VM IP addresses are different
ip4_list = []
ip6_list = []
for vm in vms_detailed_info:
addresses = vm.get('addresses', {}) if vm is not None else {}
for addresses_per_network in addresses.values():
test_case.assertEqual(len(addresses_per_network), 2)
for subnet_addr in addresses_per_network:
subnet_ip = subnet_addr['addr']
if netaddr.valid_ipv4(subnet_ip):
ip4_list.append(subnet_ip)
elif netaddr.valid_ipv6(subnet_ip):
ip6_list.append(subnet_ip)
ip4_set = set(ip4_list) # this removes duplicate values
LOG.debug("list of IPv4s from the MultiIPVM group: %r", ip4_list)
test_case.assertEqual(len(ip4_list), len(ip4_set))
ip6_set = set(ip6_list) # this removes duplicate values
LOG.debug("list of IPv6s from the MultiIPVM group: %r", ip6_list)
test_case.assertEqual(len(ip6_list), len(ip6_set))
LOG.debug("list of IPv4 and list of IPv6 addresses "
"should have the same length")
test_case.assertEqual(len(ip6_list), len(ip4_list))
test_case.assertEqual(len(ip6_list), len(ports_after_reboot) - len(
ports_before_stack_creation))
def reboot_controller_galera_main_vip():
return disrupt_controller_galera_main_vip(
disrupt_method=sh.soft_reset_method)
def reset_controller_main_vip():
disrupt_controller_main_vip(disrupt_method=sh.hard_reset_method)
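
The duplicate-IP check above essentially reduces to comparing list and
set lengths; a toy illustration of that invariant:

    # If any IP address is assigned twice, the set is shorter than the list
    ip4_list = ['192.168.0.10', '192.168.0.11', '192.168.0.10']
    assert len(ip4_list) == len(set(ip4_list)), "duplicate IPs detected"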


@@ -23,6 +23,7 @@ import testtools
import tobiko
from tobiko.openstack import neutron
from tobiko.openstack import nova as nova_osp
from tobiko.openstack import topology
from tobiko.openstack import tests
from tobiko.tests.faults.ha import cloud_disruptions
@@ -126,6 +127,7 @@ class DisruptTripleoNodesTest(testtools.TestCase):
""" HA Tests: run health check -> disruptive action -> health check
disruptive_action: a function that runs some
disruptive scenario on a overcloud"""
vms_detailed_info = None
def test_0vercloud_health_check(self):
OvercloudHealthCheck.run_before(skip_mac_table_size_test=False)
@@ -158,6 +160,30 @@
# nova.start_all_instances()
# OvercloudHealthCheck.run_after(passive_checks_only=True)
def tearDown(self):
super(DisruptTripleoNodesTest, self).tearDown()
for vm in self.vms_detailed_info or []:
if vm is None or vm.get('id') is None:
continue
vm_id = vm['id']
try:
nova_osp.delete_server(vm_id)
except nova_osp.ServerNotFoundError:
LOG.debug(f"Server {vm_id} not found")
def test_z99_reboot_controller_galera_main_vip(self):
# This test case may fail at times if RHBZ#2124877 is not resolved,
# but that bug is caused by a race condition, so it is not
# reproducible 100% of the time
OvercloudHealthCheck.run_before(passive_checks_only=True)
multi_ip_test_fixture, ports_before_stack_creation = \
cloud_disruptions.reboot_controller_galera_main_vip()
OvercloudHealthCheck.run_after(passive_checks_only=True)
self.vms_detailed_info = cloud_disruptions.get_vms_detailed_info(
multi_ip_test_fixture)
cloud_disruptions.check_no_duplicate_ips(
self.vms_detailed_info, ports_before_stack_creation)
def test_z99_reboot_controller_main_vip(self):
OvercloudHealthCheck.run_before()
cloud_disruptions.reset_controller_main_vip()