Add faults ovn_bgp_agent and frr tests

This patch adds two fault tests. They restart the ovn_bgp_agent and
the frr services, respectively, on all the overcloud nodes where these
services run.
When BGP is configured with expose_tenant_networks enabled, the fault
tests also create an extra VM without a FIP to verify connectivity to
its tenant IP addresses.

Change-Id: I118427b41e1c97d075572a5ebee21025ac5cc967
This commit is contained in:
Eduardo Olivares 2023-03-31 17:20:39 +02:00
parent bc07bea7b4
commit a3116bf0e1
14 changed files with 130 additions and 25 deletions

View File

@ -35,6 +35,8 @@ OPENVSWITCH_AGENT = _agent.OPENVSWITCH_AGENT
OVN_CONTROLLER = _agent.OVN_CONTROLLER OVN_CONTROLLER = _agent.OVN_CONTROLLER
OVN_METADATA_AGENT = _agent.OVN_METADATA_AGENT OVN_METADATA_AGENT = _agent.OVN_METADATA_AGENT
NEUTRON_OVN_METADATA_AGENT = _agent.NEUTRON_OVN_METADATA_AGENT NEUTRON_OVN_METADATA_AGENT = _agent.NEUTRON_OVN_METADATA_AGENT
OVN_BGP_AGENT = _agent.OVN_BGP_AGENT
FRR = _agent.FRR
DEFAULT_SG_NAME = _security_group.DEFAULT_SG_NAME DEFAULT_SG_NAME = _security_group.DEFAULT_SG_NAME
STATEFUL_OVN_ACTION = _security_group.STATEFUL_OVN_ACTION STATEFUL_OVN_ACTION = _security_group.STATEFUL_OVN_ACTION
STATELESS_OVN_ACTION = _security_group.STATELESS_OVN_ACTION STATELESS_OVN_ACTION = _security_group.STATELESS_OVN_ACTION

View File

@ -38,6 +38,8 @@ OVN_CONTROLLER = 'ovn-controller'
# called 'neutron-ovn-metadata-agent' # called 'neutron-ovn-metadata-agent'
OVN_METADATA_AGENT = 'networking-ovn-metadata-agent' OVN_METADATA_AGENT = 'networking-ovn-metadata-agent'
NEUTRON_OVN_METADATA_AGENT = 'neutron-ovn-metadata-agent' NEUTRON_OVN_METADATA_AGENT = 'neutron-ovn-metadata-agent'
OVN_BGP_AGENT = 'ovn-bgp-agent'
FRR = 'frr'
class AgentNotFoundOnHost(tobiko.TobikoException): class AgentNotFoundOnHost(tobiko.TobikoException):

View File

@ -35,6 +35,7 @@ test_evacuable_server_creation = _nova.test_evacuable_server_creation
test_server_creation = _nova.test_server_creation test_server_creation = _nova.test_server_creation
test_servers_creation = _nova.test_servers_creation test_servers_creation = _nova.test_servers_creation
test_server_creation_and_shutoff = _nova.test_server_creation_and_shutoff test_server_creation_and_shutoff = _nova.test_server_creation_and_shutoff
test_server_creation_no_fip = _nova.test_server_creation_no_fip
TestServerCreationStack = _nova.TestServerCreationStack TestServerCreationStack = _nova.TestServerCreationStack
TestEvacuableServerCreationStack = _nova.TestEvacuableServerCreationStack TestEvacuableServerCreationStack = _nova.TestEvacuableServerCreationStack
test_ovsdb_transactions = _neutron.test_ovsdb_transactions test_ovsdb_transactions = _neutron.test_ovsdb_transactions

View File

@ -17,7 +17,6 @@ from __future__ import absolute_import
import os import os
import typing # noqa import typing # noqa
import time
import tobiko import tobiko
from tobiko.shell import ping from tobiko.shell import ping
@ -25,6 +24,7 @@ from tobiko.shell import sh
from tobiko.openstack import nova from tobiko.openstack import nova
from tobiko.openstack.stacks import _cirros from tobiko.openstack.stacks import _cirros
from tobiko.openstack.stacks import _nova from tobiko.openstack.stacks import _nova
from tobiko.openstack.stacks import _neutron
class TestServerCreationStack(_cirros.CirrosServerStackFixture): class TestServerCreationStack(_cirros.CirrosServerStackFixture):
@ -38,6 +38,27 @@ def test_server_creation(stack=TestServerCreationStack):
number_of_servers=0).first number_of_servers=0).first
class NetworkNoFipStackFixture(_neutron.NetworkStackFixture):
    """Network stack dedicated to servers created without a floating IP.

    Unlike its parent fixture, this one registers its own cleanup as soon
    as it has been set up.
    """

    def setup_fixture(self):
        """Set up the network stack, then schedule its cleanup."""
        super().setup_fixture()
        # Register the cleanup right after setup so that this stack gets
        # deleted at the end of the test.
        tobiko.add_cleanup(NetworkNoFipStackFixture.cleanup_fixture, self)
class TestServerNoFipCreationStack(_cirros.CirrosServerStackFixture):
    """Cirros server stack without FIP, used to test server creation."""

    # The server is attached to the dedicated network stack for servers
    # created without a floating IP.
    network_stack = tobiko.required_fixture(NetworkNoFipStackFixture)

    # This stack's server is created without a floating IP.
    has_floating_ip = False
def test_server_creation_no_fip():
    """Run the server creation test with a server that has no FIP.

    Delegates to ``test_server_creation`` using the
    ``TestServerNoFipCreationStack`` stack and returns its result.
    """
    stack_class = TestServerNoFipCreationStack
    return test_server_creation(stack=stack_class)
class TestEvacuableServerCreationStack(_cirros.EvacuableServerStackFixture): class TestEvacuableServerCreationStack(_cirros.EvacuableServerStackFixture):
"""Nova instance intended to be used for testing server creation""" """Nova instance intended to be used for testing server creation"""
@ -86,14 +107,16 @@ def test_servers_creation(stack=TestServerCreationStack,
test_case.assertEqual(number_of_servers or 1, len(server_ids)) test_case.assertEqual(number_of_servers or 1, len(server_ids))
test_case.assertFalse(server_ids & initial_servers_ids) test_case.assertFalse(server_ids & initial_servers_ids)
# sleep for 20 sec , ensure no race condition with ssh
time.sleep(20)
# Test SSH connectivity to floating IP address
for fixture in fixtures: for fixture in fixtures:
# Test pinging to floating IP address (or fixed IP)
if fixture.floating_ip_address is not None:
pingable_ips = [fixture.floating_ip_address]
else:
pingable_ips = [fixed_ip['ip_address']
for fixed_ip in fixture.fixed_ips]
ping.assert_reachable_hosts(pingable_ips)
# Test SSH connectivity to floating IP address (or fixed IP)
test_case.assertTrue(sh.get_hostname(ssh_client=fixture.ssh_client)) test_case.assertTrue(sh.get_hostname(ssh_client=fixture.ssh_client))
# Test pinging to floating IP address
ping.assert_reachable_hosts(fixture.floating_ip_address
for fixture in fixtures)
return fixtures return fixtures

View File

@ -62,3 +62,4 @@ OpenStackTopologyNode = _topology.OpenStackTopologyNode
set_default_openstack_topology_class = ( set_default_openstack_topology_class = (
_topology.set_default_openstack_topology_class) _topology.set_default_openstack_topology_class)
verify_osp_version = _topology.verify_osp_version verify_osp_version = _topology.verify_osp_version
get_config_setting = _topology.get_config_setting

View File

@ -15,6 +15,7 @@ from __future__ import absolute_import
import collections import collections
from collections import abc from collections import abc
import configparser
import functools import functools
import re import re
import typing import typing
@ -302,13 +303,16 @@ class OpenStackTopology(tobiko.SharedFixture):
neutron.OVN_METADATA_AGENT: 'devstack@q-ovn-metadata-agent', neutron.OVN_METADATA_AGENT: 'devstack@q-ovn-metadata-agent',
neutron.NEUTRON_OVN_METADATA_AGENT: 'devstack@q-ovn-metadata-agent', neutron.NEUTRON_OVN_METADATA_AGENT: 'devstack@q-ovn-metadata-agent',
neutron.OVN_CONTROLLER: 'ovn-controller' neutron.OVN_CONTROLLER: 'ovn-controller'
# TODO(eolivare): ovn_bgp_agent on devstack?
# TODO(eolivare): frr on devstack?
} }
agent_to_container_name_mappings: typing.Dict[str, str] = {} agent_to_container_name_mappings: typing.Dict[str, str] = {}
has_containers = False has_containers = False
config_file_mappings = { config_file_mappings = {
'ml2_conf.ini': '/etc/neutron/plugins/ml2/ml2_conf.ini' 'ml2_conf.ini': '/etc/neutron/plugins/ml2/ml2_conf.ini',
'bgp-agent.conf': '/etc/ovn-bgp-agent/bgp-agent.conf'
} }
_connections = tobiko.required_fixture( _connections = tobiko.required_fixture(
@ -661,6 +665,25 @@ def get_config_file_path(file_name: str) -> str:
return topology.get_config_file_path(file_name) return topology.get_config_file_path(file_name)
def get_config_setting(file_name: str,
                       ssh_client: ssh.SSHClientFixture,
                       param: str,
                       section: typing.Optional[str] = None
                       ) -> typing.Optional[str]:
    """Read one option from an INI-style configuration file on a remote host.

    The file is located through ``get_config_file_path``, read with ``cat``
    (as root) over the given SSH client and parsed with ``configparser``.

    :param file_name: key used to look up the configuration file path
    :param ssh_client: SSH client of the host where the file is read
    :param param: name of the option to look for
    :param section: section holding the option; ``None`` selects the
        default section (``DEFAULT``)
    :return: the raw option value, or ``None`` when either the section or
        the option is missing
    """
    config_file_path = get_config_file_path(file_name)
    config_file_content = sh.execute(f'cat {config_file_path}',
                                     ssh_client=ssh_client, sudo=True).stdout

    # interpolation=None: return values verbatim instead of failing on
    # '%'-style placeholders (e.g. logging format strings).
    # strict=False: tolerate repeated sections/options (the last one wins)
    # instead of raising while parsing.
    config = configparser.ConfigParser(interpolation=None, strict=False)
    config.read_string(config_file_content)

    # ConfigParser.get() normalises the option name, accepts the default
    # section explicitly (config.sections() does not list it) and returns
    # the fallback when either the section or the option does not exist.
    if section is None:
        section = config.default_section
    return config.get(section, param, fallback=None)
def get_rhosp_version(): def get_rhosp_version():
ssh_client = list_openstack_nodes(group='controller')[0].ssh_client ssh_client = list_openstack_nodes(group='controller')[0].ssh_client
rhosp_release = sh.execute('cat /etc/rhosp-release', rhosp_release = sh.execute('cat /etc/rhosp-release',

View File

@ -135,17 +135,15 @@ def list_systemd_units(*units: SystemdUnitType,
def stop_systemd_units(*units: SystemdUnitType, def stop_systemd_units(*units: SystemdUnitType,
ssh_client: ssh.SSHClientType = None, ssh_client: ssh.SSHClientType = None):
sudo: bool = None):
command = systemctl_command('stop', *units) command = systemctl_command('stop', *units)
_execute.execute(command, ssh_client=ssh_client, sudo=sudo) _execute.execute(command, ssh_client=ssh_client, sudo=True)
def start_systemd_units(*units: SystemdUnitType, def start_systemd_units(*units: SystemdUnitType,
ssh_client: ssh.SSHClientType = None, ssh_client: ssh.SSHClientType = None):
sudo: bool = None):
command = systemctl_command('start', *units) command = systemctl_command('start', *units)
_execute.execute(command, ssh_client=ssh_client, sudo=sudo) _execute.execute(command, ssh_client=ssh_client, sudo=True)
def wait_for_active_systemd_units(*units: SystemdUnitType, def wait_for_active_systemd_units(*units: SystemdUnitType,

View File

@ -470,6 +470,17 @@ def reset_ovndb_master_container():
container_host=node) container_host=node)
def restart_service_on_all_nodes(service):
    """Stop and then start a systemd service on all the nodes running it.

    The service (e.g. the ovn bgp agent or frr) is first stopped on every
    overcloud node where it is running and only afterwards started again on
    those nodes, so there is a time window during which it is down
    everywhere.

    This function does not verify the health of the cloud by itself:
    callers are expected to run their own checks once it returns.

    :param service: name of the systemd unit to restart
    """
    node_names = tripleo.get_overcloud_nodes_running_service(service)
    # NOTE(review): behaviour when node_names is empty depends on how
    # list_openstack_nodes treats an empty hostnames filter - TODO confirm
    nodes = topology.list_openstack_nodes(hostnames=node_names)

    stop_attempted = []
    try:
        for node in nodes:
            # record the node before stopping: even if the stop command
            # fails half way, the service must be started again there
            stop_attempted.append(node)
            sh.stop_systemd_units(service, ssh_client=node.ssh_client)
    finally:
        # always bring the service back, so that a failure while stopping
        # does not leave it down on the nodes already processed
        for node in stop_attempted:
            sh.start_systemd_units(service, ssh_client=node.ssh_client)
def kill_rabbitmq_service(): def kill_rabbitmq_service():
"""kill a rabbit process on a random controller, """kill a rabbit process on a random controller,
check in pacemaker it is down""" check in pacemaker it is down"""

View File

@ -70,6 +70,15 @@ def overcloud_health_checks(passive_checks_only=False,
# check vm create with ssh and ping checks # check vm create with ssh and ping checks
def check_vm_create(): def check_vm_create():
tests.test_server_creation() tests.test_server_creation()
if overcloud.is_ovn_bgp_agent_running():
try:
node = topology.find_openstack_node(group='networker')
except topology.NoSuchOpenStackTopologyNodeGroup:
node = topology.find_openstack_node(group='controller')
expose_tenant_networks = topology.get_config_setting(
'bgp-agent.conf', node.ssh_client, 'expose_tenant_networks')
if expose_tenant_networks and expose_tenant_networks.lower() == 'true':
tests.test_server_creation_no_fip()
# check cluster failed statuses # check cluster failed statuses
@ -280,6 +289,20 @@ class DisruptTripleoNodesTest(testtools.TestCase):
cloud_disruptions.test_controllers_shutdown() cloud_disruptions.test_controllers_shutdown()
OvercloudHealthCheck.run_after() OvercloudHealthCheck.run_after()
@overcloud.skip_unless_ovn_bgp_agent
def test_restart_ovn_bgp_agents(self):
    """Restart the OVN BGP agent service between two health checks."""
    OvercloudHealthCheck.run_before()
    service_name = topology.get_agent_service_name(neutron.OVN_BGP_AGENT)
    cloud_disruptions.restart_service_on_all_nodes(service_name)
    OvercloudHealthCheck.run_after()
@overcloud.skip_unless_ovn_bgp_agent
def test_restart_frr(self):
    """Restart the FRR service between two health checks."""
    OvercloudHealthCheck.run_before()
    service_name = topology.get_agent_service_name(neutron.FRR)
    cloud_disruptions.restart_service_on_all_nodes(service_name)
    OvercloudHealthCheck.run_after()
# [..] # [..]
# more tests to follow # more tests to follow
# run health checks # run health checks

View File

@ -201,9 +201,7 @@ class OctaviaServicesFaultTest(testtools.TestCase):
for service, ssh_clients in services_to_stop.items(): for service, ssh_clients in services_to_stop.items():
for ssh_client in ssh_clients: for ssh_client in ssh_clients:
sh.stop_systemd_units(service, sh.stop_systemd_units(service, ssh_client=ssh_client)
ssh_client=ssh_client,
sudo=True)
LOG.debug(f'We stopped {service} on {ssh_client.host}') LOG.debug(f'We stopped {service} on {ssh_client.host}')
self.loadbalancer_stack.wait_for_octavia_service() self.loadbalancer_stack.wait_for_octavia_service()
@ -238,9 +236,7 @@ class OctaviaServicesFaultTest(testtools.TestCase):
for service, ssh_clients in services_to_stop.items(): for service, ssh_clients in services_to_stop.items():
for ssh_client in ssh_clients: for ssh_client in ssh_clients:
sh.start_systemd_units(service, sh.start_systemd_units(service, ssh_client=ssh_client)
ssh_client=ssh_client,
sudo=True)
LOG.debug(f'We started {service} on {ssh_client.host}') LOG.debug(f'We started {service} on {ssh_client.host}')

View File

@ -19,6 +19,7 @@ from tobiko.tripleo import _rhosp
from tobiko.tripleo import _topology as topology from tobiko.tripleo import _topology as topology
from tobiko.tripleo import _undercloud as undercloud from tobiko.tripleo import _undercloud as undercloud
from tobiko.tripleo import containers from tobiko.tripleo import containers
from tobiko.tripleo import services
get_tripleo_ansible_inventory = _ansible.get_tripleo_ansible_inventory get_tripleo_ansible_inventory = _ansible.get_tripleo_ansible_inventory
@ -74,3 +75,6 @@ undercloud_keystone_credentials = undercloud.undercloud_keystone_credentials
undercloud_keystone_session = undercloud.undercloud_keystone_session undercloud_keystone_session = undercloud.undercloud_keystone_session
undercloud_ssh_client = undercloud.undercloud_ssh_client undercloud_ssh_client = undercloud.undercloud_ssh_client
undercloud_version = undercloud.undercloud_version undercloud_version = undercloud.undercloud_version
get_overcloud_nodes_running_service = \
services.get_overcloud_nodes_running_service

View File

@ -22,6 +22,7 @@ from oslo_log import log
import tobiko import tobiko
from tobiko import config from tobiko import config
from tobiko import tripleo
from tobiko.openstack import keystone from tobiko.openstack import keystone
from tobiko.openstack import ironic from tobiko.openstack import ironic
from tobiko.openstack import metalsmith from tobiko.openstack import metalsmith
@ -341,6 +342,16 @@ skip_unless_ovn_using_ha = tobiko.skip_unless(
'OVN does not use HA DB model', is_ovn_using_ha) 'OVN does not use HA DB model', is_ovn_using_ha)
def is_ovn_bgp_agent_running():
    """Tell whether the OVN BGP agent service runs on any overcloud node."""
    service_name = topology.get_agent_service_name(neutron.OVN_BGP_AGENT)
    running_nodes = tripleo.get_overcloud_nodes_running_service(service_name)
    return len(running_nodes) > 0
skip_unless_ovn_bgp_agent = tobiko.skip_unless(
'The OVN BGP Agent is not running on the System Under Test',
is_ovn_bgp_agent_running)
@functools.lru_cache() @functools.lru_cache()
def are_kexec_tools_installed(): def are_kexec_tools_installed():
for controller in topology.list_openstack_nodes(group='controller'): for controller in topology.list_openstack_nodes(group='controller'):

View File

@ -45,7 +45,9 @@ class TripleoTopology(topology.OpenStackTopology):
neutron.METADATA_AGENT: 'tripleo_neutron_metadata_agent', neutron.METADATA_AGENT: 'tripleo_neutron_metadata_agent',
neutron.OVN_METADATA_AGENT: 'tripleo_ovn_metadata_agent', neutron.OVN_METADATA_AGENT: 'tripleo_ovn_metadata_agent',
neutron.NEUTRON_OVN_METADATA_AGENT: 'tripleo_ovn_metadata_agent', neutron.NEUTRON_OVN_METADATA_AGENT: 'tripleo_ovn_metadata_agent',
neutron.OVN_CONTROLLER: 'tripleo_ovn_controller' neutron.OVN_CONTROLLER: 'tripleo_ovn_controller',
neutron.OVN_BGP_AGENT: 'tripleo_ovn_bgp_agent',
neutron.FRR: 'tripleo_frr'
} }
agent_to_container_name_mappings = { agent_to_container_name_mappings = {
@ -55,14 +57,18 @@ class TripleoTopology(topology.OpenStackTopology):
neutron.METADATA_AGENT: 'neutron_metadata_agent', neutron.METADATA_AGENT: 'neutron_metadata_agent',
neutron.OVN_METADATA_AGENT: 'ovn_metadata_agent', neutron.OVN_METADATA_AGENT: 'ovn_metadata_agent',
neutron.NEUTRON_OVN_METADATA_AGENT: 'ovn_metadata_agent', neutron.NEUTRON_OVN_METADATA_AGENT: 'ovn_metadata_agent',
neutron.OVN_CONTROLLER: 'ovn_controller' neutron.OVN_CONTROLLER: 'ovn_controller',
neutron.OVN_BGP_AGENT: 'ovn_bgp_agent',
neutron.FRR: 'frr'
} }
has_containers = True has_containers = True
config_file_mappings = { config_file_mappings = {
'ml2_conf.ini': '/var/lib/config-data/puppet-generated/neutron' 'ml2_conf.ini': '/var/lib/config-data/puppet-generated/neutron'
'/etc/neutron/plugins/ml2/ml2_conf.ini' '/etc/neutron/plugins/ml2/ml2_conf.ini',
'bgp-agent.conf': '/var/lib/config-data/ansible-generated/'
'ovn-bgp-agent/etc/ovn-bgp-agent/bgp-agent.conf'
} }
# TODO: add more known subgrups here # TODO: add more known subgrups here

View File

@ -67,9 +67,13 @@ def get_overcloud_nodes_running_service(service):
""" """
oc_procs_df = overcloud.get_overcloud_nodes_dataframe( oc_procs_df = overcloud.get_overcloud_nodes_dataframe(
get_overcloud_node_services_table) get_overcloud_node_services_table)
# remove the ".service" suffix
oc_procs_df = oc_procs_df.replace(to_replace={'UNIT': '.service'},
value='',
regex=True)
oc_nodes_running_service = oc_procs_df.query('UNIT=="{}"'.format(service))[ oc_nodes_running_service = oc_procs_df.query('UNIT=="{}"'.format(service))[
'overcloud_node'].unique() 'overcloud_node'].unique()
return oc_nodes_running_service return oc_nodes_running_service.tolist()
def check_if_process_running_on_overcloud(process): def check_if_process_running_on_overcloud(process):