Improve faults tests' neutron agent health checks
One of the cloud health checks executed before and after the disruptions in the faults tests verifies that all the neutron agents are alive. This patch adds an extra verification to it: the agents must remain consistently alive over time. In addition, the number of reruns of the faults test test_controllers_shutdown is set to 0, because reruns make analysing its failures more complicated. The test is still marked as flaky because we need to determine whether there are more issues with it. Change-Id: I354c66453493339622f99c0d18e1ff98f9f609e0
This commit is contained in:
parent
35083361eb
commit
92a7c7275a
@ -22,6 +22,8 @@ from tobiko.openstack.tests import _nova
|
|||||||
InvalidDBConnString = _neutron.InvalidDBConnString
|
InvalidDBConnString = _neutron.InvalidDBConnString
|
||||||
RAFTStatusError = _neutron.RAFTStatusError
|
RAFTStatusError = _neutron.RAFTStatusError
|
||||||
test_neutron_agents_are_alive = _neutron.test_neutron_agents_are_alive
|
test_neutron_agents_are_alive = _neutron.test_neutron_agents_are_alive
|
||||||
|
test_alive_agents_are_consistent_along_time = (
|
||||||
|
_neutron.test_alive_agents_are_consistent_along_time)
|
||||||
test_ovn_dbs_validations = _neutron.test_ovn_dbs_validations
|
test_ovn_dbs_validations = _neutron.test_ovn_dbs_validations
|
||||||
test_ovs_bridges_mac_table_size = _neutron.test_ovs_bridges_mac_table_size
|
test_ovs_bridges_mac_table_size = _neutron.test_ovs_bridges_mac_table_size
|
||||||
test_ovs_namespaces_are_absent = _neutron.test_ovs_namespaces_are_absent
|
test_ovs_namespaces_are_absent = _neutron.test_ovs_namespaces_are_absent
|
||||||
|
@ -94,6 +94,41 @@ def test_neutron_agents_are_alive(timeout=420., interval=5.) \
|
|||||||
return agents
|
return agents
|
||||||
|
|
||||||
|
|
||||||
|
def test_alive_agents_are_consistent_along_time(previous_alive_agents=None):
    """Verify that the set of alive neutron agents stays stable over time.

    A baseline of agents (keyed by agent id) is compared against repeated
    ``neutron.list_agents()`` results obtained inside a
    ``tobiko.retry(sleep_time=5., count=5)`` loop.

    :param previous_alive_agents: optional dict mapping agent id to agent,
        used as the baseline. When ``None``, the baseline is built from
        ``test_neutron_agents_are_alive()``.
    :returns: the result of the restarted check when new agents show up;
        otherwise nothing is returned explicitly.

    Failures are reported through ``assertEqual`` on the test case returned
    by ``tobiko.get_test_case()``: when any listed agent has ``alive=False``
    or when the set of agent ids differs from the baseline.
    """
    test_case = tobiko.get_test_case()

    expected_agents = previous_alive_agents
    if expected_agents is None:
        # Baseline note kept from the original code: this dict is obtained
        # when the list_agents request is replied with 200, the list is not
        # empty and no agents are dead.
        expected_agents = {
            agent['id']: agent
            for agent in test_neutron_agents_are_alive()
        }

    for attempt in tobiko.retry(sleep_time=5., count=5):
        listed_agents = neutron.list_agents()
        current_agents = {agent['id']: agent for agent in listed_agents}

        # A single dead agent is enough to fail right away.
        not_alive = listed_agents.with_items(alive=False)
        test_case.assertEqual([], not_alive, "Some neutron agents died")

        # More agents than the baseline: start over using the bigger set
        # as the new baseline.
        if len(current_agents) > len(expected_agents):
            LOG.debug("Some new agents appeared! It seems not all the agents "
                      "had been started yet, so let's restart this check")
            return test_alive_agents_are_consistent_along_time(current_agents)

        # Same or smaller count: the agent ids must match the baseline.
        test_case.assertEqual(set(expected_agents),
                              set(current_agents),
                              'Some agents disappeared')

        # Leave the loop explicitly on the final attempt.
        if attempt.is_last:
            break
|
||||||
|
|
||||||
|
|
||||||
def ovn_dbs_vip_bindings(test_case):
|
def ovn_dbs_vip_bindings(test_case):
|
||||||
ovn_conn_str = get_ovn_db_connections()
|
ovn_conn_str = get_ovn_db_connections()
|
||||||
# ovn db sockets might be centrillized or distributed
|
# ovn db sockets might be centrillized or distributed
|
||||||
|
@ -45,7 +45,7 @@ def overcloud_health_checks(passive_checks_only=False,
|
|||||||
check_pacemaker_resources_health()
|
check_pacemaker_resources_health()
|
||||||
check_overcloud_processes_health()
|
check_overcloud_processes_health()
|
||||||
nova.check_nova_services_health()
|
nova.check_nova_services_health()
|
||||||
tests.test_neutron_agents_are_alive()
|
tests.test_alive_agents_are_consistent_along_time()
|
||||||
if not passive_checks_only:
|
if not passive_checks_only:
|
||||||
# create a uniq stack
|
# create a uniq stack
|
||||||
check_vm_create()
|
check_vm_create()
|
||||||
@ -265,7 +265,7 @@ class DisruptTripleoNodesTest(testtools.TestCase):
|
|||||||
cloud_disruptions.request_galera_sst()
|
cloud_disruptions.request_galera_sst()
|
||||||
OvercloudHealthCheck.run_after()
|
OvercloudHealthCheck.run_after()
|
||||||
|
|
||||||
@pytest.mark.flaky(reruns=3, reruns_delay=60)
|
@pytest.mark.flaky(reruns=0)
|
||||||
def test_controllers_shutdown(self):
|
def test_controllers_shutdown(self):
|
||||||
OvercloudHealthCheck.run_before()
|
OvercloudHealthCheck.run_before()
|
||||||
cloud_disruptions.test_controllers_shutdown()
|
cloud_disruptions.test_controllers_shutdown()
|
||||||
|
@ -17,8 +17,6 @@ from __future__ import absolute_import
|
|||||||
import pytest
|
import pytest
|
||||||
import testtools
|
import testtools
|
||||||
|
|
||||||
import tobiko
|
|
||||||
from tobiko.openstack import neutron
|
|
||||||
from tobiko.openstack import tests
|
from tobiko.openstack import tests
|
||||||
|
|
||||||
|
|
||||||
@ -29,16 +27,4 @@ class NeutronAgentTest(testtools.TestCase):
|
|||||||
tests.test_neutron_agents_are_alive()
|
tests.test_neutron_agents_are_alive()
|
||||||
|
|
||||||
def test_alive_agents_are_consistent_along_time(self):
|
def test_alive_agents_are_consistent_along_time(self):
|
||||||
alive_agents = {agent['id']: agent
|
tests.test_alive_agents_are_consistent_along_time()
|
||||||
for agent in tests.test_neutron_agents_are_alive()}
|
|
||||||
for attempt in tobiko.retry(sleep_time=5., count=5):
|
|
||||||
agents = neutron.list_agents()
|
|
||||||
actual = {agent['id']: agent
|
|
||||||
for agent in agents}
|
|
||||||
self.assertEqual(set(alive_agents), set(actual),
|
|
||||||
'Agents appeared or disappeared')
|
|
||||||
dead_agents = agents.with_items(alive=False)
|
|
||||||
self.assertEqual([], dead_agents,
|
|
||||||
"Neutron agent(s) no more alive")
|
|
||||||
if attempt.is_last:
|
|
||||||
break
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user