Add four toolchain functional tests
Add four toolchain functional tests. Add toolchain helper methods.

Change-Id: I0ea3613d8e8e44a2dfa9d5a9bea26c0e9b793ee4
This commit is contained in: parent 6b41a46fad, commit e4b4ef93de
@@ -15,6 +15,8 @@
 from contextlib import closing
 import socket
 
+from devops.error import DevopsCalledProcessError
+from devops.helpers import helpers as devops_helpers
 from proboscis import asserts
 import requests
 from requests.packages.urllib3 import poolmanager
@@ -83,3 +85,32 @@ def check_port(address, port):
     """
     with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as sock:
         return sock.connect_ex((address, port)) == 0
+
+
+def check_local_mail(remote, node_name, service, state, timeout=10 * 60):
+    """Check that an email from the LMA Infrastructure Alerting plugin about
+    a service changing its state is present on the host.
+
+    :param remote: SSH connection to the node.
+    :type remote: SSHClient
+    :param node_name: name of the node to check for email on.
+    :type node_name: str
+    :param service: name of the service the email reports on.
+    :type service: str
+    :param state: service state the email reports.
+    :type state: str
+    :param timeout: timeout to wait for email to arrive.
+    :type timeout: int
+    """
+    def check_mail():
+        try:
+            response = remote.check_call("cat $MAIL")
+            if not response:
+                return False
+            if ("Service: {}\n".format(service) in response['stdout'] and
+                    "State: {}\n".format(state) in response['stdout']):
+                return True
+        except DevopsCalledProcessError:
+            return False
+
+    msg = ("Email about service {0} in {1} state was not "
+           "found on {2} after {3} seconds").format(
+        service, state, node_name, timeout)
+    devops_helpers.wait(check_mail, timeout=timeout, timeout_msg=msg)
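Editor's note: the new checker is meant to be polled over the same SSH connection the tests already open to the node that receives the Nagios notification mail. A minimal usage sketch, not part of the diff; `fuel_web` and `toolchain_node` are assumed to come from the surrounding test harness, as elsewhere in this change:

    # Illustrative sketch: wait until the root mailbox on the alerting node
    # contains "Service: nova" and "State: WARNING", or fail after the
    # default 10-minute timeout.
    with fuel_web.get_ssh_for_nailgun_node(toolchain_node) as remote:
        check_local_mail(remote, toolchain_node["name"], "nova", "WARNING")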
@@ -576,7 +576,7 @@ class PluginHelper(object):
                 for service in ha_services:
                     remote_ops.manage_pacemaker_service(remote, service)
                 for service in non_ha_services:
-                    remote_ops.manage_initctl_service(remote, service)
+                    remote_ops.manage_service(remote, service)
 
         logger.info("Restarting services on computes")
         compute_services = (
@@ -586,7 +586,7 @@ class PluginHelper(object):
         for compute in computes:
             with self.fuel_web.get_ssh_for_nailgun_node(compute) as remote:
                 for service in compute_services:
-                    remote_ops.manage_initctl_service(remote, service)
+                    remote_ops.manage_service(remote, service)
 
     @staticmethod
     def check_notifications(got_list, expected_list):
@@ -129,7 +129,7 @@ def manage_pacemaker_service(remote, name, operation="restart"):
         operation=operation, service=name))
 
 
-def manage_initctl_service(remote, name, operation="restart"):
+def manage_service(remote, name, operation="restart"):
     """Operate service on remote node.
 
     :param remote: SSH connection to the node.
@@ -139,8 +139,24 @@ def manage_initctl_service(remote, name, operation="restart"):
     :param operation: type of operation, usually start, stop or restart.
    :type operation: str
     """
-    remote.check_call("initctl {operation} {service}".format(
-        operation=operation, service=name))
+    if remote.execute("service {} status".format(name))['exit_code'] == 0:
+        service_cmd = 'service {service} {operation}'
+    elif remote.execute("initctl status {}".format(name))['exit_code'] == 0:
+        service_cmd = 'initctl {operation} {service}'
+    else:
+        raise Exception('no service handler!')
+
+    remote.check_call(service_cmd.format(service=name, operation=operation))
+
+
+def clear_local_mail(remote):
+    """Clean local mail.
+
+    :param remote: SSH connection to the node.
+    :type remote: SSHClient
+    """
+    remote.check_call("rm -f $MAIL")
 
 
 def fill_up_filesystem(remote, fs, percent, file_name):
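Editor's note: the renamed helper no longer assumes Upstart; it probes "service <name> status" first and falls back to "initctl", raising if neither handler responds, so callers do not need to know which init system manages the process. A hedged usage sketch, not part of the diff; `remote` is assumed to be an SSHClient obtained via get_ssh_for_nailgun_node as in the callers above:

    # Illustrative sketch: restart nova-api regardless of whether the node
    # exposes it through "service" or "initctl".
    manage_service(remote, "nova-api", operation="stop")
    manage_service(remote, "nova-api", operation="start")
    # Reset the root mailbox before waiting for alert emails.
    clear_local_mail(remote)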
@@ -22,6 +22,10 @@ from stacklight_tests.influxdb_grafana.grafana_ui import api as ui_api
 from stacklight_tests.influxdb_grafana import plugin_settings
 
 
+class NotFound(Exception):
+    pass
+
+
 class InfluxdbPluginApi(base_test.PluginApi):
     def __init__(self):
         super(InfluxdbPluginApi, self).__init__()
@@ -179,3 +183,35 @@ class InfluxdbPluginApi(base_test.PluginApi):
         if result:
             return result["series"][0]["values"]
         return []
+
+    def check_cluster_status(self, name, expected_status, interval='3m'):
+        output = ("SELECT last(value) FROM cluster_status WHERE "
+                  "time > now() - {0} AND cluster_name='{1}'".format(interval,
+                                                                     name))
+        msg_header = "Wrong '{0}' service state has been found!".format(
+            name)
+        self._check_influx_query_last_value(output, expected_status,
+                                            msg_header)
+
+    def check_count_of_haproxy_backends(self, service, node_state='down',
+                                        expected_count=0, interval='3m'):
+
+        query = ("SELECT last(value) FROM haproxy_backend_servers WHERE "
+                 "backend='{0}' AND state='{1}' and "
+                 "time > now() - {2}".format(service, node_state, interval))
+
+        msg_header = ("Wrong amount of nodes with service '{0}' "
+                      "in '{1}' state!".format(service, node_state))
+        self._check_influx_query_last_value(query, expected_count, msg_header)
+
+    def _check_influx_query_last_value(self, query, expected_value,
+                                       msg_header):
+        output = self.do_influxdb_query(query)
+        lines = output.json()
+        if not lines['results'][0]:
+            logger.error("The query ['result'] is empty!")
+            raise NotFound
+        state = lines['results'][0]['series'][0]['values'][0][1]
+        asserts.assert_equal(expected_value, state,
+                             msg_header + " Expected {0} but"
+                             " found {1}".format(expected_value, state))
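Editor's note: _check_influx_query_last_value indexes into the JSON document returned by the InfluxDB HTTP API for a "SELECT last(value) ..." query. A small self-contained sketch of the shape it expects (the timestamp and value below are made up for illustration):

    # Hypothetical response body; only the nesting matters here.
    payload = {
        "results": [{
            "series": [{
                "name": "cluster_status",
                "columns": ["time", "last"],
                "values": [["2016-01-01T00:00:00Z", 0]]
            }]
        }]
    }
    # ['results'][0]['series'][0]['values'][0][1] yields the last recorded
    # value (0 here), which is then compared against expected_value.
    assert payload['results'][0]['series'][0]['values'][0][1] == 0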
@@ -11,10 +11,13 @@
 # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 # License for the specific language governing permissions and limitations
 # under the License.
+import six.moves as sm
 
+from devops.helpers import helpers
 from fuelweb_test import logger
 from proboscis import asserts
 
+from selenium.common.exceptions import StaleElementReferenceException
 from selenium.webdriver.common.by import By
 from selenium.webdriver.support import expected_conditions as EC
 from selenium.webdriver.support.ui import WebDriverWait
@@ -90,9 +93,7 @@ class InfraAlertingPluginApi(base_test.PluginApi):
         return "{0}://{1}:{2}".format(self.nagios_protocol,
                                       self.get_nagios_vip(), self.nagios_port)
 
-    def open_nagios_page(self, link_text, anchor):
-        driver = self.ui_tester.get_driver(self.get_authenticated_nagios_url(),
-                                           "//frame[2]", "Nagios Core")
+    def open_nagios_page(self, driver, link_text, anchor):
         driver.switch_to.default_content()
         driver.switch_to.frame(driver.find_element_by_name("side"))
         link = driver.find_element_by_link_text(link_text)
@@ -104,19 +105,19 @@ class InfraAlertingPluginApi(base_test.PluginApi):
         return driver
 
     def check_node_in_nagios(self, changed_node, state):
-        driver = self.open_nagios_page(
-            'Hosts', "//table[@class='headertable']")
-        try:
+        with self.ui_tester.ui_driver(
+                self.get_authenticated_nagios_url(),
+                "//frame[2]", "Nagios Core") as driver:
+            driver = self.open_nagios_page(
+                driver, 'Hosts', "//table[@class='headertable']")
             asserts.assert_equal(state, self.node_is_present(
-                driver, changed_node), "Failed to find node '{0}' on nagios!"
-                .format(changed_node))
-        finally:
-            driver.close()
+                driver, changed_node), "Failed to find node '{0}' "
+                "on nagios!".format(changed_node))
 
     def node_is_present(self, driver, name):
         table = self.ui_tester.get_table(driver,
                                          "/html/body/div[2]/table/tbody")
-        for ind in xrange(2, self.ui_tester.get_table_size(table) + 1):
+        for ind in sm.xrange(2, self.ui_tester.get_table_size(table) + 1):
             node_name = self.ui_tester.get_table_cell(
                 table, ind, 1).text.rstrip()
             if name == node_name:
@@ -131,3 +132,71 @@ class InfraAlertingPluginApi(base_test.PluginApi):
     def check_uninstall_failure(self):
         return self.helpers.check_plugin_cannot_be_uninstalled(
             self.settings.name, self.settings.version)
+
+    def get_services_for_node(self, table, node_name, driver,
+                              table_xpath="/html/body/table[3]/tbody"):
+        services = {}
+        found_node = False
+        ind = 2
+        while ind < self.ui_tester.get_table_size(table) + 1:
+            try:
+                if not self.ui_tester.get_table_row(table, ind).text:
+                    if found_node:
+                        break
+                    else:
+                        continue
+                if self.ui_tester.get_table_cell(
+                        table, ind, 1).text == node_name:
+                    found_node = True
+                if found_node:
+                    services[self.ui_tester.get_table_cell(
+                        table, ind, 2).text] = (
+                        self.ui_tester.get_table_cell(table, ind, 3).text)
+            except StaleElementReferenceException:
+                table = self.ui_tester.get_table(driver, table_xpath)
+                ind -= 1
+            ind += 1
+
+        return services
+
+    def check_service_state_on_nagios(self, driver, service_state=None,
+                                      node_names=None):
+        self.open_nagios_page(
+            driver, 'Services', "//table[@class='headertable']")
+        table = self.ui_tester.get_table(driver, "/html/body/table[3]/tbody")
+        if not node_names:
+            node_names = [self.ui_tester.get_table_cell(table, 2, 1).text]
+        for node in node_names:
+            node_services = self.get_services_for_node(table, node, driver)
+            if service_state:
+                for service in service_state:
+                    if service_state[service] != node_services[service]:
+                        return False
+            else:
+                for service in node_services:
+                    if 'OK' != node_services[service]:
+                        return False
+        return True
+
+    def wait_service_state_on_nagios(self, driver, service_state=None,
+                                     node_names=None):
+        msg = ("Fail to get expected service states for services: {0} "
+               "on nodes: {1}")
+
+        if not service_state or not node_names:
+            self.open_nagios_page(
+                driver, 'Services', "//table[@class='headertable']")
+            table = self.ui_tester.get_table(driver,
+                                             "/html/body/table[3]/tbody")
+            if not node_names:
+                node_names = [self.ui_tester.get_table_cell(table, 2, 1).text]
+            if not service_state:
+                service_state = dict((key, 'OK') for key in
+                                     self.get_services_for_node(
+                                         table, node_names[0], driver))
+
+        msg = msg.format([key for key in service_state], node_names)
+
+        helpers.wait(lambda: self.check_service_state_on_nagios(
+            driver, service_state, node_names), timeout=60 * 5,
+            timeout_msg=msg)
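Editor's note: a minimal usage sketch of the new waiter, not part of the diff. The `driver` object is assumed to come from ui_tester.ui_driver(...) as in the tests added below, and the hostname is hypothetical:

    # Illustrative sketch only.
    nagios = InfraAlertingPluginApi()
    # Poll the Nagios "Services" page until the 'nova' service reports
    # WARNING on any node; helpers.wait enforces the 5-minute timeout.
    nagios.wait_service_state_on_nagios(driver, {'nova': 'WARNING'})
    # Restrict the check to a single host (hypothetical hostname).
    nagios.wait_service_state_on_nagios(
        driver, {'mysql-nodes.mysql-fs': 'CRITICAL'}, ['node-1.domain.tld'])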
@@ -45,8 +45,8 @@ class ToolchainApi(object):
         self.ELASTICSEARCH_KIBANA = elasticsearch_api.ElasticsearchPluginApi()
         self.INFLUXDB_GRAFANA = influx_api.InfluxdbPluginApi()
         self.LMA_COLLECTOR = collector_api.LMACollectorPluginApi()
-        self.LMA_INFRASTRUCTURE_ALERTING = \
-            infrastructure_alerting_api.InfraAlertingPluginApi()
+        self.LMA_INFRASTRUCTURE_ALERTING = (
+            infrastructure_alerting_api.InfraAlertingPluginApi())
         self._plugins = {
             self.ELASTICSEARCH_KIBANA,
             self.INFLUXDB_GRAFANA,
@@ -373,3 +373,132 @@ class ToolchainApi(object):
         msg = "Failed to set vm_memory_high_watermark to {}".format(limit)
         devops_helpers.wait(check_result, timeout=timeout,
                             interval=10, timeout_msg=msg)
+
+    def change_verify_service_state(self, service_name, action, new_state,
+                                    service_state_in_influx,
+                                    down_backends_in_haproxy, toolchain_node,
+                                    controller_nodes, nagios_driver):
+        """Verify that the alerts for services show up in the Grafana
+        and Nagios UI.
+
+        :param service_name: name of the service to change state of.
+            Format [service name, service name on dashboard],
+            e.g. ['nova-api', 'nova'].
+        :type service_name: list
+        :param action: action to perform (e.g. stop, start).
+        :type action: str
+        :param new_state: new state of the service.
+        :type new_state: str
+        :param service_state_in_influx: new state of the service in InfluxDB.
+        :type service_state_in_influx: int
+        :param down_backends_in_haproxy: amount of backends in 'down' state.
+        :type down_backends_in_haproxy: int
+        :param toolchain_node: toolchain node with
+            infrastructure_alerting_ui vip.
+        :type toolchain_node: dict
+        :param controller_nodes: list of the controller nodes to change
+            service state on.
+        :type controller_nodes: list
+        :param nagios_driver: Selenium web driver.
+        :type nagios_driver: WebDriver
+        """
+
+        logger.info("Changing state of service {0}. "
+                    "New state is {1}".format(service_name[0], new_state))
+        with self.fuel_web.get_ssh_for_nailgun_node(toolchain_node) as remote:
+            self.remote_ops.clear_local_mail(remote)
+        for node in controller_nodes:
+            with self.helpers.fuel_web.get_ssh_for_nailgun_node(
+                    node) as remote:
+                self.remote_ops.manage_service(remote, service_name[0], action)
+        self.LMA_INFRASTRUCTURE_ALERTING.wait_service_state_on_nagios(
+            nagios_driver, {service_name[1]: new_state})
+        self.INFLUXDB_GRAFANA.check_cluster_status(
+            service_name[1], service_state_in_influx)
+        self.INFLUXDB_GRAFANA.check_count_of_haproxy_backends(
+            service_name[0], expected_count=down_backends_in_haproxy)
+        with self.helpers.fuel_web.get_ssh_for_nailgun_node(
+                toolchain_node) as remote:
+            self.checkers.check_local_mail(
+                remote, toolchain_node["name"], service_name[1], new_state)
+
+    def change_verify_node_service_state(self, services, state, influx_state,
+                                         percent, toolchain_node,
+                                         controller_nodes, nagios_driver):
+        """Verify that the alerts for nodes show up in the Grafana
+        and Nagios UI.
+
+        :param services: list of services to check new status of. Format
+            ['mysql', 'mysql-nodes.mysql-fs'].
+        :type services: list
+        :param state: new state of the service.
+        :type state: str
+        :param influx_state: new InfluxDB state.
+        :type influx_state: int
+        :param percent: amount of space to be filled on a node.
+        :type percent: int
+        :param toolchain_node: toolchain node with
+            infrastructure_alerting_ui vip.
+        :type toolchain_node: dict
+        :param controller_nodes: list of the controller nodes to change
+            service state on.
+        :type controller_nodes: list
+        :param nagios_driver: Selenium web driver.
+        :type nagios_driver: WebDriver
+        """
+
+        with self.fuel_web.get_ssh_for_nailgun_node(toolchain_node) as remote:
+            self.remote_ops.clear_local_mail(remote)
+
+        with self.fuel_web.get_ssh_for_nailgun_node(
+                controller_nodes[0]) as remote:
+            self.remote_ops.fill_up_filesystem(
+                remote, "/dev/mapper/mysql-root", percent,
+                "/var/lib/mysql/test/bigfile")
+
+        self.LMA_INFRASTRUCTURE_ALERTING.wait_service_state_on_nagios(
+            nagios_driver, {services[0]: 'OK'})
+        self.LMA_INFRASTRUCTURE_ALERTING.wait_service_state_on_nagios(
+            nagios_driver, {services[1]: state},
+            [controller_nodes[0]['hostname']])
+        self.INFLUXDB_GRAFANA.check_cluster_status(services[0],
+                                                   self.settings.OKAY)
+
+        with self.fuel_web.get_ssh_for_nailgun_node(
+                controller_nodes[1]) as remote:
+            self.remote_ops.fill_up_filesystem(
+                remote, "/dev/mapper/mysql-root", percent,
+                "/var/lib/mysql/test/bigfile")
+
+        for node in controller_nodes:
+            self.LMA_INFRASTRUCTURE_ALERTING.wait_service_state_on_nagios(
+                nagios_driver, {services[0]: state})
+            self.LMA_INFRASTRUCTURE_ALERTING.wait_service_state_on_nagios(
+                nagios_driver, {services[1]: state}, [node['hostname']])
+        self.INFLUXDB_GRAFANA.check_cluster_status(services[0], influx_state)
+
+        with self.helpers.fuel_web.get_ssh_for_nailgun_node(
+                toolchain_node) as remote:
+            self.checkers.check_local_mail(
+                remote, toolchain_node["name"], services[0], state)
+
+        for node in controller_nodes:
+            with self.fuel_web.get_ssh_for_nailgun_node(node) as remote:
+                self.remote_ops.clean_filesystem(remote,
+                                                 "/var/lib/mysql/test/bigfile")
+
+        for node in controller_nodes:
+            self.LMA_INFRASTRUCTURE_ALERTING.wait_service_state_on_nagios(
+                nagios_driver, {services[0]: 'OK'})
+            self.LMA_INFRASTRUCTURE_ALERTING.wait_service_state_on_nagios(
+                nagios_driver, {services[1]: 'OK'}, [node['hostname']])
+        self.INFLUXDB_GRAFANA.check_cluster_status(services[0],
+                                                   self.settings.OKAY)
+
+        with self.helpers.fuel_web.get_ssh_for_nailgun_node(
+                toolchain_node) as remote:
+            self.checkers.check_local_mail(
+                remote, toolchain_node["name"], services[0], 'OK')
@@ -13,6 +13,7 @@
 # under the License.
 
 from fuelweb_test.helpers.decorators import log_snapshot_after_test
+from fuelweb_test import logger
 from proboscis import test
 
 from stacklight_tests.toolchain import api
@@ -234,3 +235,336 @@ class TestFunctionalToolchain(api.ToolchainApi):
         self.check_plugins_online()
 
         self.check_cinder_notifications()
+
+    @test(depends_on_groups=["deploy_ha_toolchain"],
+          groups=["toolchain_warning_alert_service", "service_restart",
+                  "toolchain", "functional"])
+    @log_snapshot_after_test
+    def toolchain_warning_alert_service(self):
+        """Verify that the warning alerts for services show up in the
+        Grafana and Nagios UI.
+
+        Scenario:
+            1. Connect to one of the controller nodes using ssh and
+               stop the nova-api service.
+            2. Wait for at least 1 minute.
+            3. On Grafana, check the following items:
+                - the box in the upper left corner of the dashboard
+                  displays 'WARN' with an orange background,
+                - the API panels report 1 entity as down.
+            4. On Nagios, check the following items:
+                - the 'nova' service is in 'WARNING' state,
+                - the local user root on the lma node has received
+                  an email about the service being in warning state.
+            5. Restart the nova-api service.
+            6. Wait for at least 1 minute.
+            7. On Grafana, check the following items:
+                - the box in the upper left corner of the dashboard
+                  displays 'OKAY' with a green background,
+                - the API panels report 0 entities as down.
+            8. On Nagios, check the following items:
+                - the 'nova' service is in 'OK' state,
+                - the local user root on the lma node has received
+                  an email about the recovery of the service.
+            9. Repeat steps 2 to 8 for the following services:
+                - Nova (stopping and starting the nova-api and
+                  nova-scheduler services respectively).
+                - Cinder (stopping and starting the cinder-api and
+                  cinder-scheduler services respectively).
+                - Neutron (stopping and starting the neutron-server
+                  and neutron-openvswitch-agent services respectively).
+                - Glance (stopping and starting the glance-api service).
+                - Heat (stopping and starting the heat-api service).
+                - Keystone (stopping and starting the Apache service).
+
+        Duration 45m
+        """
+        self.env.revert_snapshot("deploy_ha_toolchain")
+
+        services = {
+            'nova': ['nova-api', 'nova-scheduler'],
+            'cinder': ['cinder-api', 'cinder-scheduler'],
+            'neutron': ['neutron-server', 'neutron-openvswitch-agent'],
+            'glance': ['glance-api'],
+            'heat': ['heat-api'],
+            'keystone': ['apache2']
+        }
+
+        lma_devops_node = self.helpers.get_node_with_vip(
+            self.settings.stacklight_roles,
+            self.helpers.full_vip_name(
+                self.LMA_INFRASTRUCTURE_ALERTING.settings.failover_vip))
+        toolchain_node = self.fuel_web.get_nailgun_node_by_devops_node(
+            lma_devops_node)
+
+        url = self.LMA_INFRASTRUCTURE_ALERTING.get_authenticated_nagios_url()
+        with self.ui_tester.ui_driver(url, "//frame[2]",
+                                      "Nagios Core") as driver:
+            self.LMA_INFRASTRUCTURE_ALERTING.open_nagios_page(
+                driver, 'Services', "//table[@class='headertable']")
+            controller_node = (
+                self.fuel_web.get_nailgun_cluster_nodes_by_roles(
+                    self.helpers.cluster_id, ['controller'])[0])
+            for key in services:
+                for service in services[key]:
+                    self.change_verify_service_state(
+                        service_name=[service, key], action='stop',
+                        new_state='WARNING',
+                        service_state_in_influx=self.settings.WARN,
+                        down_backends_in_haproxy=1,
+                        toolchain_node=toolchain_node,
+                        controller_nodes=[controller_node],
+                        nagios_driver=driver)
+                    self.change_verify_service_state(
+                        service_name=[service, key], action='start',
+                        new_state='OK',
+                        service_state_in_influx=self.settings.OKAY,
+                        down_backends_in_haproxy=0,
+                        toolchain_node=toolchain_node,
+                        controller_nodes=[controller_node],
+                        nagios_driver=driver)
+
+    @test(depends_on_groups=["deploy_ha_toolchain"],
+          groups=["toolchain_critical_alert_service", "service_restart",
+                  "toolchain", "functional"])
+    # @log_snapshot_after_test
+    def toolchain_critical_alert_service(self):
+        """Verify that the critical alerts for services show up in
+        the Grafana and Nagios UI.
+
+        Scenario:
+            1. Open the Nagios URL
+            2. Connect to one of the controller nodes using ssh and
+               stop the nova-api service.
+            3. Connect to a second controller node using ssh and stop
+               the nova-api service.
+            4. Wait for at least 1 minute.
+            5. On Nagios, check the following items:
+                - the 'nova' service is in 'CRITICAL' state,
+                - the local user root on the lma node has received
+                  an email about the service being in critical state.
+            6. Restart the nova-api service on both nodes.
+            7. Wait for at least 1 minute.
+            8. On Nagios, check the following items:
+                - the 'nova' service is in 'OK' state,
+                - the local user root on the lma node has received
+                  an email about the recovery of the service.
+            9. Repeat steps 2 to 8 for the following services:
+                - Nova (stopping and starting the nova-api and
+                  nova-scheduler services respectively).
+                - Cinder (stopping and starting the cinder-api and
+                  cinder-scheduler services respectively).
+                - Neutron (stopping and starting the neutron-server
+                  and neutron-openvswitch-agent services respectively).
+                - Glance (stopping and starting the glance-api service).
+                - Heat (stopping and starting the heat-api service).
+                - Keystone (stopping and starting the Apache service).
+
+        Duration 45m
+        """
+        self.env.revert_snapshot("deploy_ha_toolchain")
+
+        services = {
+            'nova': ['nova-api', 'nova-scheduler'],
+            'cinder': ['cinder-api', 'cinder-scheduler'],
+            'neutron': ['neutron-server', 'neutron-openvswitch-agent'],
+            'glance': ['glance-api'],
+            'heat': ['heat-api'],
+            'keystone': ['apache2']
+        }
+
+        lma_devops_node = self.helpers.get_node_with_vip(
+            self.settings.stacklight_roles,
+            self.helpers.full_vip_name(
+                self.LMA_INFRASTRUCTURE_ALERTING.settings.failover_vip))
+        toolchain_node = self.fuel_web.get_nailgun_node_by_devops_node(
+            lma_devops_node)
+
+        url = self.LMA_INFRASTRUCTURE_ALERTING.get_authenticated_nagios_url()
+        with self.ui_tester.ui_driver(url, "//frame[2]",
+                                      "Nagios Core") as driver:
+            self.LMA_INFRASTRUCTURE_ALERTING.open_nagios_page(
+                driver, 'Services', "//table[@class='headertable']")
+            controller_nodes = (
+                self.fuel_web.get_nailgun_cluster_nodes_by_roles(
+                    self.helpers.cluster_id, ['controller']))
+            for key in services:
+                for service in services[key]:
+                    logger.info("Checking service {0}".format(service))
+                    self.change_verify_service_state(
+                        service_name=[service, key], action='stop',
+                        new_state='CRITICAL',
+                        service_state_in_influx=self.settings.CRIT,
+                        down_backends_in_haproxy=2,
+                        toolchain_node=toolchain_node,
+                        controller_nodes=[controller_nodes[0],
+                                          controller_nodes[1]],
+                        nagios_driver=driver)
+                    self.change_verify_service_state(
+                        service_name=[service, key], action='start',
+                        new_state='OK',
+                        service_state_in_influx=self.settings.OKAY,
+                        down_backends_in_haproxy=0,
+                        toolchain_node=toolchain_node,
+                        controller_nodes=[controller_nodes[0],
+                                          controller_nodes[1]],
+                        nagios_driver=driver)
+
+    @test(depends_on_groups=["deploy_ha_toolchain"],
+          groups=["toolchain_warning_alert_node", "node_alert_warning",
+                  "toolchain", "functional"])
+    @log_snapshot_after_test
+    def toolchain_warning_alert_node(self):
+        """Verify that the warning alerts for nodes show up in the
+        Grafana and Nagios UI.
+
+        Scenario:
+            1. Open the Nagios URL
+            2. Open the Grafana URL
+            3. Connect to one of the controller nodes using ssh and run:
+               fallocate -l $(df | grep /dev/mapper/mysql-root
+               | awk '{ printf("%.0f\n", 1024 * ((($3 + $4) * 96
+               / 100) - $3))}') /var/lib/mysql/test
+            4. Wait for at least 1 minute.
+            5. On Grafana, check the following items:
+                - the box in the upper left corner of the dashboard
+                  displays 'OKAY' with a green background.
+            6. On Nagios, check the following items:
+                - the 'mysql' service is in 'OK' state,
+                - the 'mysql-nodes.mysql-fs' service is in 'WARNING'
+                  state for the node.
+            7. Connect to a second controller node using ssh and run:
+               fallocate -l $(df | grep /dev/mapper/mysql-root
+               | awk '{ printf("%.0f\n", 1024 * ((($3 + $4) * 96
+               / 100) - $3))}') /var/lib/mysql/test
+            8. Wait for at least 1 minute.
+            9. On Grafana, check the following items:
+                - the box in the upper left corner of the dashboard
+                  displays 'WARN' with an orange background,
+                - an annotation telling that the service went from 'OKAY'
+                  to 'WARN' is displayed.
+            10. On Nagios, check the following items:
+                - the 'mysql' service is in 'WARNING' state,
+                - the 'mysql-nodes.mysql-fs' service is in 'WARNING'
+                  state for the 2 nodes,
+                - the local user root on the lma node has received an
+                  email about the service being in warning state.
+            11. Run the following command on both controller nodes:
+                rm /var/lib/mysql/test
+            12. Wait for at least 1 minute.
+            13. On Grafana, check the following items:
+                - the box in the upper left corner of the dashboard
+                  displays 'OKAY' with a green background,
+                - an annotation telling that the service went from 'WARN'
+                  to 'OKAY' is displayed.
+            14. On Nagios, check the following items:
+                - the 'mysql' service is in 'OK' state,
+                - the 'mysql-nodes.mysql-fs' service is in 'OK' state
+                  for the 2 nodes,
+                - the local user root on the lma node has received an
+                  email about the recovery of the service.
+
+        Duration 15m
+        """
+        self.env.revert_snapshot("deploy_ha_toolchain")
+
+        lma_devops_node = self.helpers.get_node_with_vip(
+            self.settings.stacklight_roles,
+            self.helpers.full_vip_name("infrastructure_alerting_mgmt_vip"))
+        toolchain_node = self.fuel_web.get_nailgun_node_by_devops_node(
+            lma_devops_node)
+        nailgun_nodes = self.fuel_web.get_nailgun_cluster_nodes_by_roles(
+            self.helpers.cluster_id, ['controller'])
+
+        url = self.LMA_INFRASTRUCTURE_ALERTING.get_authenticated_nagios_url()
+        with self.ui_tester.ui_driver(url, "//frame[2]",
+                                      "Nagios Core") as driver:
+            self.LMA_INFRASTRUCTURE_ALERTING.open_nagios_page(
+                driver, 'Services', "//table[@class='headertable']")
+            self.change_verify_node_service_state(
+                ['mysql', 'mysql-nodes.mysql-fs'], 'WARNING',
+                self.settings.WARN, '96', toolchain_node,
+                [nailgun_nodes[0], nailgun_nodes[1]], driver)
+
+    @test(depends_on_groups=["deploy_ha_toolchain"],
+          groups=["toolchain_critical_alert_node", "node_alert_critical",
+                  "toolchain", "functional"])
+    @log_snapshot_after_test
+    def toolchain_critical_alert_node(self):
+        """Verify that the critical alerts for nodes show up in the
+        Grafana and Nagios UI.
+
+        Scenario:
+            1. Open the Nagios URL
+            2. Open the Grafana URL
+            3. Connect to one of the controller nodes using ssh and run:
+               fallocate -l $(df | grep /dev/mapper/mysql-root
+               | awk '{ printf("%.0f\n", 1024 * ((($3 + $4) *
+               98 / 100) - $3))}') /var/lib/mysql/test
+            4. Wait for at least 1 minute.
+            5. On Grafana, check the following items:
+                - the box in the upper left corner of the dashboard
+                  displays 'OKAY' with a green background.
+            6. On Nagios, check the following items:
+                - the 'mysql' service is in 'OK' state,
+                - the 'mysql-nodes.mysql-fs' service is in 'CRITICAL'
+                  state for the node.
+            7. Connect to a second controller node using ssh and run:
+               fallocate -l $(df | grep /dev/mapper/mysql-root
+               | awk '{ printf("%.0f\n", 1024 * ((($3 + $4) *
+               98 / 100) - $3))}') /var/lib/mysql/test
+            8. Wait for at least 1 minute.
+            9. On Grafana, check the following items:
+                - the box in the upper left corner of the dashboard
+                  displays 'CRIT' with an orange background,
+                - an annotation telling that the service went from 'OKAY'
+                  to 'CRIT' is displayed.
+            10. On Nagios, check the following items:
+                - the 'mysql' service is in 'CRITICAL' state,
+                - the 'mysql-nodes.mysql-fs' service is in 'CRITICAL'
+                  state for the 2 nodes,
+                - the local user root on the lma node has received an
+                  email about the service being in critical state.
+            11. Run the following command on both controller nodes:
+                rm /var/lib/mysql/test
+            12. Wait for at least 1 minute.
+            13. On Grafana, check the following items:
+                - the box in the upper left corner of the dashboard
+                  displays 'OKAY' with a green background,
+                - an annotation telling that the service went from 'CRIT'
+                  to 'OKAY' is displayed.
+            14. On Nagios, check the following items:
+                - the 'mysql' service is in 'OK' state,
+                - the 'mysql-nodes.mysql-fs' service is in 'OK' state
+                  for the 2 nodes,
+                - the local user root on the lma node has received an
+                  email about the recovery of the service.
+
+        Duration 15m
+        """
+        self.env.revert_snapshot("deploy_ha_toolchain")
+
+        lma_devops_node = self.helpers.get_node_with_vip(
+            self.settings.stacklight_roles,
+            self.helpers.full_vip_name("infrastructure_alerting_mgmt_vip"))
+        toolchain_node = self.fuel_web.get_nailgun_node_by_devops_node(
+            lma_devops_node)
+        nailgun_nodes = self.fuel_web.get_nailgun_cluster_nodes_by_roles(
+            self.helpers.cluster_id, ['controller'])
+
+        url = self.LMA_INFRASTRUCTURE_ALERTING.get_authenticated_nagios_url()
+        with self.ui_tester.ui_driver(url, "//frame[2]",
+                                      "Nagios Core") as driver:
+            self.LMA_INFRASTRUCTURE_ALERTING.open_nagios_page(
+                driver, 'Services', "//table[@class='headertable']")
+            self.change_verify_node_service_state(
+                ['mysql', 'mysql-nodes.mysql-fs'], 'CRITICAL',
+                self.settings.UNKW, '98', toolchain_node,
+                [nailgun_nodes[0], nailgun_nodes[1]], driver)
@@ -27,6 +27,12 @@ stacklight_roles = (elasticsearch_settings.role_name +
                     collector_settings.role_name +
                     infrastructure_alerting_settings.role_name)
 
+OKAY = 0
+WARN = 1
+UNKW = 2
+CRIT = 3
+DOWN = 4
+
 base_nodes = {
     'slave-01': ['controller'],
     'slave-02': ['compute', 'cinder'],