Add four toolchain functional tests
Add four toolchain functional tests. Add toolchain helper methods.

Change-Id: I0ea3613d8e8e44a2dfa9d5a9bea26c0e9b793ee4
This commit is contained in: parent 6b41a46fad, commit e4b4ef93de
@@ -15,6 +15,8 @@
 from contextlib import closing
 import socket
 
+from devops.error import DevopsCalledProcessError
+from devops.helpers import helpers as devops_helpers
 from proboscis import asserts
 import requests
 from requests.packages.urllib3 import poolmanager
@@ -83,3 +85,32 @@ def check_port(address, port):
     """
     with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as sock:
         return sock.connect_ex((address, port)) == 0
+
+
+def check_local_mail(remote, node_name, service, state, timeout=10 * 60):
+    """Check that an email from the LMA Infrastructure Alerting plugin about
+    a service changing its state is present on the host.
+
+    :param remote: SSH connection to the node.
+    :type remote: SSHClient
+    :param node_name: name of the node to check for email on.
+    :type node_name: str
+    :param service: name of the service the email reports on.
+    :type service: str
+    :param state: service state the email reports.
+    :type state: str
+    :param timeout: timeout to wait for email to arrive.
+    :type timeout: int
+    """
+    def check_mail():
+        try:
+            response = remote.check_call("cat $MAIL")
+            if not response:
+                return False
+            if ("Service: {}\n".format(service) in response['stdout'] and
+                    "State: {}\n".format(state) in response['stdout']):
+                return True
+        except DevopsCalledProcessError:
+            return False
+
+    msg = ("Email about service {0} in {1} state was not "
+           "found on {2} after {3} seconds").format(
+        service, state, node_name, timeout)
+    devops_helpers.wait(check_mail, timeout=timeout, timeout_msg=msg)
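Editor's note: the new checker is meant to be polled over the same SSH connection the tests already open to the node that receives the Nagios notification mail. A minimal usage sketch, not part of the diff; `fuel_web` and `toolchain_node` are assumed to come from the surrounding test harness, as elsewhere in this change:

    # Illustrative sketch: wait until the root mailbox on the alerting node
    # contains "Service: nova" and "State: WARNING", or fail after the
    # default 10-minute timeout.
    with fuel_web.get_ssh_for_nailgun_node(toolchain_node) as remote:
        check_local_mail(remote, toolchain_node["name"], "nova", "WARNING")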
@@ -576,7 +576,7 @@ class PluginHelper(object):
                 for service in ha_services:
                     remote_ops.manage_pacemaker_service(remote, service)
                 for service in non_ha_services:
-                    remote_ops.manage_initctl_service(remote, service)
+                    remote_ops.manage_service(remote, service)
 
         logger.info("Restarting services on computes")
         compute_services = (
@@ -586,7 +586,7 @@ class PluginHelper(object):
         for compute in computes:
             with self.fuel_web.get_ssh_for_nailgun_node(compute) as remote:
                 for service in compute_services:
-                    remote_ops.manage_initctl_service(remote, service)
+                    remote_ops.manage_service(remote, service)
 
     @staticmethod
     def check_notifications(got_list, expected_list):
@@ -129,7 +129,7 @@ def manage_pacemaker_service(remote, name, operation="restart"):
         operation=operation, service=name))
 
 
-def manage_initctl_service(remote, name, operation="restart"):
+def manage_service(remote, name, operation="restart"):
     """Operate service on remote node.
 
     :param remote: SSH connection to the node.
@@ -139,8 +139,24 @@ def manage_initctl_service(remote, name, operation="restart"):
     :param operation: type of operation, usually start, stop or restart.
    :type operation: str
     """
-    remote.check_call("initctl {operation} {service}".format(
-        operation=operation, service=name))
+    if remote.execute("service {} status".format(name))['exit_code'] == 0:
+        service_cmd = 'service {service} {operation}'
+    elif remote.execute("initctl status {}".format(name))['exit_code'] == 0:
+        service_cmd = 'initctl {operation} {service}'
+    else:
+        raise Exception('no service handler!')
+
+    remote.check_call(service_cmd.format(service=name, operation=operation))
+
+
+def clear_local_mail(remote):
+    """Clean local mail.
+
+    :param remote: SSH connection to the node.
+    :type remote: SSHClient
+    """
+    remote.check_call("rm -f $MAIL")
 
 
 def fill_up_filesystem(remote, fs, percent, file_name):
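Editor's note: the renamed helper no longer assumes Upstart; it probes "service <name> status" first and falls back to "initctl", raising if neither handler responds, so callers do not need to know which init system manages the process. A hedged usage sketch, not part of the diff; `remote` is assumed to be an SSHClient obtained via get_ssh_for_nailgun_node as in the callers above:

    # Illustrative sketch: restart nova-api regardless of whether the node
    # exposes it through "service" or "initctl".
    manage_service(remote, "nova-api", operation="stop")
    manage_service(remote, "nova-api", operation="start")
    # Reset the root mailbox before waiting for alert emails.
    clear_local_mail(remote)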
@@ -22,6 +22,10 @@ from stacklight_tests.influxdb_grafana.grafana_ui import api as ui_api
 from stacklight_tests.influxdb_grafana import plugin_settings
 
 
+class NotFound(Exception):
+    pass
+
+
 class InfluxdbPluginApi(base_test.PluginApi):
     def __init__(self):
         super(InfluxdbPluginApi, self).__init__()
@@ -179,3 +183,35 @@ class InfluxdbPluginApi(base_test.PluginApi):
         if result:
             return result["series"][0]["values"]
         return []
+
+    def check_cluster_status(self, name, expected_status, interval='3m'):
+        output = ("SELECT last(value) FROM cluster_status WHERE "
+                  "time > now() - {0} AND cluster_name='{1}'".format(interval,
+                                                                     name))
+        msg_header = "Wrong '{0}' service state has been found!".format(
+            name)
+        self._check_influx_query_last_value(output, expected_status,
+                                            msg_header)
+
+    def check_count_of_haproxy_backends(self, service, node_state='down',
+                                        expected_count=0, interval='3m'):
+
+        query = ("SELECT last(value) FROM haproxy_backend_servers WHERE "
+                 "backend='{0}' AND state='{1}' and "
+                 "time > now() - {2}".format(service, node_state, interval))
+
+        msg_header = ("Wrong amount of nodes with service '{0}' "
+                      "in '{1}' state!".format(service, node_state))
+        self._check_influx_query_last_value(query, expected_count, msg_header)
+
+    def _check_influx_query_last_value(self, query, expected_value,
+                                       msg_header):
+        output = self.do_influxdb_query(query)
+        lines = output.json()
+        if not lines['results'][0]:
+            logger.error("The query ['result'] is empty!")
+            raise NotFound
+        state = lines['results'][0]['series'][0]['values'][0][1]
+        asserts.assert_equal(expected_value, state,
+                             msg_header + " Expected {0} but"
+                             " found {1}".format(expected_value, state))
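Editor's note: _check_influx_query_last_value indexes into the JSON document returned by the InfluxDB HTTP API for a "SELECT last(value) ..." query. A small self-contained sketch of the shape it expects (the timestamp and value below are made up for illustration):

    # Hypothetical response body; only the nesting matters here.
    payload = {
        "results": [{
            "series": [{
                "name": "cluster_status",
                "columns": ["time", "last"],
                "values": [["2016-01-01T00:00:00Z", 0]]
            }]
        }]
    }
    # ['results'][0]['series'][0]['values'][0][1] yields the last recorded
    # value (0 here), which is then compared against expected_value.
    assert payload['results'][0]['series'][0]['values'][0][1] == 0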
@@ -11,10 +11,13 @@
 # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 # License for the specific language governing permissions and limitations
 # under the License.
+import six.moves as sm
 
+from devops.helpers import helpers
 from fuelweb_test import logger
 from proboscis import asserts
 
+from selenium.common.exceptions import StaleElementReferenceException
 from selenium.webdriver.common.by import By
 from selenium.webdriver.support import expected_conditions as EC
 from selenium.webdriver.support.ui import WebDriverWait
@@ -90,9 +93,7 @@ class InfraAlertingPluginApi(base_test.PluginApi):
         return "{0}://{1}:{2}".format(self.nagios_protocol,
                                       self.get_nagios_vip(), self.nagios_port)
 
-    def open_nagios_page(self, link_text, anchor):
-        driver = self.ui_tester.get_driver(self.get_authenticated_nagios_url(),
-                                           "//frame[2]", "Nagios Core")
+    def open_nagios_page(self, driver, link_text, anchor):
         driver.switch_to.default_content()
         driver.switch_to.frame(driver.find_element_by_name("side"))
         link = driver.find_element_by_link_text(link_text)
@@ -104,19 +105,19 @@ class InfraAlertingPluginApi(base_test.PluginApi):
         return driver
 
     def check_node_in_nagios(self, changed_node, state):
-        driver = self.open_nagios_page(
-            'Hosts', "//table[@class='headertable']")
-        try:
+        with self.ui_tester.ui_driver(
+                self.get_authenticated_nagios_url(),
+                "//frame[2]", "Nagios Core") as driver:
+            driver = self.open_nagios_page(
+                driver, 'Hosts', "//table[@class='headertable']")
             asserts.assert_equal(state, self.node_is_present(
-                driver, changed_node), "Failed to find node '{0}' on nagios!"
-                .format(changed_node))
-        finally:
-            driver.close()
+                driver, changed_node), "Failed to find node '{0}' "
+                "on nagios!".format(changed_node))
 
     def node_is_present(self, driver, name):
         table = self.ui_tester.get_table(driver,
                                          "/html/body/div[2]/table/tbody")
-        for ind in xrange(2, self.ui_tester.get_table_size(table) + 1):
+        for ind in sm.xrange(2, self.ui_tester.get_table_size(table) + 1):
             node_name = self.ui_tester.get_table_cell(
                 table, ind, 1).text.rstrip()
             if name == node_name:
@@ -131,3 +132,71 @@ class InfraAlertingPluginApi(base_test.PluginApi):
     def check_uninstall_failure(self):
         return self.helpers.check_plugin_cannot_be_uninstalled(
             self.settings.name, self.settings.version)
+
+    def get_services_for_node(self, table, node_name, driver,
+                              table_xpath="/html/body/table[3]/tbody"):
+        services = {}
+        found_node = False
+        ind = 2
+        while ind < self.ui_tester.get_table_size(table) + 1:
+            try:
+                if not self.ui_tester.get_table_row(table, ind).text:
+                    if found_node:
+                        break
+                    else:
+                        continue
+                if self.ui_tester.get_table_cell(
+                        table, ind, 1).text == node_name:
+                    found_node = True
+                if found_node:
+                    services[self.ui_tester.get_table_cell(
+                        table, ind, 2).text] = (
+                        self.ui_tester.get_table_cell(table, ind, 3).text)
+            except StaleElementReferenceException:
+                table = self.ui_tester.get_table(driver, table_xpath)
+                ind -= 1
+            ind += 1
+
+        return services
+
+    def check_service_state_on_nagios(self, driver, service_state=None,
+                                      node_names=None):
+        self.open_nagios_page(
+            driver, 'Services', "//table[@class='headertable']")
+        table = self.ui_tester.get_table(driver, "/html/body/table[3]/tbody")
+        if not node_names:
+            node_names = [self.ui_tester.get_table_cell(table, 2, 1).text]
+        for node in node_names:
+            node_services = self.get_services_for_node(table, node, driver)
+            if service_state:
+                for service in service_state:
+                    if service_state[service] != node_services[service]:
+                        return False
+            else:
+                for service in node_services:
+                    if 'OK' != node_services[service]:
+                        return False
+        return True
+
+    def wait_service_state_on_nagios(self, driver, service_state=None,
+                                     node_names=None):
+        msg = ("Fail to get expected service states for services: {0} "
+               "on nodes: {1}")
+
+        if not service_state or not node_names:
+            self.open_nagios_page(
+                driver, 'Services', "//table[@class='headertable']")
+            table = self.ui_tester.get_table(driver,
+                                             "/html/body/table[3]/tbody")
+            if not node_names:
+                node_names = [self.ui_tester.get_table_cell(table, 2, 1).text]
+            if not service_state:
+                service_state = dict((key, 'OK') for key in
+                                     self.get_services_for_node(
+                                         table, node_names[0], driver))
+
+        msg = msg.format([key for key in service_state], node_names)
+
+        helpers.wait(lambda: self.check_service_state_on_nagios(
+            driver, service_state, node_names), timeout=60 * 5,
+            timeout_msg=msg)
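Editor's note: a minimal usage sketch of the new waiter, not part of the diff. The `driver` object is assumed to come from ui_tester.ui_driver(...) as in the tests added below, and the hostname is hypothetical:

    # Illustrative sketch only.
    nagios = InfraAlertingPluginApi()
    # Poll the Nagios "Services" page until the 'nova' service reports
    # WARNING on any node; helpers.wait enforces the 5-minute timeout.
    nagios.wait_service_state_on_nagios(driver, {'nova': 'WARNING'})
    # Restrict the check to a single host (hypothetical hostname).
    nagios.wait_service_state_on_nagios(
        driver, {'mysql-nodes.mysql-fs': 'CRITICAL'}, ['node-1.domain.tld'])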
@@ -45,8 +45,8 @@ class ToolchainApi(object):
         self.ELASTICSEARCH_KIBANA = elasticsearch_api.ElasticsearchPluginApi()
         self.INFLUXDB_GRAFANA = influx_api.InfluxdbPluginApi()
         self.LMA_COLLECTOR = collector_api.LMACollectorPluginApi()
-        self.LMA_INFRASTRUCTURE_ALERTING = \
-            infrastructure_alerting_api.InfraAlertingPluginApi()
+        self.LMA_INFRASTRUCTURE_ALERTING = (
+            infrastructure_alerting_api.InfraAlertingPluginApi())
         self._plugins = {
             self.ELASTICSEARCH_KIBANA,
             self.INFLUXDB_GRAFANA,
@@ -373,3 +373,132 @@ class ToolchainApi(object):
         msg = "Failed to set vm_memory_high_watermark to {}".format(limit)
         devops_helpers.wait(check_result, timeout=timeout,
                             interval=10, timeout_msg=msg)
+
+    def change_verify_service_state(self, service_name, action, new_state,
+                                    service_state_in_influx,
+                                    down_backends_in_haproxy, toolchain_node,
+                                    controller_nodes, nagios_driver):
+        """Verify that the alerts for services show up in the Grafana
+        and Nagios UI.
+
+        :param service_name: name of the service to change state of.
+            Format [service name, service name on dashboard],
+            e.g. ['nova-api', 'nova'].
+        :type service_name: list
+        :param action: action to perform (e.g. stop, start).
+        :type action: str
+        :param new_state: new state of the service.
+        :type new_state: str
+        :param service_state_in_influx: new state of the service in InfluxDB.
+        :type service_state_in_influx: int
+        :param down_backends_in_haproxy: amount of backends in 'down' state.
+        :type down_backends_in_haproxy: int
+        :param toolchain_node: toolchain node with
+            infrastructure_alerting_ui vip.
+        :type toolchain_node: dict
+        :param controller_nodes: list of the controller nodes to change
+            service state on.
+        :type controller_nodes: list
+        :param nagios_driver: Selenium web driver.
+        :type nagios_driver: WebDriver
+        """
+
+        logger.info("Changing state of service {0}. "
+                    "New state is {1}".format(service_name[0], new_state))
+        with self.fuel_web.get_ssh_for_nailgun_node(toolchain_node) as remote:
+            self.remote_ops.clear_local_mail(remote)
+        for node in controller_nodes:
+            with self.helpers.fuel_web.get_ssh_for_nailgun_node(
+                    node) as remote:
+                self.remote_ops.manage_service(remote, service_name[0], action)
+        self.LMA_INFRASTRUCTURE_ALERTING.wait_service_state_on_nagios(
+            nagios_driver, {service_name[1]: new_state})
+        self.INFLUXDB_GRAFANA.check_cluster_status(
+            service_name[1], service_state_in_influx)
+        self.INFLUXDB_GRAFANA.check_count_of_haproxy_backends(
+            service_name[0], expected_count=down_backends_in_haproxy)
+        with self.helpers.fuel_web.get_ssh_for_nailgun_node(
+                toolchain_node) as remote:
+            self.checkers.check_local_mail(
+                remote, toolchain_node["name"], service_name[1], new_state)
+
+    def change_verify_node_service_state(self, services, state, influx_state,
+                                         percent, toolchain_node,
+                                         controller_nodes, nagios_driver):
+        """Verify that the alerts for nodes show up in the Grafana
+        and Nagios UI.
+
+        :param services: list of services to check new status of. Format
+            ['mysql', 'mysql-nodes.mysql-fs'].
+        :type services: list
+        :param state: new state of the service.
+        :type state: str
+        :param influx_state: new InfluxDB state.
+        :type influx_state: int
+        :param percent: amount of space to be filled on a node.
+        :type percent: int
+        :param toolchain_node: toolchain node with
+            infrastructure_alerting_ui vip.
+        :type toolchain_node: dict
+        :param controller_nodes: list of the controller nodes to change
+            service state on.
+        :type controller_nodes: list
+        :param nagios_driver: Selenium web driver.
+        :type nagios_driver: WebDriver
+        """
+
+        with self.fuel_web.get_ssh_for_nailgun_node(toolchain_node) as remote:
+            self.remote_ops.clear_local_mail(remote)
+
+        with self.fuel_web.get_ssh_for_nailgun_node(
+                controller_nodes[0]) as remote:
+            self.remote_ops.fill_up_filesystem(
+                remote, "/dev/mapper/mysql-root", percent,
+                "/var/lib/mysql/test/bigfile")
+
+        self.LMA_INFRASTRUCTURE_ALERTING.wait_service_state_on_nagios(
+            nagios_driver, {services[0]: 'OK'})
+        self.LMA_INFRASTRUCTURE_ALERTING.wait_service_state_on_nagios(
+            nagios_driver, {services[1]: state},
+            [controller_nodes[0]['hostname']])
+        self.INFLUXDB_GRAFANA.check_cluster_status(services[0],
+                                                   self.settings.OKAY)
+
+        with self.fuel_web.get_ssh_for_nailgun_node(
+                controller_nodes[1]) as remote:
+            self.remote_ops.fill_up_filesystem(
+                remote, "/dev/mapper/mysql-root", percent,
+                "/var/lib/mysql/test/bigfile")
+
+        for node in controller_nodes:
+            self.LMA_INFRASTRUCTURE_ALERTING.wait_service_state_on_nagios(
+                nagios_driver, {services[0]: state})
+            self.LMA_INFRASTRUCTURE_ALERTING.wait_service_state_on_nagios(
+                nagios_driver, {services[1]: state}, [node['hostname']])
+        self.INFLUXDB_GRAFANA.check_cluster_status(services[0], influx_state)
+
+        with self.helpers.fuel_web.get_ssh_for_nailgun_node(
+                toolchain_node) as remote:
+            self.checkers.check_local_mail(
+                remote, toolchain_node["name"], services[0], state)
+
+        for node in controller_nodes:
+            with self.fuel_web.get_ssh_for_nailgun_node(node) as remote:
+                self.remote_ops.clean_filesystem(remote,
+                                                 "/var/lib/mysql/test/bigfile")
+
+        for node in controller_nodes:
+            self.LMA_INFRASTRUCTURE_ALERTING.wait_service_state_on_nagios(
+                nagios_driver, {services[0]: 'OK'})
+            self.LMA_INFRASTRUCTURE_ALERTING.wait_service_state_on_nagios(
+                nagios_driver, {services[1]: 'OK'}, [node['hostname']])
+        self.INFLUXDB_GRAFANA.check_cluster_status(services[0],
+                                                   self.settings.OKAY)
+
+        with self.helpers.fuel_web.get_ssh_for_nailgun_node(
+                toolchain_node) as remote:
+            self.checkers.check_local_mail(
+                remote, toolchain_node["name"], services[0], 'OK')
@@ -13,6 +13,7 @@
 # under the License.
 
 from fuelweb_test.helpers.decorators import log_snapshot_after_test
+from fuelweb_test import logger
 from proboscis import test
 
 from stacklight_tests.toolchain import api
@@ -234,3 +235,336 @@ class TestFunctionalToolchain(api.ToolchainApi):
         self.check_plugins_online()
 
         self.check_cinder_notifications()
+
+    @test(depends_on_groups=["deploy_ha_toolchain"],
+          groups=["toolchain_warning_alert_service", "service_restart",
+                  "toolchain", "functional"])
+    @log_snapshot_after_test
+    def toolchain_warning_alert_service(self):
+        """Verify that the warning alerts for services show up in the
+        Grafana and Nagios UI.
+
+        Scenario:
+            1. Connect to one of the controller nodes using ssh and
+               stop the nova-api service.
+            2. Wait for at least 1 minute.
+            3. On Grafana, check the following items:
+                - the box in the upper left corner of the dashboard
+                  displays 'WARN' with an orange background,
+                - the API panels report 1 entity as down.
+            4. On Nagios, check the following items:
+                - the 'nova' service is in 'WARNING' state,
+                - the local user root on the lma node has received
+                  an email about the service being in warning state.
+            5. Restart the nova-api service.
+            6. Wait for at least 1 minute.
+            7. On Grafana, check the following items:
+                - the box in the upper left corner of the dashboard
+                  displays 'OKAY' with a green background,
+                - the API panels report 0 entities as down.
+            8. On Nagios, check the following items:
+                - the 'nova' service is in 'OK' state,
+                - the local user root on the lma node has received
+                  an email about the recovery of the service.
+            9. Repeat steps 2 to 8 for the following services:
+                - Nova (stopping and starting the nova-api and
+                  nova-scheduler services respectively).
+                - Cinder (stopping and starting the cinder-api and
+                  cinder-scheduler services respectively).
+                - Neutron (stopping and starting the neutron-server
+                  and neutron-openvswitch-agent services respectively).
+                - Glance (stopping and starting the glance-api service).
+                - Heat (stopping and starting the heat-api service).
+                - Keystone (stopping and starting the Apache service).
+
+        Duration 45m
+        """
+        self.env.revert_snapshot("deploy_ha_toolchain")
+
+        services = {
+            'nova': ['nova-api', 'nova-scheduler'],
+            'cinder': ['cinder-api', 'cinder-scheduler'],
+            'neutron': ['neutron-server', 'neutron-openvswitch-agent'],
+            'glance': ['glance-api'],
+            'heat': ['heat-api'],
+            'keystone': ['apache2']
+        }
+
+        lma_devops_node = self.helpers.get_node_with_vip(
+            self.settings.stacklight_roles,
+            self.helpers.full_vip_name(
+                self.LMA_INFRASTRUCTURE_ALERTING.settings.failover_vip))
+        toolchain_node = self.fuel_web.get_nailgun_node_by_devops_node(
+            lma_devops_node)
+
+        url = self.LMA_INFRASTRUCTURE_ALERTING.get_authenticated_nagios_url()
+        with self.ui_tester.ui_driver(url, "//frame[2]",
+                                      "Nagios Core") as driver:
+            self.LMA_INFRASTRUCTURE_ALERTING.open_nagios_page(
+                driver, 'Services', "//table[@class='headertable']")
+            controller_node = (
+                self.fuel_web.get_nailgun_cluster_nodes_by_roles(
+                    self.helpers.cluster_id, ['controller'])[0])
+            for key in services:
+                for service in services[key]:
+                    self.change_verify_service_state(
+                        service_name=[service, key], action='stop',
+                        new_state='WARNING',
+                        service_state_in_influx=self.settings.WARN,
+                        down_backends_in_haproxy=1,
+                        toolchain_node=toolchain_node,
+                        controller_nodes=[controller_node],
+                        nagios_driver=driver)
+                    self.change_verify_service_state(
+                        service_name=[service, key], action='start',
+                        new_state='OK',
+                        service_state_in_influx=self.settings.OKAY,
+                        down_backends_in_haproxy=0,
+                        toolchain_node=toolchain_node,
+                        controller_nodes=[controller_node],
+                        nagios_driver=driver)
+
+    @test(depends_on_groups=["deploy_ha_toolchain"],
+          groups=["toolchain_critical_alert_service", "service_restart",
+                  "toolchain", "functional"])
+    # @log_snapshot_after_test
+    def toolchain_critical_alert_service(self):
+        """Verify that the critical alerts for services show up in
+        the Grafana and Nagios UI.
+
+        Scenario:
+            1. Open the Nagios URL
+            2. Connect to one of the controller nodes using ssh and
+               stop the nova-api service.
+            3. Connect to a second controller node using ssh and stop
+               the nova-api service.
+            4. Wait for at least 1 minute.
+            5. On Nagios, check the following items:
+                - the 'nova' service is in 'CRITICAL' state,
+                - the local user root on the lma node has received
+                  an email about the service being in critical state.
+            6. Restart the nova-api service on both nodes.
+            7. Wait for at least 1 minute.
+            8. On Nagios, check the following items:
+                - the 'nova' service is in 'OK' state,
+                - the local user root on the lma node has received
+                  an email about the recovery of the service.
+            9. Repeat steps 2 to 8 for the following services:
+                - Nova (stopping and starting the nova-api and
+                  nova-scheduler services respectively).
+                - Cinder (stopping and starting the cinder-api and
+                  cinder-scheduler services respectively).
+                - Neutron (stopping and starting the neutron-server
+                  and neutron-openvswitch-agent services respectively).
+                - Glance (stopping and starting the glance-api service).
+                - Heat (stopping and starting the heat-api service).
+                - Keystone (stopping and starting the Apache service).
+
+        Duration 45m
+        """
+        self.env.revert_snapshot("deploy_ha_toolchain")
+
+        services = {
+            'nova': ['nova-api', 'nova-scheduler'],
+            'cinder': ['cinder-api', 'cinder-scheduler'],
+            'neutron': ['neutron-server', 'neutron-openvswitch-agent'],
+            'glance': ['glance-api'],
+            'heat': ['heat-api'],
+            'keystone': ['apache2']
+        }
+
+        lma_devops_node = self.helpers.get_node_with_vip(
+            self.settings.stacklight_roles,
+            self.helpers.full_vip_name(
+                self.LMA_INFRASTRUCTURE_ALERTING.settings.failover_vip))
+        toolchain_node = self.fuel_web.get_nailgun_node_by_devops_node(
+            lma_devops_node)
+
+        url = self.LMA_INFRASTRUCTURE_ALERTING.get_authenticated_nagios_url()
+        with self.ui_tester.ui_driver(url, "//frame[2]",
+                                      "Nagios Core") as driver:
+            self.LMA_INFRASTRUCTURE_ALERTING.open_nagios_page(
+                driver, 'Services', "//table[@class='headertable']")
+            controller_nodes = (
+                self.fuel_web.get_nailgun_cluster_nodes_by_roles(
+                    self.helpers.cluster_id, ['controller']))
+            for key in services:
+                for service in services[key]:
+                    logger.info("Checking service {0}".format(service))
+                    self.change_verify_service_state(
+                        service_name=[service, key], action='stop',
+                        new_state='CRITICAL',
+                        service_state_in_influx=self.settings.CRIT,
+                        down_backends_in_haproxy=2,
+                        toolchain_node=toolchain_node,
+                        controller_nodes=[controller_nodes[0],
+                                          controller_nodes[1]],
+                        nagios_driver=driver)
+                    self.change_verify_service_state(
+                        service_name=[service, key], action='start',
+                        new_state='OK',
+                        service_state_in_influx=self.settings.OKAY,
+                        down_backends_in_haproxy=0,
+                        toolchain_node=toolchain_node,
+                        controller_nodes=[controller_nodes[0],
+                                          controller_nodes[1]],
+                        nagios_driver=driver)
+
+    @test(depends_on_groups=["deploy_ha_toolchain"],
+          groups=["toolchain_warning_alert_node", "node_alert_warning",
+                  "toolchain", "functional"])
+    @log_snapshot_after_test
+    def toolchain_warning_alert_node(self):
+        """Verify that the warning alerts for nodes show up in the
+        Grafana and Nagios UI.
+
+        Scenario:
+            1. Open the Nagios URL
+            2. Open the Grafana URL
+            3. Connect to one of the controller nodes using ssh and run:
+               fallocate -l $(df | grep /dev/mapper/mysql-root
+               | awk '{ printf("%.0f\n", 1024 * ((($3 + $4) * 96
+               / 100) - $3))}') /var/lib/mysql/test
+            4. Wait for at least 1 minute.
+            5. On Grafana, check the following items:
+                - the box in the upper left corner of the dashboard
+                  displays 'OKAY' with a green background.
+            6. On Nagios, check the following items:
+                - the 'mysql' service is in 'OK' state,
+                - the 'mysql-nodes.mysql-fs' service is in 'WARNING'
+                  state for the node.
+            7. Connect to a second controller node using ssh and run:
+               fallocate -l $(df | grep /dev/mapper/mysql-root
+               | awk '{ printf("%.0f\n", 1024 * ((($3 + $4) * 96
+               / 100) - $3))}') /var/lib/mysql/test
+            8. Wait for at least 1 minute.
+            9. On Grafana, check the following items:
+                - the box in the upper left corner of the dashboard
+                  displays 'WARN' with an orange background,
+                - an annotation telling that the service went from 'OKAY'
+                  to 'WARN' is displayed.
+            10. On Nagios, check the following items:
+                - the 'mysql' service is in 'WARNING' state,
+                - the 'mysql-nodes.mysql-fs' service is in 'WARNING'
+                  state for the 2 nodes,
+                - the local user root on the lma node has received an
+                  email about the service being in warning state.
+            11. Run the following command on both controller nodes:
+                rm /var/lib/mysql/test
+            12. Wait for at least 1 minute.
+            13. On Grafana, check the following items:
+                - the box in the upper left corner of the dashboard
+                  displays 'OKAY' with a green background,
+                - an annotation telling that the service went from 'WARN'
+                  to 'OKAY' is displayed.
+            14. On Nagios, check the following items:
+                - the 'mysql' service is in 'OK' state,
+                - the 'mysql-nodes.mysql-fs' service is in 'OK' state
+                  for the 2 nodes,
+                - the local user root on the lma node has received an
+                  email about the recovery of the service.
+
+        Duration 15m
+        """
+        self.env.revert_snapshot("deploy_ha_toolchain")
+
+        lma_devops_node = self.helpers.get_node_with_vip(
+            self.settings.stacklight_roles,
+            self.helpers.full_vip_name("infrastructure_alerting_mgmt_vip"))
+        toolchain_node = self.fuel_web.get_nailgun_node_by_devops_node(
+            lma_devops_node)
+        nailgun_nodes = self.fuel_web.get_nailgun_cluster_nodes_by_roles(
+            self.helpers.cluster_id, ['controller'])
+
+        url = self.LMA_INFRASTRUCTURE_ALERTING.get_authenticated_nagios_url()
+        with self.ui_tester.ui_driver(url, "//frame[2]",
+                                      "Nagios Core") as driver:
+            self.LMA_INFRASTRUCTURE_ALERTING.open_nagios_page(
+                driver, 'Services', "//table[@class='headertable']")
+            self.change_verify_node_service_state(
+                ['mysql', 'mysql-nodes.mysql-fs'], 'WARNING',
+                self.settings.WARN, '96', toolchain_node,
+                [nailgun_nodes[0], nailgun_nodes[1]], driver)
+
+    @test(depends_on_groups=["deploy_ha_toolchain"],
+          groups=["toolchain_critical_alert_node", "node_alert_critical",
+                  "toolchain", "functional"])
+    @log_snapshot_after_test
+    def toolchain_critical_alert_node(self):
+        """Verify that the critical alerts for nodes show up in the
+        Grafana and Nagios UI.
+
+        Scenario:
+            1. Open the Nagios URL
+            2. Open the Grafana URL
+            3. Connect to one of the controller nodes using ssh and run:
+               fallocate -l $(df | grep /dev/mapper/mysql-root
+               | awk '{ printf("%.0f\n", 1024 * ((($3 + $4) *
+               98 / 100) - $3))}') /var/lib/mysql/test
+            4. Wait for at least 1 minute.
+            5. On Grafana, check the following items:
+                - the box in the upper left corner of the dashboard
+                  displays 'OKAY' with a green background.
+            6. On Nagios, check the following items:
+                - the 'mysql' service is in 'OK' state,
+                - the 'mysql-nodes.mysql-fs' service is in 'CRITICAL'
+                  state for the node.
+            7. Connect to a second controller node using ssh and run:
+               fallocate -l $(df | grep /dev/mapper/mysql-root
+               | awk '{ printf("%.0f\n", 1024 * ((($3 + $4) *
+               98 / 100) - $3))}') /var/lib/mysql/test
+            8. Wait for at least 1 minute.
+            9. On Grafana, check the following items:
+                - the box in the upper left corner of the dashboard
+                  displays 'CRIT' with an orange background,
+                - an annotation telling that the service went from 'OKAY'
+                  to 'CRIT' is displayed.
+            10. On Nagios, check the following items:
+                - the 'mysql' service is in 'CRITICAL' state,
+                - the 'mysql-nodes.mysql-fs' service is in 'CRITICAL'
+                  state for the 2 nodes,
+                - the local user root on the lma node has received an
+                  email about the service being in critical state.
+            11. Run the following command on both controller nodes:
+                rm /var/lib/mysql/test
+            12. Wait for at least 1 minute.
+            13. On Grafana, check the following items:
+                - the box in the upper left corner of the dashboard
+                  displays 'OKAY' with a green background,
+                - an annotation telling that the service went from 'CRIT'
+                  to 'OKAY' is displayed.
+            14. On Nagios, check the following items:
+                - the 'mysql' service is in 'OK' state,
+                - the 'mysql-nodes.mysql-fs' service is in 'OK' state
+                  for the 2 nodes,
+                - the local user root on the lma node has received an
+                  email about the recovery of the service.
+
+        Duration 15m
+        """
+        self.env.revert_snapshot("deploy_ha_toolchain")
+
+        lma_devops_node = self.helpers.get_node_with_vip(
+            self.settings.stacklight_roles,
+            self.helpers.full_vip_name("infrastructure_alerting_mgmt_vip"))
+        toolchain_node = self.fuel_web.get_nailgun_node_by_devops_node(
+            lma_devops_node)
+        nailgun_nodes = self.fuel_web.get_nailgun_cluster_nodes_by_roles(
+            self.helpers.cluster_id, ['controller'])
+
+        url = self.LMA_INFRASTRUCTURE_ALERTING.get_authenticated_nagios_url()
+        with self.ui_tester.ui_driver(url, "//frame[2]",
+                                      "Nagios Core") as driver:
+            self.LMA_INFRASTRUCTURE_ALERTING.open_nagios_page(
+                driver, 'Services', "//table[@class='headertable']")
+            self.change_verify_node_service_state(
+                ['mysql', 'mysql-nodes.mysql-fs'], 'CRITICAL',
+                self.settings.UNKW, '98', toolchain_node,
+                [nailgun_nodes[0], nailgun_nodes[1]], driver)
@@ -27,6 +27,12 @@ stacklight_roles = (elasticsearch_settings.role_name +
                     collector_settings.role_name +
                     infrastructure_alerting_settings.role_name)
 
+OKAY = 0
+WARN = 1
+UNKW = 2
+CRIT = 3
+DOWN = 4
+
 base_nodes = {
     'slave-01': ['controller'],
     'slave-02': ['compute', 'cinder'],