From e4b4ef93de67e8a682ec2d551e273a782c8548e2 Mon Sep 17 00:00:00 2001
From: Vladimir Ushakov
Date: Thu, 16 Jun 2016 13:08:39 +0300
Subject: [PATCH] Add four toolchain functional tests

Add four toolchain functional tests.
Add toolchain helper methods.

Change-Id: I0ea3613d8e8e44a2dfa9d5a9bea26c0e9b793ee4
---
 stacklight_tests/helpers/checkers.py          |  31 ++
 stacklight_tests/helpers/helpers.py           |   4 +-
 stacklight_tests/helpers/remote_ops.py        |  22 +-
 stacklight_tests/influxdb_grafana/api.py      |  36 ++
 .../lma_infrastructure_alerting/api.py        |  91 ++++-
 stacklight_tests/toolchain/api.py             | 133 ++++++-
 stacklight_tests/toolchain/test_functional.py | 334 ++++++++++++++++++
 .../toolchain/toolchain_settings.py           |   6 +
 8 files changed, 639 insertions(+), 18 deletions(-)

diff --git a/stacklight_tests/helpers/checkers.py b/stacklight_tests/helpers/checkers.py
index efa36d6..b11c4d1 100644
--- a/stacklight_tests/helpers/checkers.py
+++ b/stacklight_tests/helpers/checkers.py
@@ -15,6 +15,8 @@
 from contextlib import closing
 import socket
 
+from devops.error import DevopsCalledProcessError
+from devops.helpers import helpers as devops_helpers
 from proboscis import asserts
 import requests
 from requests.packages.urllib3 import poolmanager
@@ -83,3 +85,32 @@ def check_port(address, port):
     """
     with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as sock:
         return sock.connect_ex((address, port)) == 0
+
+
+def check_local_mail(remote, node_name, service, state, timeout=10 * 60):
+    """Check that an email from the LMA Infrastructure Alerting plugin
+    about a service changing its state is present on the host.
+
+    :param remote: SSH connection to the node.
+    :type remote: SSHClient
+    :param node_name: name of the node to check for email on.
+    :type node_name: str
+    :param service: name of the service to look for in the email.
+    :type service: str
+    :param state: expected service state reported in the email.
+    :type state: str
+    :param timeout: timeout to wait for email to arrive.
+    :type timeout: int
+    """
+    def check_mail():
+        try:
+            response = remote.check_call("cat $MAIL")
+            if not response:
+                return False
+            if ("Service: {}\n".format(service) in response['stdout'] and
+                    "State: {}\n".format(state) in response['stdout']):
+                return True
+        except DevopsCalledProcessError:
+            return False
+    msg = ("Email about service {0} in {1} state was not "
+           "found on {2} after {3} seconds").format(
+        service, state, node_name, timeout)
+    devops_helpers.wait(check_mail, timeout=timeout, timeout_msg=msg)
diff --git a/stacklight_tests/helpers/helpers.py b/stacklight_tests/helpers/helpers.py
index c8395fe..7777965 100644
--- a/stacklight_tests/helpers/helpers.py
+++ b/stacklight_tests/helpers/helpers.py
@@ -576,7 +576,7 @@ class PluginHelper(object):
             for service in ha_services:
                 remote_ops.manage_pacemaker_service(remote, service)
             for service in non_ha_services:
-                remote_ops.manage_initctl_service(remote, service)
+                remote_ops.manage_service(remote, service)
 
         logger.info("Restarting services on computes")
         compute_services = (
@@ -586,7 +586,7 @@
         for compute in computes:
             with self.fuel_web.get_ssh_for_nailgun_node(compute) as remote:
                 for service in compute_services:
-                    remote_ops.manage_initctl_service(remote, service)
+                    remote_ops.manage_service(remote, service)
 
     @staticmethod
     def check_notifications(got_list, expected_list):
diff --git a/stacklight_tests/helpers/remote_ops.py b/stacklight_tests/helpers/remote_ops.py
index a866329..90edadd 100644
--- a/stacklight_tests/helpers/remote_ops.py
+++ b/stacklight_tests/helpers/remote_ops.py
@@ -129,7 +129,7 @@ def manage_pacemaker_service(remote, name, operation="restart"):
             operation=operation, service=name))
 
 
-def manage_initctl_service(remote, name, operation="restart"):
+def manage_service(remote, name, operation="restart"):
     """Operate service on remote node.
 
     :param remote: SSH connection to the node.
@@ -139,8 +139,24 @@ def manage_initctl_service(remote, name, operation="restart"):
     :param operation: type of operation, usually start, stop or restart.
     :type operation: str
     """
-    remote.check_call("initctl {operation} {service}".format(
-        operation=operation, service=name))
+
+    if remote.execute("service {} status".format(name))['exit_code'] == 0:
+        service_cmd = 'service {service} {operation}'
+    elif remote.execute("initctl status {}".format(name))['exit_code'] == 0:
+        service_cmd = 'initctl {operation} {service}'
+    else:
+        raise Exception(
+            "No service manager found for service {}!".format(name))
+
+    remote.check_call(service_cmd.format(service=name, operation=operation))
+
+
+def clear_local_mail(remote):
+    """Clean up local mail on the node.
+
+    :param remote: SSH connection to the node.
+    :type remote: SSHClient
+    """
+    remote.check_call("rm -f $MAIL")
 
 
 def fill_up_filesystem(remote, fs, percent, file_name):
diff --git a/stacklight_tests/influxdb_grafana/api.py b/stacklight_tests/influxdb_grafana/api.py
index bb1eb8c..91f35b4 100644
--- a/stacklight_tests/influxdb_grafana/api.py
+++ b/stacklight_tests/influxdb_grafana/api.py
@@ -22,6 +22,10 @@ from stacklight_tests.influxdb_grafana.grafana_ui import api as ui_api
 from stacklight_tests.influxdb_grafana import plugin_settings
 
 
+class NotFound(Exception):
+    pass
+
+
 class InfluxdbPluginApi(base_test.PluginApi):
     def __init__(self):
         super(InfluxdbPluginApi, self).__init__()
@@ -179,3 +183,35 @@ class InfluxdbPluginApi(base_test.PluginApi):
         if result:
             return result["series"][0]["values"]
         return []
+
+    def check_cluster_status(self, name, expected_status, interval='3m'):
+        query = ("SELECT last(value) FROM cluster_status WHERE "
+                 "time > now() - {0} AND cluster_name='{1}'".format(
+                     interval, name))
+        msg_header = "Wrong '{0}' service state has been found!".format(name)
+        self._check_influx_query_last_value(query, expected_status,
+                                            msg_header)
+
+    def check_count_of_haproxy_backends(self, service, node_state='down',
+                                        expected_count=0, interval='3m'):
+
+        query = ("SELECT last(value) FROM haproxy_backend_servers WHERE "
+                 "backend='{0}' AND state='{1}' and "
+                 "time > now() - {2}".format(service, node_state, interval))
+
+        msg_header = ("Wrong amount of nodes with service '{0}' "
+                      "in '{1}' state!".format(service, node_state))
+        self._check_influx_query_last_value(query, expected_count, msg_header)
+
+    def _check_influx_query_last_value(self, query, expected_value,
+                                       msg_header):
+        output = self.do_influxdb_query(query)
+        lines = output.json()
+        if not lines['results'][0]:
+            logger.error("The InfluxDB query returned no results!")
+            raise NotFound("No data found for query: {}".format(query))
+        state = lines['results'][0]['series'][0]['values'][0][1]
+        asserts.assert_equal(expected_value, state,
+                             msg_header + " Expected {0} but"
+                             " found {1}".format(expected_value, state))
diff --git a/stacklight_tests/lma_infrastructure_alerting/api.py b/stacklight_tests/lma_infrastructure_alerting/api.py
index 5206c86..6279538 100644
--- a/stacklight_tests/lma_infrastructure_alerting/api.py
+++ b/stacklight_tests/lma_infrastructure_alerting/api.py
@@ -11,10 +11,13 @@
 # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 # License for the specific language governing permissions and limitations
 # under the License.
 
+import six.moves as sm
+from devops.helpers import helpers
 from fuelweb_test import logger
 from proboscis import asserts
+from selenium.common.exceptions import StaleElementReferenceException
 from selenium.webdriver.common.by import By
 from selenium.webdriver.support import expected_conditions as EC
 from selenium.webdriver.support.ui import WebDriverWait
@@ -90,9 +93,7 @@ class InfraAlertingPluginApi(base_test.PluginApi):
         return "{0}://{1}:{2}".format(self.nagios_protocol,
                                       self.get_nagios_vip(),
                                       self.nagios_port)
 
-    def open_nagios_page(self, link_text, anchor):
-        driver = self.ui_tester.get_driver(self.get_authenticated_nagios_url(),
-                                           "//frame[2]", "Nagios Core")
+    def open_nagios_page(self, driver, link_text, anchor):
         driver.switch_to.default_content()
         driver.switch_to.frame(driver.find_element_by_name("side"))
         link = driver.find_element_by_link_text(link_text)
@@ -104,19 +105,19 @@ class InfraAlertingPluginApi(base_test.PluginApi):
         return driver
 
     def check_node_in_nagios(self, changed_node, state):
-        driver = self.open_nagios_page(
-            'Hosts', "//table[@class='headertable']")
-        try:
+        with self.ui_tester.ui_driver(
+                self.get_authenticated_nagios_url(),
+                "//frame[2]", "Nagios Core") as driver:
+            driver = self.open_nagios_page(
+                driver, 'Hosts', "//table[@class='headertable']")
             asserts.assert_equal(state, self.node_is_present(
-                driver, changed_node), "Failed to find node '{0}' on nagios!"
-                .format(changed_node))
-        finally:
-            driver.close()
+                driver, changed_node), "Failed to find node '{0}' "
+                "on nagios!".format(changed_node))
 
     def node_is_present(self, driver, name):
         table = self.ui_tester.get_table(driver,
                                          "/html/body/div[2]/table/tbody")
-        for ind in xrange(2, self.ui_tester.get_table_size(table) + 1):
+        for ind in sm.xrange(2, self.ui_tester.get_table_size(table) + 1):
             node_name = self.ui_tester.get_table_cell(
                 table, ind, 1).text.rstrip()
             if name == node_name:
@@ -131,3 +132,71 @@ class InfraAlertingPluginApi(base_test.PluginApi):
     def check_uninstall_failure(self):
         return self.helpers.check_plugin_cannot_be_uninstalled(
             self.settings.name, self.settings.version)
+
+    def get_services_for_node(self, table, node_name, driver,
+                              table_xpath="/html/body/table[3]/tbody"):
+        services = {}
+        found_node = False
+        ind = 2
+        while ind < self.ui_tester.get_table_size(table) + 1:
+            try:
+                if not self.ui_tester.get_table_row(table, ind).text:
+                    if found_node:
+                        break
+                    # Skip empty separator rows until the node is found,
+                    # otherwise the loop would never advance.
+                    ind += 1
+                    continue
+                if self.ui_tester.get_table_cell(
+                        table, ind, 1).text == node_name:
+                    found_node = True
+                if found_node:
+                    services[self.ui_tester.get_table_cell(
+                        table, ind, 2).text] = (
+                        self.ui_tester.get_table_cell(table, ind, 3).text)
+            except StaleElementReferenceException:
+                table = self.ui_tester.get_table(driver, table_xpath)
+                ind -= 1
+            ind += 1
+
+        return services
+
+    def check_service_state_on_nagios(self, driver, service_state=None,
+                                      node_names=None):
+        self.open_nagios_page(
+            driver, 'Services', "//table[@class='headertable']")
+        table = self.ui_tester.get_table(driver, "/html/body/table[3]/tbody")
+        if not node_names:
+            node_names = [self.ui_tester.get_table_cell(table, 2, 1).text]
+        for node in node_names:
+            node_services = self.get_services_for_node(table, node, driver)
+            if service_state:
+                for service in service_state:
+                    if service_state[service] != node_services[service]:
+                        return False
+            else:
+                for service in node_services:
+                    if 'OK' != node_services[service]:
+                        return False
+        return True
+
+    def wait_service_state_on_nagios(self, driver, service_state=None,
+                                     node_names=None):
+        msg = ("Failed to get expected service states for services: {0} "
+               "on nodes: {1}")
+
+        if not service_state or not node_names:
+            self.open_nagios_page(
+                driver, 'Services', "//table[@class='headertable']")
+            table = self.ui_tester.get_table(driver,
+                                             "/html/body/table[3]/tbody")
+            if not node_names:
+                node_names = [self.ui_tester.get_table_cell(table, 2, 1).text]
+            if not service_state:
+                service_state = dict((key, 'OK') for key in
+                                     self.get_services_for_node(
+                                         table, node_names[0], driver))
+
+        msg = msg.format([key for key in service_state], node_names)
+
+        helpers.wait(lambda: self.check_service_state_on_nagios(
+            driver, service_state, node_names), timeout=60 * 5,
+            timeout_msg=msg)
diff --git a/stacklight_tests/toolchain/api.py b/stacklight_tests/toolchain/api.py
index fd73e7c..2349f63 100644
--- a/stacklight_tests/toolchain/api.py
+++ b/stacklight_tests/toolchain/api.py
@@ -45,8 +45,8 @@ class ToolchainApi(object):
         self.ELASTICSEARCH_KIBANA = elasticsearch_api.ElasticsearchPluginApi()
         self.INFLUXDB_GRAFANA = influx_api.InfluxdbPluginApi()
         self.LMA_COLLECTOR = collector_api.LMACollectorPluginApi()
-        self.LMA_INFRASTRUCTURE_ALERTING = \
-            infrastructure_alerting_api.InfraAlertingPluginApi()
+        self.LMA_INFRASTRUCTURE_ALERTING = (
+            infrastructure_alerting_api.InfraAlertingPluginApi())
         self._plugins = {
             self.ELASTICSEARCH_KIBANA,
             self.INFLUXDB_GRAFANA,
@@ -373,3 +373,132 @@ class ToolchainApi(object):
         msg = "Failed to set vm_memory_high_watermark to {}".format(limit)
         devops_helpers.wait(check_result, timeout=timeout, interval=10,
                             timeout_msg=msg)
+
+    def change_verify_service_state(self, service_name, action, new_state,
+                                    service_state_in_influx,
+                                    down_backends_in_haproxy, toolchain_node,
+                                    controller_nodes, nagios_driver):
+        """Verify that the alerts for services show up in the Grafana
+        and Nagios UI.
+
+        :param service_name: pair of [service name, service name on the
+            dashboard], e.g. ['nova-api', 'nova'].
+        :type service_name: list
+        :param action: action to perform (e.g. stop, start).
+        :type action: str
+        :param new_state: new state of the service.
+        :type new_state: str
+        :param service_state_in_influx: new state of the service in InfluxDB.
+        :type service_state_in_influx: int
+        :param down_backends_in_haproxy: amount of backends in 'down' state.
+        :type down_backends_in_haproxy: int
+        :param toolchain_node: toolchain node with
+            infrastructure_alerting_ui vip.
+        :type toolchain_node: dict
+        :param controller_nodes: list of the controller nodes to change
+            service state on.
+        :type controller_nodes: list
+        :param nagios_driver: Selenium WebDriver instance pointing at the
+            Nagios UI.
+        :type nagios_driver: WebDriver
+        """
+
+        logger.info("Changing state of service {0}. "
" + "New state is {1}".format(service_name[0], new_state)) + with self.fuel_web.get_ssh_for_nailgun_node(toolchain_node) as remote: + self.remote_ops.clear_local_mail(remote) + for node in controller_nodes: + with self.helpers.fuel_web.get_ssh_for_nailgun_node( + node) as remote: + self.remote_ops.manage_service(remote, service_name[0], action) + self.LMA_INFRASTRUCTURE_ALERTING.wait_service_state_on_nagios( + nagios_driver, {service_name[1]: new_state}) + self.INFLUXDB_GRAFANA.check_cluster_status( + service_name[1], service_state_in_influx) + self.INFLUXDB_GRAFANA.check_count_of_haproxy_backends( + service_name[0], expected_count=down_backends_in_haproxy) + with self.helpers.fuel_web.get_ssh_for_nailgun_node( + toolchain_node) as remote: + self.checkers.check_local_mail( + remote, toolchain_node["name"], service_name[1], new_state) + + def change_verify_node_service_state(self, services, state, influx_state, + percent, toolchain_node, + controller_nodes, nagios_driver): + """Verify that the alerts for nodes show up in the Grafana + and Nagios UI. + + :param services: list of services to check new status of. Format + ['mysql', 'mysql-nodes.mysql-fs'] + :type services: list + :param state: new state of the service. + :type state: str + :param influx_state: new influx state. + :type influx_state: int + :param percent: amount of space to be filled on a node. + :type percent: int + :param toolchain_node: toolchain node with + infrastructure_alerting_ui vip. + :type toolchain_node: dict + :param controller_nodes: list of the controller nodes to change + service state on. + :type controller_nodes: list + :param nagios_driver: selenium web driver + service state on. + :type nagios_driver: WebDriver + + """ + + with self.fuel_web.get_ssh_for_nailgun_node(toolchain_node) as remote: + self.remote_ops.clear_local_mail(remote) + + with self.fuel_web.get_ssh_for_nailgun_node( + controller_nodes[0]) as remote: + self.remote_ops.fill_up_filesystem( + remote, "/dev/mapper/mysql-root", percent, + "/var/lib/mysql/test/bigfile") + + self.LMA_INFRASTRUCTURE_ALERTING.wait_service_state_on_nagios( + nagios_driver, {services[0]: 'OK'}) + self.LMA_INFRASTRUCTURE_ALERTING.wait_service_state_on_nagios( + nagios_driver, {services[1]: state}, + [controller_nodes[0]['hostname']]) + self.INFLUXDB_GRAFANA.check_cluster_status(services[0], + self.settings.OKAY) + + with self.fuel_web.get_ssh_for_nailgun_node( + controller_nodes[1]) as remote: + self.remote_ops.fill_up_filesystem( + remote, "/dev/mapper/mysql-root", percent, + "/var/lib/mysql/test/bigfile") + + for node in controller_nodes: + self.LMA_INFRASTRUCTURE_ALERTING.wait_service_state_on_nagios( + nagios_driver, {services[0]: state}) + self.LMA_INFRASTRUCTURE_ALERTING.wait_service_state_on_nagios( + nagios_driver, {services[1]: state}, [node['hostname']]) + self.INFLUXDB_GRAFANA.check_cluster_status(services[0], influx_state) + + with self.helpers.fuel_web.get_ssh_for_nailgun_node( + toolchain_node) as remote: + self.checkers.check_local_mail( + remote, toolchain_node["name"], services[0], state) + + for node in controller_nodes: + with self.fuel_web.get_ssh_for_nailgun_node(node) as remote: + self.remote_ops.clean_filesystem(remote, + "/var/lib/mysql/test/bigfile") + + for node in controller_nodes: + self.LMA_INFRASTRUCTURE_ALERTING.wait_service_state_on_nagios( + nagios_driver, {services[0]: 'OK'}) + self.LMA_INFRASTRUCTURE_ALERTING.wait_service_state_on_nagios( + nagios_driver, {services[1]: 'OK'}, [node['hostname']]) + 
+        self.INFLUXDB_GRAFANA.check_cluster_status(services[0],
+                                                   self.settings.OKAY)
+
+        with self.helpers.fuel_web.get_ssh_for_nailgun_node(
+                toolchain_node) as remote:
+            self.checkers.check_local_mail(
+                remote, toolchain_node["name"], services[0], 'OK')
diff --git a/stacklight_tests/toolchain/test_functional.py b/stacklight_tests/toolchain/test_functional.py
index a64f62c..125aa2f 100644
--- a/stacklight_tests/toolchain/test_functional.py
+++ b/stacklight_tests/toolchain/test_functional.py
@@ -13,6 +13,7 @@
 # under the License.
 
 from fuelweb_test.helpers.decorators import log_snapshot_after_test
+from fuelweb_test import logger
 from proboscis import test
 
 from stacklight_tests.toolchain import api
@@ -234,3 +235,336 @@ class TestFunctionalToolchain(api.ToolchainApi):
 
         self.check_plugins_online()
         self.check_cinder_notifications()
+
+    @test(depends_on_groups=["deploy_ha_toolchain"],
+          groups=["toolchain_warning_alert_service", "service_restart",
+                  "toolchain", "functional"])
+    @log_snapshot_after_test
+    def toolchain_warning_alert_service(self):
+        """Verify that the warning alerts for services show up in the
+        Grafana and Nagios UI.
+
+        Scenario:
+            1. Connect to one of the controller nodes using ssh and
+               stop the nova-api service.
+            2. Wait for at least 1 minute.
+            3. On Grafana, check the following items:
+                - the box in the upper left corner of the dashboard
+                  displays 'WARN' with an orange background,
+                - the API panels report 1 entity as down.
+            4. On Nagios, check the following items:
+                - the 'nova' service is in 'WARNING' state,
+                - the local user root on the lma node has received
+                  an email about the service being in warning state.
+            5. Restart the nova-api service.
+            6. Wait for at least 1 minute.
+            7. On Grafana, check the following items:
+                - the box in the upper left corner of the dashboard
+                  displays 'OKAY' with a green background,
+                - the API panels report 0 entities as down.
+            8. On Nagios, check the following items:
+                - the 'nova' service is in 'OK' state,
+                - the local user root on the lma node has received
+                  an email about the recovery of the service.
+            9. Repeat steps 2 to 8 for the following services:
+                - Nova (stopping and starting the nova-api and
+                  nova-scheduler services respectively).
+                - Cinder (stopping and starting the cinder-api and
+                  cinder-scheduler services respectively).
+                - Neutron (stopping and starting the neutron-server
+                  and neutron-openvswitch-agent services respectively).
+                - Glance (stopping and starting the glance-api service).
+                - Heat (stopping and starting the heat-api service).
+                - Keystone (stopping and starting the Apache service).
+
+        Duration 45m
+        """
+        self.env.revert_snapshot("deploy_ha_toolchain")
+
+        services = {
+            'nova': ['nova-api', 'nova-scheduler'],
+            'cinder': ['cinder-api', 'cinder-scheduler'],
+            'neutron': ['neutron-server', 'neutron-openvswitch-agent'],
+            'glance': ['glance-api'],
+            'heat': ['heat-api'],
+            'keystone': ['apache2']
+        }
+
+        lma_devops_node = self.helpers.get_node_with_vip(
+            self.settings.stacklight_roles,
+            self.helpers.full_vip_name(
+                self.LMA_INFRASTRUCTURE_ALERTING.settings.failover_vip))
+        toolchain_node = self.fuel_web.get_nailgun_node_by_devops_node(
+            lma_devops_node)
+
+        url = self.LMA_INFRASTRUCTURE_ALERTING.get_authenticated_nagios_url()
+        with self.ui_tester.ui_driver(url, "//frame[2]",
+                                      "Nagios Core") as driver:
+            self.LMA_INFRASTRUCTURE_ALERTING.open_nagios_page(
+                driver, 'Services', "//table[@class='headertable']")
+            controller_node = (
+                self.fuel_web.get_nailgun_cluster_nodes_by_roles(
+                    self.helpers.cluster_id, ['controller'])[0])
+            for key in services:
+                for service in services[key]:
+                    self.change_verify_service_state(
+                        service_name=[service, key], action='stop',
+                        new_state='WARNING',
+                        service_state_in_influx=self.settings.WARN,
+                        down_backends_in_haproxy=1,
+                        toolchain_node=toolchain_node,
+                        controller_nodes=[controller_node],
+                        nagios_driver=driver)
+                    self.change_verify_service_state(
+                        service_name=[service, key], action='start',
+                        new_state='OK',
+                        service_state_in_influx=self.settings.OKAY,
+                        down_backends_in_haproxy=0,
+                        toolchain_node=toolchain_node,
+                        controller_nodes=[controller_node],
+                        nagios_driver=driver)
+
+    @test(depends_on_groups=["deploy_ha_toolchain"],
+          groups=["toolchain_critical_alert_service", "service_restart",
+                  "toolchain", "functional"])
+    @log_snapshot_after_test
+    def toolchain_critical_alert_service(self):
+        """Verify that the critical alerts for services show up in
+        the Grafana and Nagios UI.
+
+        Scenario:
+            1. Open the Nagios URL
+            2. Connect to one of the controller nodes using ssh and
+               stop the nova-api service.
+            3. Connect to a second controller node using ssh and stop
+               the nova-api service.
+            4. Wait for at least 1 minute.
+            5. On Nagios, check the following items:
+                - the 'nova' service is in 'CRITICAL' state,
+                - the local user root on the lma node has received
+                  an email about the service being in critical state.
+            6. Restart the nova-api service on both nodes.
+            7. Wait for at least 1 minute.
+            8. On Nagios, check the following items:
+                - the 'nova' service is in 'OK' state,
+                - the local user root on the lma node has received
+                  an email about the recovery of the service.
+            9. Repeat steps 2 to 8 for the following services:
+                - Nova (stopping and starting the nova-api and
+                  nova-scheduler services respectively).
+                - Cinder (stopping and starting the cinder-api and
+                  cinder-scheduler services respectively).
+                - Neutron (stopping and starting the neutron-server
+                  and neutron-openvswitch-agent services respectively).
+                - Glance (stopping and starting the glance-api service).
+                - Heat (stopping and starting the heat-api service).
+                - Keystone (stopping and starting the Apache service).
+
+        Duration 45m
+        """
+        self.env.revert_snapshot("deploy_ha_toolchain")
+
+        services = {
+            'nova': ['nova-api', 'nova-scheduler'],
+            'cinder': ['cinder-api', 'cinder-scheduler'],
+            'neutron': ['neutron-server', 'neutron-openvswitch-agent'],
+            'glance': ['glance-api'],
+            'heat': ['heat-api'],
+            'keystone': ['apache2']
+        }
+
+        lma_devops_node = self.helpers.get_node_with_vip(
+            self.settings.stacklight_roles,
+            self.helpers.full_vip_name(
+                self.LMA_INFRASTRUCTURE_ALERTING.settings.failover_vip))
+        toolchain_node = self.fuel_web.get_nailgun_node_by_devops_node(
+            lma_devops_node)
+
+        url = self.LMA_INFRASTRUCTURE_ALERTING.get_authenticated_nagios_url()
+        with self.ui_tester.ui_driver(url, "//frame[2]",
+                                      "Nagios Core") as driver:
+            self.LMA_INFRASTRUCTURE_ALERTING.open_nagios_page(
+                driver, 'Services', "//table[@class='headertable']")
+            controller_nodes = (
+                self.fuel_web.get_nailgun_cluster_nodes_by_roles(
+                    self.helpers.cluster_id, ['controller']))
+            for key in services:
+                for service in services[key]:
+                    logger.info("Checking service {0}".format(service))
+                    self.change_verify_service_state(
+                        service_name=[service, key], action='stop',
+                        new_state='CRITICAL',
+                        service_state_in_influx=self.settings.CRIT,
+                        down_backends_in_haproxy=2,
+                        toolchain_node=toolchain_node,
+                        controller_nodes=[controller_nodes[0],
+                                          controller_nodes[1]],
+                        nagios_driver=driver)
+                    self.change_verify_service_state(
+                        service_name=[service, key], action='start',
+                        new_state='OK',
+                        service_state_in_influx=self.settings.OKAY,
+                        down_backends_in_haproxy=0,
+                        toolchain_node=toolchain_node,
+                        controller_nodes=[controller_nodes[0],
+                                          controller_nodes[1]],
+                        nagios_driver=driver)
+
+    @test(depends_on_groups=["deploy_ha_toolchain"],
+          groups=["toolchain_warning_alert_node", "node_alert_warning",
+                  "toolchain", "functional"])
+    @log_snapshot_after_test
+    def toolchain_warning_alert_node(self):
+        """Verify that the warning alerts for nodes show up in the
+        Grafana and Nagios UI.
+
+        Scenario:
+            1. Open the Nagios URL
+            2. Open the Grafana URL
+            3. Connect to one of the controller nodes using ssh and
+               run:
+               fallocate -l $(df | grep /dev/mapper/mysql-root
+               | awk '{ printf("%.0f\n", 1024 * ((($3 + $4) * 96
+               / 100) - $3))}') /var/lib/mysql/test
+            4. Wait for at least 1 minute.
+            5. On Grafana, check the following items:
+                - the box in the upper left corner of the dashboard
+                  displays 'OKAY' with a green background,
+            6. On Nagios, check the following items:
+                - the 'mysql' service is in 'OK' state,
+                - the 'mysql-nodes.mysql-fs' service is in 'WARNING'
+                  state for the node.
+            7. Connect to a second controller node using ssh and run:
+               fallocate -l $(df | grep /dev/mapper/mysql-root
+               | awk '{ printf("%.0f\n", 1024 * ((($3 + $4) * 96
+               / 100) - $3))}') /var/lib/mysql/test
+            8. Wait for at least 1 minute.
+            9. On Grafana, check the following items:
+                - the box in the upper left corner of the dashboard
+                  displays 'WARN' with an orange background,
+                - an annotation telling that the service went from 'OKAY'
+                  to 'WARN' is displayed.
+            10. On Nagios, check the following items:
+                - the 'mysql' service is in 'WARNING' state,
+                - the 'mysql-nodes.mysql-fs' service is in 'WARNING'
+                  state for the 2 nodes,
+                - the local user root on the lma node has received an
+                  email about the service being in warning state.
+            11. Run the following command on both controller nodes:
+                rm /var/lib/mysql/test
+            12. Wait for at least 1 minute.
+            13. On Grafana, check the following items:
+                - the box in the upper left corner of the dashboard
+                  displays 'OKAY' with a green background,
+                - an annotation telling that the service went from 'WARN'
+                  to 'OKAY' is displayed.
+            14. On Nagios, check the following items:
+                - the 'mysql' service is in 'OK' state,
+                - the 'mysql-nodes.mysql-fs' service is in 'OK' state
+                  for the 2 nodes,
+                - the local user root on the lma node has received an
+                  email about the recovery of the service.
+
+        Duration 15m
+        """
+        self.env.revert_snapshot("deploy_ha_toolchain")
+
+        lma_devops_node = self.helpers.get_node_with_vip(
+            self.settings.stacklight_roles,
+            self.helpers.full_vip_name("infrastructure_alerting_mgmt_vip"))
+        toolchain_node = self.fuel_web.get_nailgun_node_by_devops_node(
+            lma_devops_node)
+        nailgun_nodes = self.fuel_web.get_nailgun_cluster_nodes_by_roles(
+            self.helpers.cluster_id, ['controller'])
+
+        url = self.LMA_INFRASTRUCTURE_ALERTING.get_authenticated_nagios_url()
+        with self.ui_tester.ui_driver(url, "//frame[2]",
+                                      "Nagios Core") as driver:
+            self.LMA_INFRASTRUCTURE_ALERTING.open_nagios_page(
+                driver, 'Services', "//table[@class='headertable']")
+            self.change_verify_node_service_state(
+                ['mysql', 'mysql-nodes.mysql-fs'], 'WARNING',
+                self.settings.WARN, '96', toolchain_node,
+                [nailgun_nodes[0], nailgun_nodes[1]], driver)
+
+    @test(depends_on_groups=["deploy_ha_toolchain"],
+          groups=["toolchain_critical_alert_node", "node_alert_critical",
+                  "toolchain", "functional"])
+    @log_snapshot_after_test
+    def toolchain_critical_alert_node(self):
+        """Verify that the critical alerts for nodes show up in the
+        Grafana and Nagios UI.
+
+        Scenario:
+            1. Open the Nagios URL
+            2. Open the Grafana URL
+            3. Connect to one of the controller nodes using ssh and run:
+               fallocate -l $(df | grep /dev/mapper/mysql-root
+               | awk '{ printf("%.0f\n", 1024 * ((($3 + $4) *
+               98 / 100) - $3))}') /var/lib/mysql/test
+            4. Wait for at least 1 minute.
+            5. On Grafana, check the following items:
+                - the box in the upper left corner of the dashboard
+                  displays 'OKAY' with a green background,
+            6. On Nagios, check the following items:
+                - the 'mysql' service is in 'OK' state,
+                - the 'mysql-nodes.mysql-fs' service is in 'CRITICAL'
+                  state for the node.
+            7. Connect to a second controller node using ssh and run:
+               fallocate -l $(df | grep /dev/mapper/mysql-root
+               | awk '{ printf("%.0f\n", 1024 * ((($3 + $4) *
+               98 / 100) - $3))}') /var/lib/mysql/test
+            8. Wait for at least 1 minute.
+            9. On Grafana, check the following items:
+                - the box in the upper left corner of the dashboard
+                  displays 'CRIT' with an orange background,
+                - an annotation telling that the service went from 'OKAY'
+                  to 'CRIT' is displayed.
+            10. On Nagios, check the following items:
+                - the 'mysql' service is in 'CRITICAL' state,
+                - the 'mysql-nodes.mysql-fs' service is in 'CRITICAL'
+                  state for the 2 nodes,
+                - the local user root on the lma node has received an
+                  email about the service being in critical state.
+            11. Run the following command on both controller nodes:
+                rm /var/lib/mysql/test
+            12. Wait for at least 1 minute.
+            13. On Grafana, check the following items:
+                - the box in the upper left corner of the dashboard
+                  displays 'OKAY' with a green background,
+                - an annotation telling that the service went from 'CRIT'
+                  to 'OKAY' is displayed.
+            14. On Nagios, check the following items:
+                - the 'mysql' service is in 'OK' state,
+                - the 'mysql-nodes.mysql-fs' service is in 'OK' state
+                  for the 2 nodes,
+                - the local user root on the lma node has received an
+                  email about the recovery of the service.
+
+        Duration 15m
+        """
+        self.env.revert_snapshot("deploy_ha_toolchain")
+
+        lma_devops_node = self.helpers.get_node_with_vip(
+            self.settings.stacklight_roles,
+            self.helpers.full_vip_name("infrastructure_alerting_mgmt_vip"))
+        toolchain_node = self.fuel_web.get_nailgun_node_by_devops_node(
+            lma_devops_node)
+        nailgun_nodes = self.fuel_web.get_nailgun_cluster_nodes_by_roles(
+            self.helpers.cluster_id, ['controller'])
+
+        url = self.LMA_INFRASTRUCTURE_ALERTING.get_authenticated_nagios_url()
+        with self.ui_tester.ui_driver(url, "//frame[2]",
+                                      "Nagios Core") as driver:
+            self.LMA_INFRASTRUCTURE_ALERTING.open_nagios_page(
+                driver, 'Services', "//table[@class='headertable']")
+            self.change_verify_node_service_state(
+                ['mysql', 'mysql-nodes.mysql-fs'], 'CRITICAL',
+                self.settings.UNKW, '98', toolchain_node,
+                [nailgun_nodes[0], nailgun_nodes[1]], driver)
diff --git a/stacklight_tests/toolchain/toolchain_settings.py b/stacklight_tests/toolchain/toolchain_settings.py
index 2d77b5a..c7ee014 100644
--- a/stacklight_tests/toolchain/toolchain_settings.py
+++ b/stacklight_tests/toolchain/toolchain_settings.py
@@ -27,6 +27,12 @@
 stacklight_roles = (elasticsearch_settings.role_name +
                     collector_settings.role_name +
                     infrastructure_alerting_settings.role_name)
 
+OKAY = 0
+WARN = 1
+UNKW = 2
+CRIT = 3
+DOWN = 4
+
 base_nodes = {
     'slave-01': ['controller'],
     'slave-02': ['compute', 'cinder'],