Merge "Added new atomic scenarios"

This commit is contained in:
Jenkins 2015-06-16 09:23:35 +00:00 committed by Gerrit Code Review
commit fc4e2f4c88
18 changed files with 269 additions and 242 deletions

1
.gitignore vendored
View File

@ -18,3 +18,4 @@ doc/source/api/
.testrepository/
.tox/
.installed
.idea*

View File

@ -7,36 +7,30 @@ Introduction
HAOS is a suite of HA/destructive tests for OpenStack clouds. These tests
are written as Rally plugins and are executed by Rally and in
parallel with the load/performance tests to simulate some disaster/failover
scenarios with the OpenStack clouds. HAOS uses Shaker for remote execution
of commands on OpenStack nodes and for data-plane performance load.
scenarios with the OpenStack clouds. HAOS uses HAOS agent for remote execution
of commands on OpenStack nodes and virtual machines in the cloud.
How to install
--------------
1. Clone the repository:
```bash
git clone git://git.openstack.org/stackforge/haos
```
2. Make sure that ``sshpass`` is installed - on Ubuntu do ``sudo apt-get install sshpass``
3. Fill in your ``openrc`` file based on the sample provided in ``etc/openrc``
``git clone git://git.openstack.org/stackforge/haos``
2. Make sure that ``sshpass`` is installed - for example, on Ubuntu execute the following command: ``sudo apt-get install sshpass``
3. Edit etc/openrc.local file, set IP addresses, credentials and parameters for your cloud
4. Import ``openrc`` into your environment by doing
```bash
source etc/openrc.local
```
``source etc/openrc.local``
5. Run tox:
```bash
tox -erun
```
``tox -e run``
How to run tests
----------------
Run scenario with the command:
```bash
tox -erun <scenario>
```
``tox -e run <scenario>``
How to run tests on MOS environments
------------------------------------
Run scenario with the command:
``tox -e run-for-mos <scenario>``

View File

@ -1,62 +0,0 @@
# Copyright (c) 2015 Mirantis Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
from rally.benchmark.context import base
from rally.benchmark.context.cleanup import manager as resource_manager
from rally.common import log as logging
from rally import consts
from haos.remote import server
LOG = logging.getLogger(__name__)
@base.context(name="cloud", order=800)
class CloudNodesContext(base.Context):
    """This context allows to define the list of nodes in the cloud."""

    # No user-configurable properties: all settings come from env vars.
    CONFIG_SCHEMA = {
        "type": "object",
        "$schema": consts.JSON_SCHEMA,
        "additionalProperties": False,
        "properties": {
        }
    }

    def setup(self):
        """Load HAOS settings from the environment into the context.

        Variables mapped to ``None`` below are mandatory; the others
        fall back to the given defaults.

        :raises ValueError: if a mandatory variable is not set
        """
        env_vars = {
            'HAOS_SERVER_ENDPOINT': None,
            'HAOS_IMAGE': None,
            'HAOS_FLAVOR': None,
            'HAOS_JOIN_TIMEOUT': 100,
            'HAOS_COMMAND_TIMEOUT': 10
        }
        for var, def_value in env_vars.items():
            value = os.environ.get(var) or def_value
            if not value:
                # was: ValueError('Env var %s must be set', var) -- the
                # second argument was never interpolated into the message;
                # format it explicitly instead.
                raise ValueError('Env var %s must be set' % var)
            self.context[var.lower()] = value
        boss_inst = server.Server(self.context["haos_server_endpoint"])
        self.context["haos_remote_control"] = boss_inst.remote_control

    def cleanup(self):
        """This method is called after the task finish."""
        resource_manager.cleanup(names=["nova.servers"],
                                 users=self.context.get("users", []))

View File

@ -1,5 +1,16 @@
import os
from rally.benchmark.context import base
from rally.benchmark.context.cleanup import manager as resource_manager
from rally.common import log as logging
from rally import consts
from rally import exceptions
from haos.remote import server
from haos.remote import ssh_remote_control
LOG = logging.getLogger(__name__)
@base.context(name="cloud_nodes", order=800)
@ -19,21 +30,9 @@ class CloudNodesContext(base.Context):
"type": "object",
"default": {}
},
"shaker_endpoint": {
"remote_control_type": {
"type": "string",
"default": ""
},
"shaker_image": {
"type": "string",
"default": "TestVM"
},
"default_flavor": {
"type": "string",
"default": "m1.micro"
},
"shaker": {
"type": "object",
"default": {}
"default": "ssh"
}
}
}
@ -41,12 +40,40 @@ class CloudNodesContext(base.Context):
def setup(self):
"""This method is called before the task start."""
self.context["controllers"] = self.config.get("controllers")
remote_control_type = self.config.get("remote_control_type")
self.context["remote_control_type"] = remote_control_type
power_control_node = self.config.get("power_control_node")
self.context["power_control_node"] = power_control_node
self.context["shaker_endpoint"] = self.config.get("shaker_endpoint")
self.context["shaker_image"] = self.config.get("shaker_image")
self.context["default_flavor"] = self.config.get("default_flavor")
env_vars = {
'HAOS_SERVER_ENDPOINT': None,
'HAOS_IMAGE': None,
'HAOS_FLAVOR': None,
'HAOS_JOIN_TIMEOUT': 100,
'HAOS_COMMAND_TIMEOUT': 10
}
for var, def_value in env_vars.items():
value = os.environ.get(var) or def_value
if value:
self.context[var.lower()] = value
else:
LOG.debug('Env var %s must be set'.format(var))
if self.context["remote_control_type"] == "ssh":
ssh = ssh_remote_control.SSHConnection()
self.context["haos_remote_control"] = ssh.remote_control
elif self.context["remote_control_type"] == "haos_agents":
boss_inst = server.Server(self.context["haos_server_endpoint"])
self.context["haos_remote_control"] = boss_inst.remote_control
else:
msg = "remote_control_type {0} doesn't implemented yet.".format(
self.context["remote_control_type"]
)
raise exceptions.RallyException(msg)
def cleanup(self):
"""This method is called after the task finish."""
self.context["controllers"] = []
resource_manager.cleanup(names=["nova.servers"],
users=self.context.get("users", []))

View File

@ -1,8 +1,6 @@
from rally.benchmark.context import base
from rally import consts
from haos.rally.utils import run_command
@base.context(name="recover_cloud", order=900)
class CloudNodesContext(base.Context):
@ -20,31 +18,6 @@ class CloudNodesContext(base.Context):
}
}
def check_rabbitmq_cluster_status(self, controllers):
    """Verify that all RabbitMQ nodes are running on every controller.

    Runs ``rabbitmqctl cluster_status`` on each controller and compares
    the configured node list against the ``running_nodes`` list.

    :param controllers: list of controller dicts with "agent_endpoint"
    :returns: True when every configured rabbit node is reported running
              on every controller, False otherwise
    """
    command = "rabbitmqctl cluster_status"

    def rabbit_nodes(line):
        # rabbitmqctl prints Erlang terms; node names appear between
        # single quotes, e.g. {nodes,[{disc,['rabbit@node-1',...]}]}.
        # (Replaces a lambda bound to a parameter that shadowed the
        # builtin ``str``.)
        return [node for node in line.split("'") if "rabbit" in node]

    for controller in controllers:
        nodes = []
        active_nodes = []
        output = run_command(self.context, controller["agent_endpoint"],
                             command)
        for line in output.splitlines():
            # Test "running_nodes" first: the plain "nodes" substring
            # check would also match it.
            if "running_nodes" in line:
                active_nodes = rabbit_nodes(line)
            elif "nodes" in line:
                nodes = rabbit_nodes(line)
        if not nodes or len(active_nodes) < len(nodes):
            return False
        for node in nodes:
            if node not in active_nodes:
                return False
    return True
def setup(self):
"""This method is called before the task start."""
self.context["recover_commands"] = []
@ -53,12 +26,3 @@ class CloudNodesContext(base.Context):
def cleanup(self):
"""This method is called after the task finish."""
pass
# for action in self.context["recover_commands"]:
# run_command(self.context, action["node"], action["command"],
# action["executor"])
# time.sleep(action.get("timeout", 0))
#
# controllers = self.context["controllers"]
# if "rabbitmq_cluster_status" in self.context["checks"]:
# if self.check_rabbitmq_cluster_status(controllers) is False:
# raise Exception("RabbitMQ cluster wasn't recovered")

View File

@ -161,7 +161,7 @@ class BaseDisaster(neutron_utils.NeutronScenario,
# Add tcp rule for 22 port and icmp rule
def add_rules_for_ping(self):
#self._clients = self._admin_clients
# self._clients = self._admin_clients
sec_groups = self._list_security_groups()
self.clients("nova").security_group_rules.create(

View File

@ -223,7 +223,7 @@ class NeutronDisaster(base_disaster.BaseDisaster):
vm1_floating_ip = self.define_floating_ip_for_vm(vm1, net1_name)
vm2_floating_ip = self.define_floating_ip_for_vm(vm2, net2_name)
# Find primary controller
# Find primary controller
primary_context_controller = None
primary_controller = self.find_primary_controller()
for controller in self.context["controllers"]:
@ -435,7 +435,7 @@ class NeutronDisaster(base_disaster.BaseDisaster):
vm1_floating_ip = self.define_floating_ip_for_vm(vm1, net1_name)
vm2_floating_ip = self.define_floating_ip_for_vm(vm2, net2_name)
# Find primary controller
# Find primary controller
primary_controller = self.find_primary_controller()
for controller in self.context["controllers"]:
if controller['agent_endpoint'] == primary_controller:
@ -555,7 +555,7 @@ class NeutronDisaster(base_disaster.BaseDisaster):
vm1_floating_ip = self.define_floating_ip_for_vm(vm1, net1_name)
vm2_floating_ip = self.define_floating_ip_for_vm(vm2, net2_name)
# Find primary controller
# Find primary controller
non_primary_context_controller = self.find_non_primary_controller()
non_primary_controller = \
non_primary_context_controller['agent_endpoint']

View File

@ -0,0 +1,34 @@
import random
import time
from haos.rally.plugin import base_disaster
from rally.benchmark.scenarios import base
from rally.common import log as logging
LOG = logging.getLogger(__name__)
class ControllerShutdown(base_disaster.BaseDisaster):

    @base.scenario()
    def power_off_and_on_one_controller(self):
        """Select one controller, shut it down, then power it back on.

        The controller is selected randomly; after power-off the scenario
        waits for the configured timeout, powers the node on again, and
        waits for it to come back.

        Setup:
            OpenStack cloud with at least 3 controllers.
        """
        # random.choice is the idiomatic way to pick a random element
        # (replaces manual randint-based indexing).
        controller = random.choice(self.context["controllers"])
        power_control_node = self.context["power_control_node"]
        self.run_remote_command(power_control_node,
                                command=controller["hardware_power_off_cmd"])
        time.sleep(controller["power_off_timeout"])
        self.run_remote_command(power_control_node,
                                command=controller["hardware_power_on_cmd"])
        time.sleep(controller["power_on_timeout"])

View File

@ -1,26 +0,0 @@
import random
from rally.benchmark.scenarios import base
from haos.rally.plugin import base_disaster
class RabbitMQDisaster(base_disaster.BaseDisaster):

    @base.scenario()
    def power_off_one_controller(self):
        """Power off one controller and verify the cloud.

        Setup:
            OpenStack cloud with at least 3 controllers

        Scenario:
            1. Power off one randomly chosen controller
            2. Verify cloud: create VM 10 times
        """
        controller_id = random.randint(0, len(self.context["controllers"]) - 1)
        self.power_off_controller(controller_id)
        # range instead of Python-2-only xrange; identical behavior here.
        for i in range(10):
            self.boot_server("test{0}".format(i))

View File

@ -0,0 +1,24 @@
import random
from haos.rally.plugin import base_disaster
from rally.benchmark.scenarios import base
from rally.common import log as logging
LOG = logging.getLogger(__name__)
class RunCommand(base_disaster.BaseDisaster):

    @base.scenario()
    def run_command_on_random_controller(self, command='', timeout=300):
        """Execute a bash command on a randomly chosen controller.

        :param command: command which should be executed
        :param timeout: how long (seconds) to wait for command execution
        """
        # random.choice is the idiomatic way to pick a random element
        # (replaces manual randint-based indexing).
        controller = random.choice(self.context["controllers"])
        LOG.info('Running command on controller: %s', controller)
        self.run_remote_command(controller, command, timeout)

View File

@ -0,0 +1,29 @@
import paramiko
from rally.common import log as logging
LOG = logging.getLogger(__name__)
def run(host, username, password, command, timeout):
    """Execute a shell command on a remote host over SSH.

    :param host: hostname or IP address to connect to
    :param username: SSH user name
    :param password: SSH password
    :param command: shell command to execute remotely
    :param timeout: per-channel timeout (seconds) for the command
    :returns: tuple of (stdout, stderr) file-like objects of the
              finished command
    """
    msg = 'Running command "{0}" on server {1}'
    LOG.info(msg.format(command, host))
    ssh = paramiko.SSHClient()
    ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
    ssh.connect(host, username=username, password=password)
    _, ssh_stdout, ssh_stderr = ssh.exec_command(command, timeout=timeout)
    # Block until the remote command completes. recv_exit_status() waits
    # on the channel instead of the previous tight busy-wait loop that
    # spun on exit_status_ready() and burned CPU.
    ssh_stdout.channel.recv_exit_status()
    # NOTE(review): the SSHClient is never closed; the returned streams
    # stay bound to it, so it cannot be closed here -- consider reading
    # the output before disconnecting, or returning the client as well.
    return ssh_stdout, ssh_stderr
class SSHConnection(object):
    """Remote-control backend that executes commands over plain SSH."""

    def remote_control(self, host, command, timeout=30):
        """Run *command* on *host* (a dict with connection credentials)."""
        address = host["public_ip"]
        user = host["user"]
        password = host["password"]
        return run(address, user, password, command, timeout)

View File

@ -0,0 +1,36 @@
{
"RunCommand.run_command_on_random_controller": [
{
"args": {
"command": "iptables -I INPUT 1 -p tcp --destination-port galeracheck -j DROP && sleep 20 && iptables -D INPUT -p tcp --destination-port galeracheck -j DROP",
"timeout": 300
},
"runner": {
"type": "serial",
"times": 10
},
"context": {
"cloud_nodes": {
"controllers": [
{
"public_ip": "172.16.0.4",
"user": "root",
"password": "secret"
},
{
"public_ip": "172.16.0.5",
"user": "root",
"password": "secret"
},
{
"public_ip": "172.16.0.6",
"user": "root",
"password": "secret"
}
],
"remote_control_type": "ssh"
}
}
}
]
}

View File

@ -0,0 +1,36 @@
{
"RunCommand.run_command_on_random_controller": [
{
"args": {
"command": "iptables -I INPUT 1 -p tcp --destination-port 5673 -j DROP && sleep 20 && iptables -D INPUT -p tcp --destination-port 5673 -j DROP",
"timeout": 300
},
"runner": {
"type": "serial",
"times": 10
},
"context": {
"cloud_nodes": {
"controllers": [
{
"public_ip": "172.16.0.4",
"user": "root",
"password": "secret"
},
{
"public_ip": "172.16.0.5",
"user": "root",
"password": "secret"
},
{
"public_ip": "172.16.0.6",
"user": "root",
"password": "secret"
}
],
"remote_control_type": "ssh"
}
}
}
]
}

View File

@ -1,41 +0,0 @@
{
"NeutronDisaster.destroy_non_primary_controller": [
{
"runner": {
"type": "serial",
"times": 1
},
"context": {
"users": {
"tenants": 1,
"users_per_tenant": 1
},
"roles":[
"admin"
],
"recover_cloud": {
"checks": ["rabbitmq_cluster_status"]
},
"network": {
"networks_per_tenant": 2
},
"cloud_nodes": {
"controllers": [
{
"agent_endpoint": "node-1.domain.tld"
},
{
"agent_endpoint": "node-2.domain.tld"
},
{
"agent_endpoint": "node-3.domain.tld"
}
],
"shaker_endpoint": "172.18.76.21:5999",
"shaker_image": "2fb29a22-b351-4466-83ff-21446097b8c9",
"default_flavor": "18"
}
}
}
]
}

View File

@ -1,35 +0,0 @@
{
"NeutronDisaster.drop_rabbit_port": [
{
"runner": {
"type": "serial",
"times": 1
},
"context": {
"users": {
"tenants": 1,
"users_per_tenant": 1
},
"recover_cloud": {
"checks": ["rabbitmq_cluster_status"]
},
"cloud_nodes": {
"controllers": [
{
"agent_endpoint": "node-1.domain.tld"
},
{
"agent_endpoint": "node-2.domain.tld"
},
{
"agent_endpoint": "node-3.domain.tld"
}
],
"shaker_endpoint": "172.18.76.21:5999",
"shaker_image": "2fb29a22-b351-4466-83ff-21446097b8c9",
"default_flavor": "18"
}
}
}
]
}

View File

@ -0,0 +1,40 @@
{
"ControllerShutdown.power_off_and_on_one_controller": [
{
"runner": {
"type": "serial",
"times": 1
},
"context": {
"cloud_nodes": {
"controllers": [
{
"hardware_power_on_cmd": "VBoxManage startvm fuel-slave-1 --type headless",
"hardware_power_off_cmd": "VBoxManage controlvm fuel-slave-1 poweroff",
"power_off_timeout": 180,
"power_on_timeout": 90
},
{
"hardware_power_on_cmd": "VBoxManage startvm fuel-slave-2 --type headless",
"hardware_power_off_cmd": "VBoxManage controlvm fuel-slave-2 poweroff",
"power_off_timeout": 180,
"power_on_timeout": 90
},
{
"hardware_power_on_cmd": "VBoxManage startvm fuel-slave-3 --type headless",
"hardware_power_off_cmd": "VBoxManage controlvm fuel-slave-3 poweroff",
"power_off_timeout": 180,
"power_on_timeout": 90
}
],
"power_control_node": {
"public_ip": "172.18.78.30",
"user": "xwizard",
"password": "xWizard707"
},
"remote_control_type": "ssh"
}
}
}
]
}

View File

@ -5,5 +5,5 @@ TOP_DIR=$(cd $(dirname "$0") && pwd)
SCENARIO=$1
if [ ! -z ${SCENARIO} ]; then
rally --verbose --plugin-path ${TOP_DIR}/../haos/rally/context,${TOP_DIR}/../haos/rally/plugin task start ${SCENARIO}
rally --debug --plugin-path ${TOP_DIR}/../haos/rally/context,${TOP_DIR}/../haos/rally/plugin task start ${SCENARIO}
fi

View File

@ -23,6 +23,12 @@ commands =
bash tools/run_rally.sh {posargs}
whitelist_externals = bash
[testenv:run-for-mos]
commands =
bash tools/make_env.sh
bash tools/run_rally.sh {posargs}
whitelist_externals = bash
[flake8]
# E125 continuation line does not distinguish itself from next logical line
ignore = E125