Igor Degtiarov d4b943b4ff Add autoscaling test for Telemetry plugin
Change-Id: I3d806358ba1a557d8b2765a808a0e6585f4fe74c
2016-10-07 14:57:42 +03:00

550 lines
23 KiB
Python

# Copyright 2016 Mirantis, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import datetime
import os
import ceilometerclient.v2.client
from fuelweb_test.helpers import checkers as fuelweb_checkers
from fuelweb_test import logger
import heatclient.v1.client
import proboscis
from stacklight_tests import base_test
from stacklight_tests.helpers import checkers
from stacklight_tests.helpers import helpers
from stacklight_tests.influxdb_grafana import api as influx_api
from stacklight_tests.openstack_telemetry import plugin_settings
class OpenstackTelemeteryPluginApi(base_test.PluginApi):
def __init__(self):
super(OpenstackTelemeteryPluginApi, self).__init__()
self._ceilometer = None
self._heat_cli = None
@property
def keystone_access(self):
return self.helpers.os_conn.keystone_access
@property
def nova_cli(self):
return self.helpers.os_conn.nova
@property
def neutron_cli(self):
return self.helpers.os_conn.neutron
@property
def auth_url(self):
return self.keystone_access.service_catalog.url_for(
service_type='identity', service_name='keystone',
interface='internal')
@property
def heat_cli(self):
if self._heat_cli is None:
endpoint = self.keystone_access.service_catalog.url_for(
service_type='orchestration', service_name='heat',
interface='internal')
if not endpoint:
raise self.helpers.NotFound(
"Cannot find Heat endpoint")
self._heat_cli = heatclient.v1.client.Client(
auth_url=self.auth_url,
endpoint=endpoint,
token=(lambda: self.keystone_access.auth_token)()
)
return self._heat_cli
@property
def ceilometer_client(self):
if self._ceilometer is None:
endpoint = self.keystone_access.service_catalog.url_for(
service_type='metering', service_name='ceilometer',
interface='internal')
if not endpoint:
raise self.helpers.NotFound(
"Cannot find Ceilometer endpoint")
aodh_endpoint = self.keystone_access.service_catalog.url_for(
service_type='alarming', service_name='aodh',
interface='internal')
if not aodh_endpoint:
raise self.helpers.NotFound(
"Cannot find AODH (alarm) endpoint")
self._ceilometer = ceilometerclient.v2.Client(
aodh_endpoint=aodh_endpoint,
auth_url=self.auth_url,
endpoint=endpoint,
token=lambda: self.keystone_access.auth_token)
return self._ceilometer
def get_plugin_settings(self):
return plugin_settings
def prepare_plugin(self):
self.helpers.prepare_plugin(self.settings.plugin_path)
def activate_plugin(self, options=None):
if options is None:
options = self.settings.default_options
self.helpers.activate_plugin(
self.settings.name, self.settings.version, options)
def is_kafka_enabled(self):
return (
self.fuel_web.get_nailgun_cluster_nodes_by_roles(
self.helpers.cluster_id, ["kafka"])
)
def check_services_on_nodes(self, services, node_role):
node_ips = [
node["ip"] for node in
self.fuel_web.get_nailgun_cluster_nodes_by_roles(
self.helpers.cluster_id, [node_role])]
logger.info("Check {} services on {} nodes".format(
services, node_role))
for ip in node_ips:
for service in services:
fuelweb_checkers.verify_service(
ip, service, ignore_count_of_proccesses=True)
    def check_plugin_online(self):
        """Verify that all Telemetry plugin components are up.

        Checks pacemaker resources and per-role services (which differ
        depending on whether Kafka is deployed), then probes the
        Ceilometer REST API and the "ceilometer" database in InfluxDB.
        """
        # Resources that run as a single (non-HA) pacemaker instance in
        # the default (no-Kafka) deployment.
        non_ha_pcmk_resources = ["p_ceilometer-agent-central",
                                 "p_aodh-evaluator"]
        ha_pcmk_resources = ["telemetry-collector-heka"]
        controller_services = ["ceilometer-agent-notification",
                               "ceilometer-api", "aodh-api"]
        compute_services = ["ceilometer-polling"]
        if self.is_kafka_enabled():
            # With Kafka, the hindsight collector runs on kafka nodes and
            # the central/aodh agents are checked as HA resources instead.
            kafka_services = ["telemetry-collector-hindsight"]
            self.check_services_on_nodes(kafka_services, "kafka")
            ha_pcmk_resources = non_ha_pcmk_resources
        else:
            logger.info("Check {} pacemaker resources".format(
                non_ha_pcmk_resources))
            for resource in non_ha_pcmk_resources:
                self.helpers.check_pacemaker_resource(
                    resource, "controller", is_ha=False)
        logger.info(
            "Check {} pacemaker resources".format(ha_pcmk_resources))
        for resource in ha_pcmk_resources:
            self.helpers.check_pacemaker_resource(resource, "controller")
        self.check_services_on_nodes(controller_services, "controller")
        self.check_services_on_nodes(compute_services, "compute")
        logger.info("Check Ceilometer API")
        keystone_access = self.helpers.os_conn.keystone_access
        endpoint = keystone_access.service_catalog.url_for(
            service_type="metering", service_name="ceilometer",
            interface="internal")
        if not endpoint:
            # NOTE(review): the rest of this class raises
            # self.helpers.NotFound; here the imported helpers *module*
            # is used instead — confirm both expose NotFound.
            raise helpers.NotFound("Cannot find Ceilometer endpoint")
        headers = {
            "X-Auth-Token": keystone_access.auth_token,
            "content-type": "application/json"
        }
        # A successful GET on /v2/capabilities proves the API answers
        # authenticated requests.
        checkers.check_http_get_response("{}/v2/capabilities".format(endpoint),
                                         headers=headers)
        logger.info("Check Ceilometer database in InfluxDB")
        influx_api.InfluxdbPluginApi().do_influxdb_query(
            "show measurements", db="ceilometer")
def uninstall_plugin(self):
return self.helpers.uninstall_plugin(self.settings.name,
self.settings.version)
def check_uninstall_failure(self):
return self.helpers.check_plugin_cannot_be_uninstalled(
self.settings.name, self.settings.version)
def check_ceilometer_sample_functionality(self):
logger.info("Start checking Ceilometer Samples API")
fail_msg = "Failed to get sample list."
msg = "getting samples list"
self.helpers.verify(60, self.ceilometer_client.new_samples.list, 1,
fail_msg, msg, limit=10)
fail_msg = "Failed to get statistic of metric."
msg = "getting statistic of metric"
an_hour_ago = (datetime.datetime.now() -
datetime.timedelta(hours=1)).isoformat()
query = [{"field": "timestamp", "op": "gt", "value": an_hour_ago}]
self.helpers.verify(600, self.ceilometer_client.statistics.list, 2,
fail_msg, msg, meter_name="image", q=query)
    def check_ceilometer_alarm_functionality(self):
        """Exercise the alarm lifecycle through the Ceilometer/Aodh API.

        Creates an alarm, reads it back, waits for the "ok" state,
        raises the threshold so the condition fires, waits for the
        "alarm" state, fetches the alarm history and deletes the alarm.
        """
        logger.info("Start checking Ceilometer AODH(Alarms) API")
        # Step 1: create the test alarm (create_alarm first removes any
        # left-over alarm with the same name).
        fail_msg = "Failed to create alarm."
        msg = "creating alarm"
        alarm = self.helpers.verify(60, self.create_alarm, 1, fail_msg, msg,
                                    meter_name="image",
                                    threshold=0.9,
                                    name="ceilometer-fake-alarm",
                                    period=600,
                                    statistic="avg",
                                    comparison_operator="lt")
        # Step 2: the alarm must be retrievable by id.
        fail_msg = "Failed to get alarm."
        msg = "getting alarm"
        self.helpers.verify(60, self.ceilometer_client.alarms.get, 2,
                            fail_msg, msg, alarm_id=alarm.alarm_id)
        # Step 3: wait (up to 1000s) for the alarm to settle into "ok".
        fail_msg = "Failed while waiting for alarm state to become 'ok'."
        msg = "waiting for alarm state to become 'ok'"
        self.helpers.verify(1000, self.check_alarm_state, 3,
                            fail_msg, msg, alarm_id=alarm.alarm_id, state="ok")
        # Step 4: raise the threshold so the "avg lt threshold" condition
        # becomes satisfiable and the alarm should fire.
        fail_msg = "Failed to update alarm."
        msg = "updating alarm"
        self.helpers.verify(60, self.ceilometer_client.alarms.update, 4,
                            fail_msg, msg, alarm_id=alarm.alarm_id,
                            threshold=1.1)
        # Step 5: wait for the updated alarm to reach the "alarm" state.
        fail_msg = "Failed while waiting for alarm state to become 'alarm'."
        msg = "waiting for alarm state to become 'alarm'"
        self.helpers.verify(1000, self.check_alarm_state, 5,
                            fail_msg, msg, alarm_id=alarm.alarm_id,
                            state="alarm")
        # Step 6: the alarm's state-transition history is available.
        fail_msg = "Failed to get alarm history."
        msg = "getting alarm history"
        self.helpers.verify(60, self.ceilometer_client.alarms.get_history, 6,
                            fail_msg, msg, alarm_id=alarm.alarm_id)
        # Step 7: clean up the test alarm.
        fail_msg = "Failed to delete alarm."
        msg = "deleting alarm"
        self.helpers.verify(60, self.ceilometer_client.alarms.delete, 7,
                            fail_msg, msg, alarm_id=alarm.alarm_id)
    def check_ceilometer_event_functionality(self):
        """Smoke-check the Ceilometer Events API.

        Lists events, then uses the first event's type, message id and
        traits to exercise the event get, event-type list, trait list
        and trait-description endpoints.
        """
        logger.info("Start checking Ceilometer Events API")
        fail_msg = "Failed to get event list."
        msg = "getting event list"
        events_list = self.helpers.verify(
            600, self.ceilometer_client.events.list, 1, fail_msg, msg,
            limit=10
        )
        # NOTE(review): assumes verify() returned a non-empty list and
        # that the first event has at least one trait — an empty
        # deployment would raise IndexError here; confirm verify retries
        # until a non-empty result.
        event_type = events_list[0].event_type
        message_id = events_list[0].message_id
        traits = events_list[0].traits
        fail_msg = ("Failed to find '{event_type}' event type in certain "
                    "'{message_id}' event.".format(event_type=event_type,
                                                   message_id=message_id))
        msg = ("searching '{event_type}' event type in certain "
               "'{message_id}' event.".format(event_type=event_type,
                                              message_id=message_id))
        self.helpers.verify(60, self.ceilometer_client.events.get, 2, fail_msg,
                            msg, message_id=message_id)
        fail_msg = "Failed to get event types list."
        msg = "getting event types list"
        self.helpers.verify(60, self.ceilometer_client.event_types.list, 3,
                            fail_msg, msg)
        fail_msg = "Failed to get trait list."
        msg = "getting trait list"
        self.helpers.verify(60, self.ceilometer_client.traits.list, 4,
                            fail_msg, msg, event_type=event_type,
                            trait_name=traits[0]["name"])
        fail_msg = "Failed to check event traits description."
        msg = "checking event traits description"
        self.helpers.verify(60, self.ceilometer_client.trait_descriptions.list,
                            5, fail_msg, msg, event_type=event_type)
def check_ceilometer_resource_functionality(self):
logger.info("Start checking Ceilometer Resource API")
fail_msg = "Failed to get resource list."
msg = "getting resources list"
resources_list = self.helpers.verify(
600, self.ceilometer_client.resources.list, 1, fail_msg, msg,
limit=10)
for resource in resources_list:
# We need to check that resource_id doesn't contain char '/'
# because if it is GET resource request fails
if "/" not in resource.resource_id:
resource_id = resource.resource_id
break
fail_msg = ("Failed to find '{}' resource with certain resource "
"ID.".format(resource_id))
msg = ("searching '{}' resource with certain resource "
"ID".format(resource_id))
self.helpers.verify(60, self.ceilometer_client.resources.get, 2,
fail_msg, msg, resource_id=resource_id)
fail_msg = "Failed to get meters list."
msg = "getting meters list"
self.helpers.verify(60, self.ceilometer_client.meters.list, 3,
fail_msg, msg, limit=10)
fail_msg = "Failed to get unique meters list."
msg = "getting unique meters list"
self.helpers.verify(60, self.ceilometer_client.meters.list, 4,
fail_msg, msg, limit=10, unique=True)
def check_ceilometer_autoscaling(self):
logger.info("Start checking autoscaling")
# check required resources available
self._check_required_resources()
# create test flavor
fail_msg = "Failed to create test heat flavor"
heat_flavor = self.helpers.verify(
60, self.helpers.os_conn.create_flavor, 1, fail_msg,
"creating test heat flavor",
name="ostf_test-flavor-autoscaling", ram=256, vcpus=1, disk=2
)
# create keypair
fail_msg = "Failed to create test keypair"
keypair = self.helpers.verify(
60, self.helpers.os_conn.create_key, 2, fail_msg,
"creating test keypair", key_name="ostf_test-keypair-autoscaling")
# create security group
fail_msg = "Failed to create test seurity group"
msg = "creating test security group"
sec_group = self.helpers.verify(60, self._create_securtity_group, 3,
fail_msg, msg)
parameters = {
'KeyName': keypair.name,
'InstanceType': heat_flavor.name,
'ImageId': "TestVM",
'SecurityGroup': sec_group.name
}
net_provider = self.helpers.nailgun_client.get_cluster(
self.helpers.cluster_id)["net_provider"]
if "neutron" in net_provider:
template = self._load_template("heat_autoscaling_neutron.yaml")
fail_msg = "Failed to create test network resources"
msg = "creating network resources"
parameters['Net'] = self.helpers.verify(
60, self._create_network_resources, 4, fail_msg, msg,
tenant_id=self.keystone_access.tenant_id)
else:
template = self._load_temlate("heat_autoscaling_nova.yaml")
# create Heat stack
fail_msg = "Failed to create Heat stack"
msg = "creating Heat stack"
stack_name = 'ostf_test-heat-stack'
stack_id = self.helpers.verify(60, self.heat_cli.stacks.create, 5,
fail_msg, msg,
stack_name=stack_name,
template=template,
parameters=parameters,
disable_rollback=True)['stack']['id']
# get Heat stack
fail_msg = "Failed to get Heat stack"
msg = "getting Heat stack"
stack = self.helpers.verify(60, self.heat_cli.stacks.get, 6, fail_msg,
msg, stack_id=stack_id)
# check stack creation comleted
fail_msg = "Stack was not created properly."
self.helpers.verify(
600, self._check_stack_status,
6, fail_msg,
"stack status becoming 'CREATE_COMPLETE'",
stack_id=stack_id, status="CREATE_COMPLETE"
)
# getting instances list
reduced_stack_name = "{0}-{1}".format(
stack.stack_name[:2], stack.stack_name[-4:])
instances = self.helpers.verify(60, self._get_instances_by_name_mask,
7, "Failed to get instances list",
"getting instances list",
mask_name=reduced_stack_name)
# launching the second instance during autoscaling
fail_msg = "Failed to launch the 2nd instance per autoscaling alarm."
msg = "launching the new instance per autoscaling alarm"
self.helpers.verify(
1500, self._check_instance_scaling, 8, fail_msg, msg,
exp_lenght=(len(instances) + 2),
reduced_stack_name=reduced_stack_name
)
# termination of the second instance during autoscaling
fail_msg = ("Failed to terminate the 2nd instance per autoscaling "
"alarm.")
msg = "terminating the 2nd instance per autoscaling alarm"
self.helpers.verify(
1500, self._check_instance_scaling, 9, fail_msg, msg,
exp_lenght=(len(instances) + 1),
reduced_stack_name=reduced_stack_name
)
# delete Heat stack
self.helpers.verify(60, self.heat_cli.stacks.delete, 10,
"Failed to delete Heat stack",
"deleting Heat stack", stack_id=stack_id)
self.helpers.verify(
600, self._check_instance_scaling, 11,
"Not all stack instances was deleted",
"checking all stack instances was deleted",
exp_lenght=(len(instances) - 1),
reduced_stack_name=reduced_stack_name
)
def _create_securtity_group(self, name="ostf_test-secgroup-autoscaling"):
logger.info("Creating test security group for Heat autoscaling...")
sg_desc = name + " description"
sec_group = None
for sgp in self.nova_cli.security_groups.list():
if name == sgp.name:
sec_group = sgp
break
if not sec_group:
sec_group = self.nova_cli.security_groups.create(name, sg_desc)
return sec_group
def _create_network_resources(self, tenant_id):
"""This method creates network resources.
It creates a network, an internal subnet on the network, a router and
links the network to the router. All resources created by this method
will be automatically deleted.
"""
logger.info("Creating network resources...")
net_name = "ostf-autoscaling-test-service-net"
net_body = {
"network": {
"name": net_name,
"tenant_id": tenant_id
}
}
ext_net = None
net = None
for network in self.neutron_cli.list_networks()["networks"]:
if not net and network["name"] == net_name:
net = network
if not ext_net and network["router:external"]:
ext_net = network
if not net:
net = self.neutron_cli.create_network(net_body)["network"]
subnet = self.helpers.os_conn.create_subnet(
"sub" + net_name, net["id"], "10.1.7.0/24", tenant_id=tenant_id
)
router_name = 'ostf-autoscaling-test-service-router'
router = self.helpers.os_conn.create_router(
router_name, self.helpers.os_conn.get_tenant("admin"))
self.neutron_cli.add_interface_router(
router["id"], {"subnet_id": subnet["id"]})
return net["id"]
@staticmethod
def _load_template(file_name):
"""Load specified template file from etc directory."""
filepath = os.path.join(
os.path.dirname(os.path.realpath(__file__)),
'../../fixtures/autoscaling_templates', file_name)
with open(filepath) as f:
return f.read()
def create_alarm(self, **kwargs):
for alarm in self.ceilometer_client.alarms.list():
if alarm.name == kwargs['name']:
self.ceilometer_client.alarms.delete(alarm.alarm_id)
return self.ceilometer_client.alarms.create(**kwargs)
def check_alarm_state(self, alarm_id, state=None):
try:
alarm_state = self.ceilometer_client.alarms.get_state(
alarm_id)
except Exception:
alarm_state = None
if state:
if alarm_state == state:
return True
elif alarm_state == 'alarm' or 'ok':
return True
return False
def _check_instance_scaling(self, exp_lenght, reduced_stack_name):
return exp_lenght == len(self._get_instances_by_name_mask(
reduced_stack_name))
def _check_stack_status(self, stack_id, status):
try:
stack_status = self.heat_cli.stacks.get(stack_id).stack_status
except Exception:
stack_status = None
if stack_status and stack_status == status:
return True
return False
def _get_instances_by_name_mask(self, mask_name):
"""This method retuns list of instances with certain names."""
instances = []
instance_list = self.nova_cli.servers.list()
logger.info('Instances list is {0}'.format(instance_list))
logger.info(
'Expected instance name should inlude {0}'.format(mask_name))
for inst in instance_list:
logger.info('Instance name is {0}'.format(inst.name))
if inst.name.startswith(mask_name):
instances.append(inst)
return instances
def _get_info_about_available_resources(self, min_ram, min_hdd, min_vcpus):
"""This function allows to get the information about resources.
We need to collect the information about available RAM, HDD and vCPUs
on all compute nodes for cases when we will create more than 1 VM.
This function returns the count of VMs with required parameters which
we can successfully run on existing cloud.
"""
vms_count = 0
for hypervisor in self.nova_cli.hypervisors.list():
if hypervisor.free_ram_mb >= min_ram:
if hypervisor.free_disk_gb >= min_hdd:
if hypervisor.vcpus - hypervisor.vcpus_used >= min_vcpus:
# We need to determine how many VMs we can run
# on this hypervisor
free_cpu = hypervisor.vcpus - hypervisor.vcpus_used
k1 = int(hypervisor.free_ram_mb / min_ram)
k2 = int(hypervisor.free_disk_gb / min_hdd)
k3 = int(free_cpu / min_vcpus)
vms_count += min(k1, k2, k3)
return vms_count
def _check_required_resources(self, min_required_ram_mb=4096,
hdd=40, vCpu=2):
vms_count = self._get_info_about_available_resources(
min_required_ram_mb, hdd, vCpu)
if vms_count < 1:
msg = ('This test requires more hardware resources of your '
'OpenStack cluster: your cloud should allow to create '
'at least 1 VM with {0} MB of RAM, {1} HDD and {2} vCPUs. '
'You need to remove some resources or add compute nodes '
'to have an ability to run this OSTF test.'
.format(min_required_ram_mb, hdd, vCpu))
raise proboscis.SkipTest(msg)