Merge "Add load balancer pool member evaluator"
This commit is contained in:
commit
b892f23703
153
aodh/evaluator/loadbalancer.py
Normal file
153
aodh/evaluator/loadbalancer.py
Normal file
@ -0,0 +1,153 @@
|
|||||||
|
# Copyright 2019 Catalyst Cloud Ltd.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
import datetime
|
||||||
|
|
||||||
|
from dateutil import parser
|
||||||
|
from octaviaclient.api.v2 import octavia
|
||||||
|
from oslo_config import cfg
|
||||||
|
from oslo_log import log
|
||||||
|
from oslo_utils import timeutils
|
||||||
|
import six
|
||||||
|
|
||||||
|
from aodh import evaluator
|
||||||
|
from aodh.evaluator import threshold
|
||||||
|
from aodh import keystone_client as aodh_keystone
|
||||||
|
|
||||||
|
LOG = log.getLogger(__name__)
|
||||||
|
|
||||||
|
# Alarm type string this evaluator handles; also used as the reason-data
# "type" field when the alarm transitions.
ALARM_TYPE = "loadbalancer_member_health"

# Configuration options registered by this module (exposed via
# aodh.opts.list_opts for oslo.config generation).
OPTS = [
    cfg.IntOpt('member_creation_time',
               default=120,
               help='The time in seconds to wait for the load balancer '
                    'member creation.'
               ),
]
|
||||||
|
|
||||||
|
|
||||||
|
class LoadBalancerMemberHealthEvaluator(evaluator.Evaluator):
    """Evaluate alarms of type ``loadbalancer_member_health``.

    The alarm state is derived from the operating status of the members
    in a given Octavia load balancer pool.
    """

    def __init__(self, conf):
        super(LoadBalancerMemberHealthEvaluator, self).__init__(conf)
        # Octavia client is created lazily on first use (see lb_client).
        self._lb_client = None

    @property
    def lb_client(self):
        """Octavia API client, constructed on first access."""
        if self._lb_client is None:
            endpoint = aodh_keystone.url_for(
                self.conf,
                service_type='load-balancer',
                interface="internal",
                region_name=self.conf.service_credentials.region_name
            )
            self._lb_client = octavia.OctaviaAPI(
                session=aodh_keystone.get_session(self.conf),
                service_type='load-balancer',
                endpoint=endpoint
            )

        return self._lb_client

    def _get_unhealthy_members(self, pool_id):
        """Get unhealthy members in a pool.

        The member (virtual machine) operating_status keeps ERROR after
        creation before the application is up and running inside, so
        recently created members are ignored during the check.

        :param pool_id: ID of the Octavia pool to inspect.
        :returns: list of member dicts considered unhealthy.
        :raises threshold.InsufficientDataError: if the load balancing
            service cannot be reached or returns an error response.
        """
        unhealthy_members = []

        try:
            ret = self.lb_client.member_list(pool_id)
        except Exception as e:
            LOG.warning("Failed to communicate with load balancing service, "
                        "error: %s", six.text_type(e))
            raise threshold.InsufficientDataError(
                'failed to communicate with load balancing service',
                []
            )

        # member_list returns a Response object (with status_code) on error
        # and a plain dict on success.
        if getattr(ret, 'status_code', None):
            # Some error happened
            raise threshold.InsufficientDataError(ret.content, [])

        # Hoist loop invariants: the grace-period cutoff is the same for
        # every member.
        now = timeutils.utcnow()
        t = self.conf.member_creation_time
        grace = datetime.timedelta(seconds=t)

        for m in ret.get("members", []):
            try:
                created_time = parser.parse(m['created_at'], ignoretz=True)
            except ValueError as e:
                # Skip members with malformed timestamps rather than failing
                # the whole evaluation; log the parse error for diagnosis.
                LOG.warning('Failed to parse the member created time: %s',
                            six.text_type(e))
                continue

            if now - created_time < grace:
                LOG.debug("Ignore member which was created within %ss", t)
                continue

            if m["admin_state_up"] and m["operating_status"] == "ERROR":
                unhealthy_members.append(m)

        return unhealthy_members

    def _transition_alarm(self, alarm, new_state, members,
                          count, unknown_reason, pool_id=None,
                          stack_id=None, asg_id=None):
        """Refresh the alarm with the new state, reason and reason data.

        :param alarm: the alarm being evaluated.
        :param new_state: target alarm state (ok/alarm/insufficient data).
        :param members: list of unhealthy member dicts (may be empty).
        :param count: number of unhealthy members.
        :param unknown_reason: overriding reason string when the state is
            UNKNOWN, or None.
        """
        transition = alarm.state != new_state
        last = members[-1] if members else None

        reason_data = {
            'type': ALARM_TYPE,
            'count': count,
            'most_recent': last,
            'unhealthy_members': members,
            "pool_id": pool_id,
            "stack_id": stack_id,
            "asg_id": asg_id
        }

        if transition:
            reason = ('Transition to %(state)s due to %(count)d members'
                      ' unhealthy, most recent: %(most_recent)s' %
                      dict(state=new_state, count=count, most_recent=last))
        else:
            reason = ('Remaining as %(state)s' % dict(state=new_state))

        reason = unknown_reason or reason

        # Refresh and trigger alarm based on state transition.
        self._refresh(alarm, new_state, reason, reason_data)

    def evaluate(self, alarm):
        """Evaluate one alarm against the pool's member health."""
        if not self.within_time_constraint(alarm):
            LOG.debug('Attempted to evaluate alarm %s, but it is not '
                      'within its time constraint.', alarm.alarm_id)
            return

        LOG.debug("Evaluating %s rule alarm %s ...", ALARM_TYPE,
                  alarm.alarm_id)

        pool_id = alarm.rule["pool_id"]
        error_mems = []
        try:
            error_mems = self._get_unhealthy_members(pool_id)
        except threshold.InsufficientDataError as e:
            evaluation = (evaluator.UNKNOWN, e.statistics, 0, e.reason)
        else:
            state = evaluator.ALARM if len(error_mems) > 0 else evaluator.OK
            evaluation = (state, error_mems, len(error_mems), None)

        self._transition_alarm(alarm, *evaluation, pool_id=pool_id,
                               stack_id=alarm.rule.get("stack_id"),
                               asg_id=alarm.rule.get("autoscaling_group_id"))
|
@ -88,6 +88,11 @@ def delete_trust_id(trust_id, auth_plugin):
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
def url_for(conf, **kwargs):
    """Resolve a service endpoint URL from the Keystone catalog.

    Keyword arguments are forwarded to the session's ``get_endpoint``
    (e.g. ``service_type``, ``interface``, ``region_name``).
    """
    return get_session(conf).get_endpoint(**kwargs)
|
||||||
|
|
||||||
|
|
||||||
OPTS = [
|
OPTS = [
|
||||||
cfg.StrOpt('region-name',
|
cfg.StrOpt('region-name',
|
||||||
default=os.environ.get('OS_REGION_NAME'),
|
default=os.environ.get('OS_REGION_NAME'),
|
||||||
|
@ -22,6 +22,8 @@ import aodh.coordination
|
|||||||
import aodh.evaluator
|
import aodh.evaluator
|
||||||
import aodh.evaluator.event
|
import aodh.evaluator.event
|
||||||
import aodh.evaluator.gnocchi
|
import aodh.evaluator.gnocchi
|
||||||
|
import aodh.evaluator.loadbalancer
|
||||||
|
import aodh.evaluator.threshold
|
||||||
import aodh.event
|
import aodh.event
|
||||||
import aodh.keystone_client
|
import aodh.keystone_client
|
||||||
import aodh.notifier.rest
|
import aodh.notifier.rest
|
||||||
@ -37,6 +39,7 @@ def list_opts():
|
|||||||
aodh.evaluator.OPTS,
|
aodh.evaluator.OPTS,
|
||||||
aodh.evaluator.event.OPTS,
|
aodh.evaluator.event.OPTS,
|
||||||
aodh.evaluator.threshold.OPTS,
|
aodh.evaluator.threshold.OPTS,
|
||||||
|
aodh.evaluator.loadbalancer.OPTS,
|
||||||
aodh.notifier.rest.OPTS,
|
aodh.notifier.rest.OPTS,
|
||||||
aodh.queue.OPTS,
|
aodh.queue.OPTS,
|
||||||
aodh.service.OPTS)),
|
aodh.service.OPTS)),
|
||||||
|
158
aodh/tests/unit/evaluator/test_loadbalancer.py
Normal file
158
aodh/tests/unit/evaluator/test_loadbalancer.py
Normal file
@ -0,0 +1,158 @@
|
|||||||
|
# Copyright 2019 Catalyst Cloud Ltd.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
import datetime
|
||||||
|
|
||||||
|
import mock
|
||||||
|
from oslo_utils import timeutils
|
||||||
|
from oslo_utils import uuidutils
|
||||||
|
|
||||||
|
from aodh import evaluator
|
||||||
|
from aodh.evaluator import loadbalancer
|
||||||
|
from aodh.storage import models
|
||||||
|
from aodh.tests import constants
|
||||||
|
from aodh.tests.unit.evaluator import base
|
||||||
|
|
||||||
|
|
||||||
|
@mock.patch('octaviaclient.api.v2.octavia.OctaviaAPI')
@mock.patch('aodh.keystone_client.get_session')
class TestLoadBalancerMemberHealthEvaluator(base.TestEvaluatorBase):
    """Tests for the loadbalancer_member_health alarm evaluator."""

    EVALUATOR = loadbalancer.LoadBalancerMemberHealthEvaluator

    @staticmethod
    def _get_alarm(state='insufficient data',
                   state_reason='insufficient data'):
        # Build a minimal loadbalancer_member_health alarm; the previous
        # version copy-pasted this 20-line constructor in every test.
        return models.Alarm(
            name='lb_member_alarm',
            description='lb_member_alarm',
            type=loadbalancer.ALARM_TYPE,
            enabled=True,
            user_id=uuidutils.generate_uuid(),
            project_id=uuidutils.generate_uuid(dashed=False),
            alarm_id=uuidutils.generate_uuid(),
            state=state,
            state_reason=state_reason,
            state_timestamp=constants.MIN_DATETIME,
            timestamp=constants.MIN_DATETIME,
            insufficient_data_actions=[],
            ok_actions=[],
            alarm_actions=[],
            repeat_actions=False,
            time_constraints=[],
            severity='low',
            rule=dict(
                pool_id=uuidutils.generate_uuid(),
                stack_id=uuidutils.generate_uuid(),
                autoscaling_group_id=uuidutils.generate_uuid(),
            )
        )

    @staticmethod
    def _member_list(operating_status):
        # Fake Octavia member_list payload with one member created a day
        # ago, i.e. well past the member_creation_time grace period.
        created_at = timeutils.utcnow() - datetime.timedelta(days=1)
        return {
            'members': [
                {
                    'created_at': created_at.isoformat(),
                    'admin_state_up': True,
                    'operating_status': operating_status,
                }
            ]
        }

    def test_evaluate(self, mock_session, mock_octavia):
        # An ERROR member past the grace period drives the alarm to ALARM.
        alarm = self._get_alarm()

        mock_client = mock.MagicMock()
        mock_octavia.return_value = mock_client
        mock_client.member_list.return_value = self._member_list('ERROR')

        self.evaluator.evaluate(alarm)

        self.assertEqual(evaluator.ALARM, alarm.state)

    def test_evaluate_octavia_error(self, mock_session, mock_octavia):
        # An error Response from Octavia drives the alarm to UNKNOWN and
        # surfaces the response content as the state reason.
        class Response(object):
            def __init__(self, status_code, content):
                self.status_code = status_code
                self.content = content

        alarm = self._get_alarm()

        mock_client = mock.MagicMock()
        mock_octavia.return_value = mock_client
        msg = 'Pool NotFound'
        mock_client.member_list.return_value = Response(404, msg)

        self.evaluator.evaluate(alarm)

        self.assertEqual(evaluator.UNKNOWN, alarm.state)
        self.assertEqual(msg, alarm.state_reason)

    def test_evaluate_alarm_to_ok(self, mock_session, mock_octavia):
        # A healthy (ACTIVE) member drives an alarming alarm back to OK.
        alarm = self._get_alarm(state=evaluator.ALARM, state_reason='alarm')

        mock_client = mock.MagicMock()
        mock_octavia.return_value = mock_client
        mock_client.member_list.return_value = self._member_list('ACTIVE')

        self.evaluator.evaluate(alarm)

        self.assertEqual(evaluator.OK, alarm.state)
|
@ -0,0 +1,4 @@
|
|||||||
|
features:
|
||||||
|
- Added a new evaluator for alarms of type
    ``loadbalancer_member_health``, which evaluates the alarm by checking the
    operating status of the members in a given load balancer pool.
|
@ -35,3 +35,5 @@ cachetools>=1.1.6
|
|||||||
cotyledon
|
cotyledon
|
||||||
keystoneauth1>=2.1
|
keystoneauth1>=2.1
|
||||||
debtcollector>=1.2.0 # Apache-2.0
|
debtcollector>=1.2.0 # Apache-2.0
|
||||||
|
python-octaviaclient>=1.8.0
|
||||||
|
python-dateutil # BSD
|
||||||
|
@ -79,6 +79,7 @@ aodh.evaluator =
|
|||||||
gnocchi_aggregation_by_metrics_threshold = aodh.evaluator.gnocchi:GnocchiAggregationMetricsThresholdEvaluator
|
gnocchi_aggregation_by_metrics_threshold = aodh.evaluator.gnocchi:GnocchiAggregationMetricsThresholdEvaluator
|
||||||
gnocchi_aggregation_by_resources_threshold = aodh.evaluator.gnocchi:GnocchiAggregationResourcesThresholdEvaluator
|
gnocchi_aggregation_by_resources_threshold = aodh.evaluator.gnocchi:GnocchiAggregationResourcesThresholdEvaluator
|
||||||
composite = aodh.evaluator.composite:CompositeEvaluator
|
composite = aodh.evaluator.composite:CompositeEvaluator
|
||||||
|
loadbalancer_member_health = aodh.evaluator.loadbalancer:LoadBalancerMemberHealthEvaluator
|
||||||
|
|
||||||
aodh.notifier =
|
aodh.notifier =
|
||||||
log = aodh.notifier.log:LogAlarmNotifier
|
log = aodh.notifier.log:LogAlarmNotifier
|
||||||
|
Loading…
x
Reference in New Issue
Block a user