From 1643abed422374733b3e73b4dcc701fd51114b84 Mon Sep 17 00:00:00 2001 From: Jaromir Wysoglad Date: Tue, 6 Aug 2024 10:01:56 -0400 Subject: [PATCH] Add evaluation results counters to /metrics This change adds code for collection and storage of evaluation result counters. The values of the counters can then be retrieved through a new /metrics API endpoint. Counters are collected and served for each alarm separately. Only metrics for the current project will be shown on the /metrics endpoint. To enable this feature set "enable_evaluation_results_metrics" to true inside [DEFAULT]. Change-Id: I3fceb9a244f248f1e62a1267408c82d1b35a4d37 --- aodh/api/controllers/v2/metrics.py | 120 ++++++++++++++++++ aodh/api/controllers/v2/root.py | 2 + aodh/api/policies.py | 14 +- aodh/evaluator/__init__.py | 5 + aodh/evaluator/threshold.py | 4 + aodh/opts.py | 10 +- aodh/storage/base.py | 10 ++ aodh/storage/impl_sqlalchemy.py | 62 +++++++++ aodh/storage/models.py | 11 ++ aodh/storage/sqlalchemy/alembic/alembic.ini | 2 +- aodh/storage/sqlalchemy/alembic/env.py | 28 ++-- .../versions/008_added_counter_table.py | 67 ++++++++++ aodh/storage/sqlalchemy/models.py | 20 +++ aodh/tests/functional/api/v2/policy.yaml-test | 1 + aodh/tests/functional/api/v2/test_metrics.py | 120 ++++++++++++++++++ aodh/tests/functional/db.py | 1 + .../storage/test_storage_scenarios.py | 73 +++++++++++ aodh/tests/unit/evaluator/test_gnocchi.py | 9 ++ ...n-counter-collection-f324ebda00fa5c6c.yaml | 9 ++ 19 files changed, 557 insertions(+), 11 deletions(-) create mode 100644 aodh/api/controllers/v2/metrics.py create mode 100644 aodh/storage/sqlalchemy/alembic/versions/008_added_counter_table.py create mode 100644 aodh/tests/functional/api/v2/test_metrics.py create mode 100644 releasenotes/notes/add-metrics-endpoint-and-evaluation-counter-collection-f324ebda00fa5c6c.yaml diff --git a/aodh/api/controllers/v2/metrics.py b/aodh/api/controllers/v2/metrics.py new file mode 100644 index 000000000..c3df9b2d9 --- /dev/null +++ 
b/aodh/api/controllers/v2/metrics.py @@ -0,0 +1,120 @@ +# +# Copyright 2024 Red Hat, Inc +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + + +import collections +from oslo_log import log +import pecan +from pecan import rest +import wsmeext.pecan as wsme_pecan + +from aodh.api.controllers.v2 import base +from aodh.api import rbac +from aodh import evaluator +from aodh.i18n import _ +from aodh import profiler + +LOG = log.getLogger(__name__) + + +class EvaluationResultOutput(base.Base): + """A class for representing evaluation result data""" + alarm_id = str + project_id = str + state_counters = {str: int} + + @classmethod + def sample(cls): + return cls( + alarm_id="b8e17f58-089a-43fc-a96b-e9bcac4d4b53", + project_id="2dd8edd6c8c24f49bf04670534f6b357", + state_counters={ + "ok": 20, + "insufficient data": 5, + "alarm": 10, + } + ) + + +class MetricsOutput(base.Base): + """A class for representing data from metrics API endpoint""" + + evaluation_results = [EvaluationResultOutput] + "The evaluation result counters" + + # This could be extended for other metrics in the future + + @classmethod + def sample(cls): + return cls(evaluation_results=[{ + "alarm_id": "b8e17f58-089a-43fc-a96b-e9bcac4d4b53", + "project_id": "2dd8edd6c8c24f49bf04670534f6b357", + "state_counters": { + "ok": 20, + "insufficient data": 5, + "alarm": 10, + } + }]) + + +@profiler.trace_cls('api') +class MetricsController(rest.RestController): + """Manages the metrics api endpoint""" + + @staticmethod + 
def group_counters(counters): + result = collections.defaultdict(lambda: collections.defaultdict(dict)) + for c in counters: + result[c.project_id][c.alarm_id][c.state] = c.value + return result + + @wsme_pecan.wsexpose(MetricsOutput) + def get_all(self): + """Return all metrics""" + if not pecan.request.cfg.enable_evaluation_results_metrics: + raise base.ClientSideError(_( + "metrics endpoint is disabled" + ), 403) + + project_id = pecan.request.headers.get('X-Project-Id') + target = {"project_id": project_id} + + rbac.enforce('get_metrics', pecan.request.headers, + pecan.request.enforcer, target) + + content = MetricsOutput() + alarm_states = [evaluator.UNKNOWN, evaluator.OK, evaluator.ALARM] + + LOG.debug('Getting evaluation result counters from database') + grouped_counters = self.group_counters( + pecan.request.storage.get_alarm_counters(project_id=project_id) + ) + evaluation_results = [] + for project, alarms in grouped_counters.items(): + for alarm, states in alarms.items(): + evaluation_results.append( + EvaluationResultOutput( + project_id=project, + alarm_id=alarm, + state_counters={ + state: states.get(state.replace(" ", "_"), 0) + for state in alarm_states + } + ) + ) + + content.evaluation_results = evaluation_results + + return content diff --git a/aodh/api/controllers/v2/root.py b/aodh/api/controllers/v2/root.py index ffdbf328f..e7ae3f9b7 100644 --- a/aodh/api/controllers/v2/root.py +++ b/aodh/api/controllers/v2/root.py @@ -20,6 +20,7 @@ from aodh.api.controllers.v2 import alarms from aodh.api.controllers.v2 import capabilities +from aodh.api.controllers.v2 import metrics from aodh.api.controllers.v2 import query from aodh.api.controllers.v2 import quotas @@ -31,3 +32,4 @@ class V2Controller(object): query = query.QueryController() capabilities = capabilities.CapabilitiesController() quotas = quotas.QuotasController() + metrics = metrics.MetricsController() diff --git a/aodh/api/policies.py b/aodh/api/policies.py index acd6c4fda..8d33e5392 100644 
--- a/aodh/api/policies.py +++ b/aodh/api/policies.py @@ -329,7 +329,19 @@ rules = [ } ], deprecated_rule=deprecated_delete_quotas - ) + ), + policy.DocumentedRuleDefault( + name="telemetry:get_metrics", + check_str=PROJECT_READER, + scope_types=['project'], + description='Get all metrics.', + operations=[ + { + 'path': '/v2/metrics', + 'method': 'GET' + } + ] + ), ] diff --git a/aodh/evaluator/__init__.py b/aodh/evaluator/__init__.py index c1938e6be..3a20c374a 100644 --- a/aodh/evaluator/__init__.py +++ b/aodh/evaluator/__init__.py @@ -112,6 +112,11 @@ class Evaluator(object, metaclass=abc.ABCMeta): self.alarm_change_notifier.info({}, notification, payload) + def _increment_evaluation_result(self, alarm_id, project_id, state): + if self.conf.enable_evaluation_results_metrics: + self._storage_conn.increment_alarm_counter( + alarm_id, project_id, state) + def _refresh(self, alarm, state, reason, reason_data, always_record=False): """Refresh alarm state.""" try: diff --git a/aodh/evaluator/threshold.py b/aodh/evaluator/threshold.py index 8d423b251..eebe61f42 100644 --- a/aodh/evaluator/threshold.py +++ b/aodh/evaluator/threshold.py @@ -176,3 +176,7 @@ class ThresholdEvaluator(evaluator.Evaluator): evaluation = (evaluator.UNKNOWN, None, e.statistics, 0, e.reason) self._transition_alarm(alarm, *evaluation) + if evaluation[0] is not None: + self._increment_evaluation_result(alarm.alarm_id, + alarm.project_id, + evaluation[0].replace(" ", "_")) diff --git a/aodh/opts.py b/aodh/opts.py index 8c75a8bf1..b7d9b59e9 100644 --- a/aodh/opts.py +++ b/aodh/opts.py @@ -14,6 +14,7 @@ import itertools from keystoneauth1 import loading +from oslo_config import cfg import aodh.api import aodh.api.controllers.v2.alarm_rules.gnocchi @@ -32,6 +33,12 @@ import aodh.notifier.zaqar import aodh.service import aodh.storage +OPTS = [ + cfg.BoolOpt('enable_evaluation_results_metrics', + default=False, + help=("Whether metric collection should be enabled.")), +] + def list_opts(): return [ @@ 
-44,7 +51,8 @@ def list_opts(): aodh.evaluator.loadbalancer.OPTS, aodh.notifier.rest.OPTS, aodh.queue.OPTS, - aodh.service.OPTS)), + aodh.service.OPTS, + OPTS)), ('api', itertools.chain( aodh.api.OPTS, diff --git a/aodh/storage/base.py b/aodh/storage/base.py index 64764804c..feef8f455 100644 --- a/aodh/storage/base.py +++ b/aodh/storage/base.py @@ -111,6 +111,16 @@ class Connection(object): """Delete an alarm and its history data.""" raise aodh.NotImplementedError('Alarms not implemented') + @staticmethod + def increment_alarm_counter(alarm_id, project_id, state): + """Increment a counter.""" + raise aodh.NotImplementedError('Alarm counters not implemented') + + @staticmethod + def get_alarm_counters(alarm_id=None, project_id=None, state=None): + """Get value of a counter.""" + raise aodh.NotImplementedError('Alarm counters not implemented') + @staticmethod def get_alarm_changes(alarm_id, on_behalf_of, user=None, project=None, alarm_type=None, diff --git a/aodh/storage/impl_sqlalchemy.py b/aodh/storage/impl_sqlalchemy.py index ba929e7ce..6302826d9 100644 --- a/aodh/storage/impl_sqlalchemy.py +++ b/aodh/storage/impl_sqlalchemy.py @@ -56,6 +56,9 @@ AVAILABLE_STORAGE_CAPABILITIES = { 'storage': {'production_ready': True}, } +# int type should be 32b long in both mysql and postgresql +COUNTER_RESET_AT_VALUE = 2000000000 + def apply_filters(query, model, **filters): filter_dict = {} @@ -314,6 +317,9 @@ class Connection(base.Connection): :param alarm_id: ID of the alarm to delete """ with _session_for_write() as session: + session.query(models.AlarmCounter).filter( + models.AlarmCounter.alarm_id == alarm_id, + ).delete() session.query(models.Alarm).filter( models.Alarm.alarm_id == alarm_id, ).delete() @@ -322,6 +328,45 @@ class Connection(base.Connection): models.AlarmChange.alarm_id == alarm_id, ).delete() + def increment_alarm_counter(self, alarm_id, project_id, state): + """Increment a counter. 
+ + :param alarm_id: the id of alarm to which the counter belongs + :param project_id: the id of the project of the alarm + :param state: the state of the alarm to increment + """ + with _session_for_write() as session: + LOG.debug("Incrementing counter %(state)s for alarm %(alarm_id)s", + {'alarm_id': alarm_id, 'state': state}) + + counter = self.get_alarm_counters(alarm_id, project_id, state) + counter_value = 0 + if len(counter) == 1: + counter_value = counter[0].value + new_value = counter_value + 1 + if counter_value >= COUNTER_RESET_AT_VALUE: + LOG.debug("Resetting counter %(state)s for alarm %(alarm_id)s", + {'alarm_id': alarm_id, 'state': state}) + new_value = 1 + + elif counter_value == 0: + # We have a new uninitialized counter + counter = models.AlarmCounter( + alarm_id=alarm_id, + project_id=project_id, + state=state + ) + counter.update({'value': new_value}) + session.add(counter) + else: + session.query(models.AlarmCounter).filter( + models.AlarmCounter.alarm_id == alarm_id, + models.AlarmCounter.project_id == project_id, + models.AlarmCounter.state == state, + ).update({'value': new_value}) + + return state + @staticmethod def _row_to_alarm_change_model(row): return alarm_api_models.AlarmChange(event_id=row.event_id, @@ -338,6 +383,23 @@ class Connection(base.Connection): """Yields a lists of alarms that match filter.""" return self._retrieve_data(filter_expr, orderby, limit, models.Alarm) + def get_alarm_counters(self, alarm_id=None, project_id=None, state=None): + """Yields a counter based on its alarm_id, project_id and state.""" + filters = {} + if alarm_id is not None: + filters['alarm_id'] = alarm_id + if project_id is not None: + filters['project_id'] = project_id + if state is not None: + filters['state'] = state + with _session_for_read() as session: + counters = session.query(models.AlarmCounter).filter_by( + **filters + ).all() + if counters is None: + return [] + return counters + def _retrieve_alarm_history(self, query): return 
(self._row_to_alarm_change_model(x) for x in query.all()) diff --git a/aodh/storage/models.py b/aodh/storage/models.py index 73dab85fa..69258e51e 100644 --- a/aodh/storage/models.py +++ b/aodh/storage/models.py @@ -160,3 +160,14 @@ class Quota(base.Model): project_id=project_id, resource=resource, limit=limit) + + +class AlarmCounter(base.Model): + def __init__(self, alarm_id, project_id, state): + base.Model.__init__( + self, + alarm_id=alarm_id, + project_id=project_id, + state=state, + value=0 + ) diff --git a/aodh/storage/sqlalchemy/alembic/alembic.ini b/aodh/storage/sqlalchemy/alembic/alembic.ini index 57732fed3..eaef4f0df 100644 --- a/aodh/storage/sqlalchemy/alembic/alembic.ini +++ b/aodh/storage/sqlalchemy/alembic/alembic.ini @@ -1,6 +1,6 @@ [alembic] script_location = aodh.storage.sqlalchemy:alembic -sqlalchemy.url = +sqlalchemy.url = sqlite:///aodh.db [loggers] keys = root,sqlalchemy,alembic diff --git a/aodh/storage/sqlalchemy/alembic/env.py b/aodh/storage/sqlalchemy/alembic/env.py index 69804bad8..1a3ba6c42 100644 --- a/aodh/storage/sqlalchemy/alembic/env.py +++ b/aodh/storage/sqlalchemy/alembic/env.py @@ -16,7 +16,8 @@ from logging.config import fileConfig from alembic import context -from oslo_db.sqlalchemy import enginefacade +from sqlalchemy import engine_from_config +from sqlalchemy import pool from aodh.storage.sqlalchemy import models @@ -27,7 +28,8 @@ config = context.config # Interpret the config file for Python logging. # This line sets up loggers basically. -fileConfig(config.config_file_name) +if config.attributes.get('configure_logger', True): + fileConfig(config.config_file_name) # add your model's MetaData object here # for 'autogenerate' support @@ -53,9 +55,13 @@ def run_migrations_offline(): script output. 
""" - conf = config.conf - context.configure(url=conf.database.connection, - target_metadata=target_metadata) + url = config.get_main_option("sqlalchemy.url") + context.configure( + url=url, + target_metadata=target_metadata, + literal_binds=True, + dialect_opts={"paramstyle": "named"}, + ) with context.begin_transaction(): context.run_migrations() @@ -71,15 +77,21 @@ def run_migrations_online(): connectable = config.attributes.get('connection', None) if connectable is None: - engine = enginefacade.writer.get_engine() - with engine.connect() as connection: + # only create Engine if we don't have a Connection from the outside + connectable = engine_from_config( + config.get_section(config.config_ini_section), + prefix="sqlalchemy.", + poolclass=pool.NullPool, + ) + with connectable.connect() as connection: context.configure( connection=connection, - target_metadata=target_metadata + target_metadata=target_metadata, ) with context.begin_transaction(): context.run_migrations() + else: context.configure( connection=connectable, diff --git a/aodh/storage/sqlalchemy/alembic/versions/008_added_counter_table.py b/aodh/storage/sqlalchemy/alembic/versions/008_added_counter_table.py new file mode 100644 index 000000000..1487f86a8 --- /dev/null +++ b/aodh/storage/sqlalchemy/alembic/versions/008_added_counter_table.py @@ -0,0 +1,67 @@ +# Copyright 2025 OpenStack Foundation +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+# + +"""added_counter_table + +Revision ID: 008 +Revises: 007 +Create Date: 2025-01-15 10:28:02.087788 + +""" + +# revision identifiers, used by Alembic. +revision = '008' +down_revision = '007' +branch_labels = None +depends_on = None + +from alembic import op +import sqlalchemy as sa + + +def upgrade(): + op.create_table( + 'alarm_counter', + sa.Column('id', sa.String(length=36), nullable=False), + sa.Column('alarm_id', sa.String(length=128), nullable=False), + sa.Column('project_id', sa.String(length=128), nullable=False), + sa.Column('state', sa.String(length=128), nullable=False), + sa.Column('value', sa.Integer(), nullable=False), + sa.ForeignKeyConstraint( + ['alarm_id'], + ['alarm.alarm_id'], + name='alarm_fkey_ref', + ), + sa.PrimaryKeyConstraint('id'), + sa.UniqueConstraint('alarm_id', 'project_id', 'state') + ) + op.create_index( + 'ix_alarm_counter_alarm_id', + 'alarm_counter', + ['alarm_id'], + unique=False + ) + op.create_index( + 'ix_alarm_counter_project_id', + 'alarm_counter', + ['project_id'], + unique=False + ) + op.create_index( + 'ix_alarm_counter_state', + 'alarm_counter', + ['state'], + unique=False + ) diff --git a/aodh/storage/sqlalchemy/models.py b/aodh/storage/sqlalchemy/models.py index 2a67263b7..0b840c591 100644 --- a/aodh/storage/sqlalchemy/models.py +++ b/aodh/storage/sqlalchemy/models.py @@ -141,3 +141,23 @@ class Quota(Base): project_id = Column(String(128), nullable=False) resource = Column(String(50), nullable=False) limit = Column(Integer, nullable=False) + + +class AlarmCounter(Base): + __tablename__ = 'alarm_counter' + __table_args__ = ( + sa.UniqueConstraint('alarm_id', 'project_id', 'state'), + Index('ix_%s_alarm_id' % __tablename__, + 'alarm_id'), + Index('ix_%s_state' % __tablename__, + 'state'), + Index('ix_%s_project_id' % __tablename__, + 'project_id'), + ) + + id = Column(String(36), primary_key=True, default=uuidutils.generate_uuid) + alarm_id = Column(String(128), sa.ForeignKey('alarm.alarm_id'), + nullable=False) + 
project_id = Column(String(128), nullable=False) + state = Column(String(128), nullable=False) + value = Column(Integer, nullable=False) diff --git a/aodh/tests/functional/api/v2/policy.yaml-test b/aodh/tests/functional/api/v2/policy.yaml-test index 7a09cb594..99934e9c7 100644 --- a/aodh/tests/functional/api/v2/policy.yaml-test +++ b/aodh/tests/functional/api/v2/policy.yaml-test @@ -6,3 +6,4 @@ "admin_or_owner": "rule:context_is_admin or project_id:%(project_id)s" "default": "rule:admin_or_owner" "telemetry:get_alarms": "role:admin" +"telemetry:get_metrics": "role:admin" diff --git a/aodh/tests/functional/api/v2/test_metrics.py b/aodh/tests/functional/api/v2/test_metrics.py new file mode 100644 index 000000000..393f97b5d --- /dev/null +++ b/aodh/tests/functional/api/v2/test_metrics.py @@ -0,0 +1,120 @@ +# +# Copyright 2024 Red Hat, Inc +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+ +import os +import webtest + +from aodh.api import app +from aodh.storage import models +from aodh.tests import constants +from aodh.tests.functional.api import v2 + + +def getTestAlarm(alarm_id, project_id, user_id): + return models.Alarm(name='name1', + type='gnocchi_aggregation_by_metrics_threshold', + enabled=True, + alarm_id=alarm_id, + description='a', + state='insufficient data', + state_reason='Not evaluated', + severity='critical', + state_timestamp=constants.MIN_DATETIME, + timestamp=constants.MIN_DATETIME, + ok_actions=[], + insufficient_data_actions=[], + alarm_actions=[], + repeat_actions=True, + user_id=user_id, + project_id=project_id, + time_constraints=[dict(name='testcons', + start='0 11 * * *', + duration=300)], + rule=dict(comparison_operator='gt', + threshold=2.0, + aggregation_method='mean', + evaluation_periods=60, + granularity=1, + metrics=[ + '41869681-5776-46d6-91ed-cccc43b6e4e3', + 'a1fb80f4-c242-4f57-87c6-68f47521059e' + ]) + ) + + +class TestMetrics(v2.FunctionalTest): + def setUp(self): + super(TestMetrics, self).setUp() + self.project_id = "some_project_id" + self.alarm_id = "some_alarm_id" + self.user_id = "some_user_id" + self.role = "reader" + self.auth_headers = {'X-User-Id': self.user_id, + 'X-Project-Id': self.project_id, + 'X-Roles': self.role} + self.alarm_conn.create_alarm(getTestAlarm( + self.alarm_id, + self.project_id, + self.user_id) + ) + self.alarm_conn.increment_alarm_counter( + self.alarm_id, + self.project_id, + "ok" + ) + self.alarm_conn.increment_alarm_counter( + self.alarm_id, + self.project_id, + "insufficient_data" + ) + self.alarm_conn.increment_alarm_counter( + self.alarm_id, + self.project_id, + "insufficient_data" + ) + + def test_get_all_metrics_inside_project(self): + expected = { + "evaluation_results": + [{ + "alarm_id": self.alarm_id, + "project_id": self.project_id, + "state_counters": { + "ok": 1, + "insufficient data": 2, + "alarm": 0 + } + }] + } + metrics = self.get_json( + '/metrics', + 
headers=self.auth_headers, + ) + self.assertEqual(expected, metrics) + + def test_get_all_metrics_forbidden(self): + pf = os.path.abspath('aodh/tests/functional/api/v2/policy.yaml-test') + self.CONF.set_override('policy_file', pf, group='oslo_policy') + self.CONF.set_override('auth_mode', None, group='api') + self.app = webtest.TestApp(app.load_app(self.CONF)) + + response = self.get_json('/metrics', + expect_errors=True, + status=403, + headers=self.auth_headers) + faultstring = 'RBAC Authorization Failed' + self.assertEqual(403, response.status_code) + self.assertEqual(faultstring, + response.json['error_message']['faultstring']) diff --git a/aodh/tests/functional/db.py b/aodh/tests/functional/db.py index 14f20b838..2a4fb6870 100644 --- a/aodh/tests/functional/db.py +++ b/aodh/tests/functional/db.py @@ -96,6 +96,7 @@ class TestBase(test_base.BaseTestCase, conf = service.prepare_service(argv=[], config_files=[]) self.CONF = self.useFixture(fixture_config.Config(conf)).conf self.CONF.set_override('connection', db_url, group="database") + self.CONF.set_override('enable_evaluation_results_metrics', True) manager = self.DRIVER_MANAGERS.get(self.engine) if not manager: diff --git a/aodh/tests/functional/storage/test_storage_scenarios.py b/aodh/tests/functional/storage/test_storage_scenarios.py index 7a754bac0..acebf32e6 100644 --- a/aodh/tests/functional/storage/test_storage_scenarios.py +++ b/aodh/tests/functional/storage/test_storage_scenarios.py @@ -245,9 +245,30 @@ class AlarmTest(AlarmTestBase): def test_delete(self): self.add_some_alarms() victim = list(self.alarm_conn.get_alarms(name='orange-alert'))[0] + self.alarm_conn.increment_alarm_counter( + victim.alarm_id, + victim.project_id, + "ok" + ) + self.assertEqual( + 1, + self.alarm_conn.get_alarm_counters( + victim.alarm_id, + victim.project_id, + "ok" + )[0].value + ) self.alarm_conn.delete_alarm(victim.alarm_id) survivors = list(self.alarm_conn.get_alarms()) self.assertEqual(2, len(survivors)) + 
self.assertEqual( + [], + self.alarm_conn.get_alarm_counters( + victim.alarm_id, + victim.project_id, + "ok" + ) + ) for s in survivors: self.assertNotEqual(victim.name, s.name) @@ -506,3 +527,55 @@ class ComplexAlarmHistoryQueryTest(AlarmTestBase): alarm_models.AlarmChange.RULE_CHANGE, alarm_models.AlarmChange.STATE_TRANSITION], [h.type for h in history]) + + +class AlarmCounterTest(AlarmTestBase): + def test_get_value_of_empty_counter(self): + counter_name = "some_empty_unused_counter" + self.assertEqual([], self.alarm_conn.get_alarm_counters( + "some_alarm_id", + "some_project_id", + counter_name)) + + def test_counter_increment(self): + self.add_some_alarms() + alarm = list(self.alarm_conn.get_alarms(name='orange-alert'))[0] + counter_name = "counter_for_increment_testing" + project_id = alarm.project_id + alarm_id = alarm.alarm_id + + self.assertEqual([], self.alarm_conn.get_alarm_counters( + alarm_id, + project_id, + counter_name + )) + + for i in range(5): + self.alarm_conn.increment_alarm_counter( + alarm_id, + project_id, + counter_name + ) + self.assertEqual( + 5, + self.alarm_conn.get_alarm_counters( + alarm_id, + project_id, + counter_name + )[0].value + ) + + for i in range(3): + self.alarm_conn.increment_alarm_counter( + alarm_id, + project_id, + counter_name + ) + self.assertEqual( + 8, + self.alarm_conn.get_alarm_counters( + alarm_id, + project_id, + counter_name + )[0].value + ) diff --git a/aodh/tests/unit/evaluator/test_gnocchi.py b/aodh/tests/unit/evaluator/test_gnocchi.py index 024493f3d..e7fe7d96b 100644 --- a/aodh/tests/unit/evaluator/test_gnocchi.py +++ b/aodh/tests/unit/evaluator/test_gnocchi.py @@ -150,12 +150,21 @@ class TestGnocchiEvaluatorBase(base.TestEvaluatorBase): self._assert_all_alarms('ok') def _test_simple_insufficient(self): + self.conf.set_override('enable_evaluation_results_metrics', True) self._set_all_alarms('ok') self._evaluate_all_alarms() self._assert_all_alarms('insufficient data') expected = [mock.call(alarm) for alarm 
in self.alarms] update_calls = self.storage_conn.update_alarm.call_args_list self.assertEqual(expected, update_calls) + expected = [mock.call( + alarm.alarm_id, + alarm.project_id, + "insufficient_data") + for alarm in self.alarms] + counter_increments = ( + self.storage_conn.increment_alarm_counter.call_args_list) + self.assertEqual(expected, counter_increments) expected = [mock.call( alarm, 'ok', diff --git a/releasenotes/notes/add-metrics-endpoint-and-evaluation-counter-collection-f324ebda00fa5c6c.yaml b/releasenotes/notes/add-metrics-endpoint-and-evaluation-counter-collection-f324ebda00fa5c6c.yaml new file mode 100644 index 000000000..37f53b34f --- /dev/null +++ b/releasenotes/notes/add-metrics-endpoint-and-evaluation-counter-collection-f324ebda00fa5c6c.yaml @@ -0,0 +1,9 @@ +--- +features: + - | + Added collection of alarm evaluation counters. These show the number of + times alarms were evaluated as ``alarm``, ``ok`` and ``insufficient data`` + per alarm. These counters are presented by the /v2/metrics API endpoint. + This feature can be enabled or disabled with + the ``[DEFAULT].enable_evaluation_results_metrics`` configuration option. + It's disabled by default.