
This is the action item to implement the spec: doc/source/specs/2025.1/chart_versioning.rst. Also add the overrides environment variables OSH_VALUES_OVERRIDES_PATH and OSH_INFRA_VALUES_OVERRIDES_PATH. This commit temporarily disables all jobs that involve scripts in the OSH git repo, because those scripts need to be updated to work with the new values_overrides structure in the OSH-infra repo. Once change I4974785c904cf7c8730279854e3ad9b6b7c35498 is merged, all of these disabled test jobs must be re-enabled. Depends-On: I327103c18fc0e10e989a17f69b3bff9995c45eb4 Change-Id: I7bfdef3ea2128bbb4e26e3a00161fe30ce29b8e7
34 lines
1.6 KiB
YAML
34 lines
1.6 KiB
YAML
---
# Prometheus alerting rules for Alertmanager, nested under
# conf.prometheus.rules.alertmanager as a single rule group.
#
# Long `expr` and `description` values use `>-` folded scalars: the line
# breaks inside them are folded into single spaces and the trailing newline
# is stripped, so each value is still one single-line string.
conf:
  prometheus:
    rules:
      alertmanager:
        groups:
          - name: alertmanager.rules
            rules:
              # label_replace copies the `alertmanager` label into a
              # `service` label of the form "alertmanager-<value>" so the
              # replica metric can be matched ON(service) with the
              # per-service config-hash counts. Any result other than 1
              # that persists for 5m raises the alert.
              - alert: AlertmanagerConfigInconsistent
                expr: >-
                  count_values("config_hash", alertmanager_config_hash)
                  BY (service) / ON(service) GROUP_LEFT()
                  label_replace(prometheus_operator_alertmanager_spec_replicas,
                  "service", "alertmanager-$1", "alertmanager", "(.*)")
                  != 1
                for: 5m
                labels:
                  severity: critical
                annotations:
                  description: >-
                    The configuration of the instances of the Alertmanager
                    cluster `{{$labels.service}}` are out of sync.
                  summary: Alertmanager configurations are inconsistent

              # Same label_replace approach, but producing a `job` label so
              # the replica metric can be divided by sum(up) BY (job). Any
              # result other than 1 that persists for 5m raises the alert.
              - alert: AlertmanagerDownOrMissing
                expr: >-
                  label_replace(prometheus_operator_alertmanager_spec_replicas,
                  "job", "alertmanager-$1", "alertmanager", "(.*)")
                  / ON(job) GROUP_RIGHT() sum(up) BY (job) != 1
                for: 5m
                labels:
                  severity: warning
                annotations:
                  description: >-
                    An unexpected number of Alertmanagers are scraped or
                    Alertmanagers disappeared from discovery.
                  summary: Alertmanager down or not discovered

              # Raised when alertmanager_config_last_reload_successful has
              # been 0 for 10m.
              - alert: FailedReload
                expr: alertmanager_config_last_reload_successful == 0
                for: 10m
                labels:
                  severity: warning
                annotations:
                  description: >-
                    Reloading Alertmanager's configuration has failed for
                    {{ $labels.namespace }}/{{ $labels.pod }}.
                  summary: Alertmanager configuration reload has failed
...