Matt Pryor b88d3ec06a
Add blackbox exporter addon (#241)
* Add blackbox exporter addon

* Fix typo in ingress alerts
2024-02-03 16:41:14 +00:00

123 lines
4.7 KiB
YAML

{{- if and .Values.ingress.enabled .Values.ingress.nginx.enabled }}
---
# Helm values for the ingress-nginx release, kept as two keys of one Secret:
#   defaults  - values set by this chart
#   overrides - values taken from .Values.ingress.nginx.release.values
apiVersion: v1
kind: Secret
metadata:
  name: {{ include "cluster-addons.componentName" (list . "ingress-nginx") }}-config
  labels:
    {{- include "cluster-addons.componentLabels" (list . "ingress-nginx") | nindent 4 }}
    addons.stackhpc.com/watch: ""
stringData:
  {{- /*
    Emit the "controller" key only when it has content. A bare "controller:"
    parses as null, and a null handed to Helm as a value removes the chart's
    own default for that key rather than leaving it alone. With monitoring
    disabled the defaults are therefore an explicit empty mapping.
  */}}
  defaults: |
    {{- if .Values.monitoring.enabled }}
    controller:
      metrics:
        enabled: true
        serviceMonitor:
          enabled: true
    {{- else }}
    {}
    {{- end }}
  overrides: |
    {{- toYaml .Values.ingress.nginx.release.values | nindent 4 }}
---
{{- /* Both values sources below read different keys of the same "-config" Secret. */}}
{{- $valuesSecret := printf "%s-config" (include "cluster-addons.componentName" (list . "ingress-nginx")) }}
apiVersion: addons.stackhpc.com/v1alpha1
kind: HelmRelease
metadata:
  name: {{ include "cluster-addons.componentName" (list . "ingress-nginx") }}
  labels:
    {{- include "cluster-addons.componentLabels" (list . "ingress-nginx") | nindent 4 }}
  annotations:
    # Tell Argo to ignore the non-controller owner references for this object
    argocd.argoproj.io/sync-options: "ControllerReferencesOnly=true"
spec:
  clusterName: {{ include "cluster-addons.clusterName" . }}
  bootstrap: true
  # Chart reference is copied verbatim from .Values.ingress.nginx.chart
  chart:
    {{- toYaml .Values.ingress.nginx.chart | nindent 4 }}
  targetNamespace: {{ .Values.ingress.nginx.release.namespace }}
  releaseName: ingress-nginx
  # Listed defaults first, then overrides.
  # NOTE(review): presumably later sources take precedence - confirm against the HelmRelease CRD
  valuesSources:
    - secret:
        name: {{ $valuesSecret }}
        key: defaults
    - secret:
        name: {{ $valuesSecret }}
        key: overrides
{{- if .Values.monitoring.enabled }}
---
# Manifests resource carrying a ConfigMap with the ingress-nginx Grafana dashboard.
# This document sits inside the .Values.monitoring.enabled guard.
apiVersion: addons.stackhpc.com/v1alpha1
kind: Manifests
metadata:
  name: {{ include "cluster-addons.componentName" (list . "ingress-nginx-dashboards") }}
  labels:
    {{- include "cluster-addons.componentLabels" (list . "ingress-nginx-dashboards") | nindent 4 }}
  annotations:
    # Tell Argo to ignore the non-controller owner references for this object
    argocd.argoproj.io/sync-options: "ControllerReferencesOnly=true"
spec:
  clusterName: {{ include "cluster-addons.clusterName" . }}
  bootstrap: true
  targetNamespace: {{ .Values.ingress.nginx.release.namespace }}
  releaseName: ingress-nginx-dashboards
  # The dashboard JSON is read from the chart's files at render time and is
  # wrapped in raw/endraw markers inside the template string below.
  # The nindent value (12) must match the column of the block scalar content.
  manifestSources:
    - template: |
        apiVersion: v1
        kind: ConfigMap
        metadata:
          name: ingress-nginx-dashboards
          labels:
            grafana_dashboard: "1"
        data:
          nginx-ingress-dashboard.json: |
            {% raw %}
            {{- .Files.Get "grafana-dashboards/ingress-nginx-dashboard.json" | nindent 12 }}
            {% endraw %}
---
# Manifests resource carrying a PrometheusRule with three certificate-expiry
# alerts: 7-30 days left (warning), 0-7 days left (critical), already expired (critical).
# This document sits inside the .Values.monitoring.enabled guard.
apiVersion: addons.stackhpc.com/v1alpha1
kind: Manifests
metadata:
  name: {{ include "cluster-addons.componentName" (list . "ingress-nginx-alerts") }}
  labels:
    {{- include "cluster-addons.componentLabels" (list . "ingress-nginx-alerts") | nindent 4 }}
  annotations:
    # Tell Argo to ignore the non-controller owner references for this object
    argocd.argoproj.io/sync-options: "ControllerReferencesOnly=true"
spec:
  clusterName: {{ include "cluster-addons.clusterName" . }}
  bootstrap: true
  targetNamespace: {{ .Values.ingress.nginx.release.namespace }}
  releaseName: ingress-nginx-alerts
  # The alert messages contain literal double-brace expressions. Each one is
  # written as a quoted string inside a Helm action so that Helm prints the
  # braces instead of evaluating them; the whole template is additionally
  # wrapped in raw/endraw markers.
  manifestSources:
    - template: |
        {% raw %}
        apiVersion: monitoring.coreos.com/v1
        kind: PrometheusRule
        metadata:
          name: ingress-nginx-alerts
        spec:
          groups:
            - name: ingress_nginx.rules
              rules:
                - alert: NginxIngressCertificateExpiresSoon
                  expr: |
                    (7 * 24 * 3600) <= (last_over_time(nginx_ingress_controller_ssl_expire_time_seconds[10m]) - time()) < (30 * 24 * 3600)
                  for: 0m
                  annotations:
                    message: "The cert {{ "{{ $labels.name }}" }} is {{ "{{ $value | humanizeDuration }}" }} from expiry."
                  labels:
                    severity: warning
                - alert: NginxIngressCertificateExpiresVerySoon
                  expr: |
                    0 <= (last_over_time(nginx_ingress_controller_ssl_expire_time_seconds[10m]) - time()) < (7 * 24 * 3600)
                  for: 0m
                  annotations:
                    message: "The cert {{ "{{ $labels.name }}" }} is {{ "{{ $value | humanizeDuration }}" }} from expiry."
                  labels:
                    severity: critical
                - alert: NginxIngressCertificateExpired
                  expr: |
                    (last_over_time(nginx_ingress_controller_ssl_expire_time_seconds[10m]) - time()) < 0
                  for: 0m
                  annotations:
                    message: "The cert {{ "{{ $labels.name }}" }} has expired."
                  labels:
                    severity: critical
        {% endraw %}
{{- end }}
{{- end }}