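{{- /*
  kube-prometheus-stack monitoring addon: renders a values Secret, a HelmRelease
  and a Manifests resource carrying an additional Grafana dashboard.
*/ -}}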

{{- if .Values.monitoring.enabled }}
{{- /*
  Values for the kube-prometheus-stack release, stored as two keys of one Secret:
    defaults  - baseline chart values computed by this template; every image
                reference is prefixed with the "cluster-addons.imagePrefix" helper
    overrides - .Values.monitoring.kubePrometheusStack.release.values, rendered as-is
  Both keys are referenced by the HelmRelease in this file through valuesSources.
  Template comments are used for these notes so the rendered Secret data is unchanged.
*/}}
---
apiVersion: v1
kind: Secret
metadata:
  name: {{ include "cluster-addons.componentName" (list . "kube-prometheus-stack") }}-config
  labels:
    {{- include "cluster-addons.componentLabels" (list . "kube-prometheus-stack") | nindent 4 }}
    addons.stackhpc.com/watch: ""
stringData:
  defaults: |
    alertmanager:
      # Don't apply the namespace grouping by default
      config:
        route:
          group_by: []
      alertmanagerSpec:
        image:
          registry: {{ include "cluster-addons.imagePrefix" . }}quay.io
        # Make sure that alertmanager finds configurations with the alertmanager name as a label
        alertmanagerConfigSelector:
          matchLabels:
            alertmanager: kube-prometheus-stack-alertmanager
        # Do NOT add the namespace matcher to routes from AlertmanagerConfig resources
        alertmanagerConfigMatcherStrategy:
          type: None
    prometheusOperator:
      admissionWebhooks:
        patch:
          image:
            registry: {{ include "cluster-addons.imagePrefix" . }}registry.k8s.io
      image:
        registry: {{ include "cluster-addons.imagePrefix" . }}quay.io
      prometheusConfigReloader:
        image:
          registry: {{ include "cluster-addons.imagePrefix" . }}quay.io
      thanosImage:
        registry: {{ include "cluster-addons.imagePrefix" . }}quay.io
    prometheus:
      prometheusSpec:
        image:
          registry: {{ include "cluster-addons.imagePrefix" . }}quay.io
        # Tell Prometheus to pick up all monitors, regardless of labels
        podMonitorSelectorNilUsesHelmValues: false
        serviceMonitorSelectorNilUsesHelmValues: false
        {{- /*
          Look up the requested Prometheus volume size in the user-supplied values;
          dig falls back to "" when any key on the path is missing, in which case
          no retentionSize is emitted at all.
        */}}
        {{-
          $storageSize := dig
            "prometheus"
            "prometheusSpec"
            "storageSpec"
            "volumeClaimTemplate"
            "spec"
            "resources"
            "requests"
            "storage"
            ""
            .Values.monitoring.kubePrometheusStack.release.values
        }}
        {{- if $storageSize }}
        # Set the retention size to 95% of the given volume size
        {{- /*
          Split the size into its leading numeric amount and its trailing unit
          suffix (K, M, G, T, E or P with an optional "i"), scale the amount by
          0.95 and re-attach the suffix followed by "B" (e.g. 10Gi becomes 9.5GiB).
          NOTE(review): a size without one of those suffixes yields an empty unit,
          so the result is expressed in plain bytes - confirm that is intended.
        */}}
        {{- $storageAmount := mustRegexFind "^([0-9]*[.])?[0-9]+" $storageSize | float64 }}
        {{- $storageUnits := mustRegexFind "(K|M|G|T|E|P)i?$" $storageSize }}
        retentionSize: {{ mulf 0.95 $storageAmount }}{{ $storageUnits }}B
        {{- end }}
    thanosRuler:
      thanosRulerSpec:
        image:
          registry: {{ include "cluster-addons.imagePrefix" . }}quay.io
    kube-state-metrics:
      image:
        repository: {{ include "cluster-addons.imagePrefix" . }}registry.k8s.io/kube-state-metrics/kube-state-metrics
    prometheus-node-exporter:
      image:
        registry: {{ include "cluster-addons.imagePrefix" . }}quay.io
    grafana:
      image:
        repository: {{ include "cluster-addons.imagePrefix" . }}docker.io/grafana/grafana
      sidecar:
        image:
          repository: {{ include "cluster-addons.imagePrefix" . }}quay.io/kiwigrid/k8s-sidecar
        # Tell Grafana to include dashboards from all namespaces
        dashboards:
          searchNamespace: ALL
      downloadDashboardsImage:
        repository: {{ include "cluster-addons.imagePrefix" . }}docker.io/curlimages/curl
      initChownData:
        image:
          repository: {{ include "cluster-addons.imagePrefix" . }}docker.io/busybox
      imageRenderer:
        image:
          repository: {{ include "cluster-addons.imagePrefix" . }}docker.io/grafana/grafana-image-renderer
  overrides: |
    {{- toYaml .Values.monitoring.kubePrometheusStack.release.values | nindent 4 }}
---
{{- /*
  HelmRelease installing the chart described by
  .Values.monitoring.kubePrometheusStack.chart as release "kube-prometheus-stack".
  Its values come from the "defaults" and "overrides" keys of the "-config" Secret,
  listed in that order under valuesSources.
*/}}
apiVersion: addons.stackhpc.com/v1alpha1
kind: HelmRelease
metadata:
  name: {{ include "cluster-addons.componentName" (list . "kube-prometheus-stack") }}
  labels:
    {{- include "cluster-addons.componentLabels" (list . "kube-prometheus-stack") | nindent 4 }}
  annotations:
    # Tell Argo to ignore the non-controller owner references for this object
    argocd.argoproj.io/sync-options: "ControllerReferencesOnly=true"
spec:
  clusterName: {{ include "cluster-addons.clusterName" . }}
  bootstrap: true
  chart:
    {{- toYaml .Values.monitoring.kubePrometheusStack.chart | nindent 4 }}
  {{- /* Quoted so that a namespace such as "123" or "on" is still read as a string */}}
  targetNamespace: {{ .Values.monitoring.kubePrometheusStack.release.namespace | quote }}
  releaseName: kube-prometheus-stack
  valuesSources:
    - secret:
        name: {{ include "cluster-addons.componentName" (list . "kube-prometheus-stack") }}-config
        key: defaults
    - secret:
        name: {{ include "cluster-addons.componentName" (list . "kube-prometheus-stack") }}-config
        key: overrides
---
{{- /*
  Manifests resource that creates one ConfigMap, "additional-grafana-dashboards",
  in the kube-prometheus-stack release namespace. The ConfigMap carries the label
  grafana_dashboard: "1" and embeds the NVIDIA DCGM exporter dashboard JSON that
  ships with this chart under grafana-dashboards/.
*/}}
apiVersion: addons.stackhpc.com/v1alpha1
kind: Manifests
metadata:
  name: {{ include "cluster-addons.componentName" (list . "kube-prometheus-stack-dashboards") }}
  labels:
    {{- include "cluster-addons.componentLabels" (list . "kube-prometheus-stack-dashboards") | nindent 4 }}
  annotations:
    # Tell Argo to ignore the non-controller owner references for this object
    argocd.argoproj.io/sync-options: "ControllerReferencesOnly=true"
spec:
  clusterName: {{ include "cluster-addons.clusterName" . }}
  bootstrap: true
  {{- /* Quoted so that a namespace such as "123" or "on" is still read as a string */}}
  targetNamespace: {{ .Values.monitoring.kubePrometheusStack.release.namespace | quote }}
  releaseName: kube-prometheus-stack-dashboards
  manifestSources:
    - template: |
        apiVersion: v1
        kind: ConfigMap
        metadata:
          name: additional-grafana-dashboards
          labels:
            grafana_dashboard: "1"
        data:
          nvidia-dcgm-exporter-dashboard.json: |
            {{- /* nindent 12 places the JSON one level below the data key above */}}
            {{- .Files.Get "grafana-dashboards/nvidia-dcgm-exporter-dashboard_rev3.json" | nindent 12 }}
{{- end }}