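{{- /*
  kube-prometheus-stack monitoring addon.
  When .Values.monitoring.enabled is set, this template renders:
    - a Secret holding the default and user-supplied chart values,
    - a HelmRelease that installs the chart using those values,
    - a Manifests resource that ships an additional Grafana dashboard.
*/}}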
{{- if .Values.monitoring.enabled }}
{{- /*
  Secret carrying the Helm values for the kube-prometheus-stack release.
    defaults  - values computed by this chart: every image registry/repository is
                prefixed with the output of "cluster-addons.imagePrefix", plus
                selector and retention defaults.
    overrides - .Values.monitoring.kubePrometheusStack.release.values, rendered verbatim.
  The HelmRelease in this file references "defaults" first and "overrides" second.
  NOTE(review): presumably later values sources take precedence - confirm against the
  addon operator.
*/}}
---
apiVersion: v1
kind: Secret
metadata:
  name: {{ include "cluster-addons.componentName" (list . "kube-prometheus-stack") }}-config
  labels:
    {{- include "cluster-addons.componentLabels" (list . "kube-prometheus-stack") | nindent 4 }}
    {{- /* NOTE(review): presumably asks the addon operator to watch this secret for changes - confirm */}}
    addons.stackhpc.com/watch: ""
stringData:
  defaults: |
    alertmanager:
      # Don't apply the namespace grouping by default
      config:
        route:
          group_by: []
      alertmanagerSpec:
        image:
          registry: {{ include "cluster-addons.imagePrefix" . }}quay.io
        # Make sure that alertmanager finds configurations with the alertmanager name as a label
        alertmanagerConfigSelector:
          matchLabels:
            alertmanager: kube-prometheus-stack-alertmanager
        # Do NOT add the namespace matcher to routes from AlertmanagerConfig resources
        alertmanagerConfigMatcherStrategy:
          type: None
    prometheusOperator:
      admissionWebhooks:
        patch:
          image:
            registry: {{ include "cluster-addons.imagePrefix" . }}registry.k8s.io
      image:
        registry: {{ include "cluster-addons.imagePrefix" . }}quay.io
      prometheusConfigReloader:
        image:
          registry: {{ include "cluster-addons.imagePrefix" . }}quay.io
      thanosImage:
        registry: {{ include "cluster-addons.imagePrefix" . }}quay.io
    prometheus:
      prometheusSpec:
        image:
          registry: {{ include "cluster-addons.imagePrefix" . }}quay.io
        # Tell Prometheus to pick up all monitors, regardless of labels
        podMonitorSelectorNilUsesHelmValues: false
        serviceMonitorSelectorNilUsesHelmValues: false
        {{- /*
          Look up the requested Prometheus volume size in the user-supplied values.
          dig falls back to "" when any key along the path is missing, in which case
          no retentionSize is emitted.
        */}}
        {{-
          $storageSize := dig
            "prometheus"
            "prometheusSpec"
            "storageSpec"
            "volumeClaimTemplate"
            "spec"
            "resources"
            "requests"
            "storage"
            ""
            .Values.monitoring.kubePrometheusStack.release.values
        }}
        {{- if $storageSize }}
        # Set the retention size to 95% of the given volume size
        {{- /*
          $storageAmount is the leading decimal number of the quantity and
          $storageUnits is its trailing K/M/G/T/E/P suffix with an optional "i"
          (empty when there is none). For example "100Gi" yields "95GiB".
        */}}
        {{- $storageAmount := mustRegexFind "^([0-9]*[.])?[0-9]+" $storageSize | float64 }}
        {{- $storageUnits := mustRegexFind "(K|M|G|T|E|P)i?$" $storageSize }}
        retentionSize: {{ mulf 0.95 $storageAmount }}{{ $storageUnits }}B
        {{- end }}
    thanosRuler:
      thanosRulerSpec:
        image:
          registry: {{ include "cluster-addons.imagePrefix" . }}quay.io
    kube-state-metrics:
      image:
        repository: {{ include "cluster-addons.imagePrefix" . }}registry.k8s.io/kube-state-metrics/kube-state-metrics
    prometheus-node-exporter:
      image:
        registry: {{ include "cluster-addons.imagePrefix" . }}quay.io
    grafana:
      image:
        repository: {{ include "cluster-addons.imagePrefix" . }}docker.io/grafana/grafana
      sidecar:
        image:
          repository: {{ include "cluster-addons.imagePrefix" . }}quay.io/kiwigrid/k8s-sidecar
        # Tell Grafana to include dashboards from all namespaces
        dashboards:
          searchNamespace: ALL
      downloadDashboardsImage:
        repository: {{ include "cluster-addons.imagePrefix" . }}docker.io/curlimages/curl
      initChownData:
        image:
          repository: {{ include "cluster-addons.imagePrefix" . }}docker.io/busybox
      imageRenderer:
        image:
          repository: {{ include "cluster-addons.imagePrefix" . }}docker.io/grafana/grafana-image-renderer
  overrides: |
    {{- toYaml .Values.monitoring.kubePrometheusStack.release.values | nindent 4 }}
---
{{- /*
  HelmRelease that installs the chart described by
  .Values.monitoring.kubePrometheusStack.chart as release "kube-prometheus-stack"
  into the configured namespace. Values are read from the "defaults" and "overrides"
  keys of the "...-config" Secret, in that order.
*/}}
apiVersion: addons.stackhpc.com/v1alpha1
kind: HelmRelease
metadata:
  name: {{ include "cluster-addons.componentName" (list . "kube-prometheus-stack") }}
  labels: {{ include "cluster-addons.componentLabels" (list . "kube-prometheus-stack") | nindent 4 }}
  annotations:
    # Tell Argo to ignore the non-controller owner references for this object
    argocd.argoproj.io/sync-options: "ControllerReferencesOnly=true"
spec:
  clusterName: {{ include "cluster-addons.clusterName" . }}
  bootstrap: true
  chart: {{ toYaml .Values.monitoring.kubePrometheusStack.chart | nindent 4 }}
  {{- /* Quoted so a numeric- or boolean-looking namespace stays a YAML string */}}
  targetNamespace: {{ .Values.monitoring.kubePrometheusStack.release.namespace | quote }}
  releaseName: kube-prometheus-stack
  valuesSources:
    - secret:
        name: {{ include "cluster-addons.componentName" (list . "kube-prometheus-stack") }}-config
        key: defaults
    - secret:
        name: {{ include "cluster-addons.componentName" (list . "kube-prometheus-stack") }}-config
        key: overrides
---
{{- /*
  Manifests resource that creates the "additional-grafana-dashboards" ConfigMap in the
  same namespace as the kube-prometheus-stack release. The ConfigMap embeds the NVIDIA
  DCGM exporter dashboard JSON that is packaged with this chart.
*/}}
apiVersion: addons.stackhpc.com/v1alpha1
kind: Manifests
metadata:
  name: {{ include "cluster-addons.componentName" (list . "kube-prometheus-stack-dashboards") }}
  labels: {{ include "cluster-addons.componentLabels" (list . "kube-prometheus-stack-dashboards") | nindent 4 }}
  annotations:
    # Tell Argo to ignore the non-controller owner references for this object
    argocd.argoproj.io/sync-options: "ControllerReferencesOnly=true"
spec:
  clusterName: {{ include "cluster-addons.clusterName" . }}
  bootstrap: true
  {{- /* Quoted so a numeric- or boolean-looking namespace stays a YAML string */}}
  targetNamespace: {{ .Values.monitoring.kubePrometheusStack.release.namespace | quote }}
  releaseName: kube-prometheus-stack-dashboards
  manifestSources:
    - template: |
        apiVersion: v1
        kind: ConfigMap
        metadata:
          name: additional-grafana-dashboards
          labels:
            {{- /* NOTE(review): presumably the label the Grafana dashboard sidecar selects on - confirm */}}
            grafana_dashboard: "1"
        data:
          nvidia-dcgm-exporter-dashboard.json: |
            {{- /* nindent 12 puts the JSON two columns deeper than the data key above */}}
            {{- .Files.Get "grafana-dashboards/nvidia-dcgm-exporter-dashboard_rev3.json" | nindent 12 }}
{{- end }}