diff --git a/charts/addon/templates/_helpers.tpl b/charts/addon/templates/_helpers.tpl
index d90ae51..f37aa0c 100644
--- a/charts/addon/templates/_helpers.tpl
+++ b/charts/addon/templates/_helpers.tpl
@@ -311,6 +311,8 @@ hooks:
 # These should include environment variables, volume mounts etc. if they need
 # to target a remote cluster using kubeconfigSecret
 extraInitContainers: []
+# Indicates whether a pre-delete hook should be generated for the addon
+generatePreDeleteHook: true
 backoffLimit: 1000
 activeDeadlineSeconds: 3600
 podSecurityContext:
@@ -343,8 +345,10 @@ pre-upgrade hook is produced to uninstall the addon.
 {{- include "addon.config.secret" (list $ctx $name $config) }}
 ---
 {{- include "addon.job.install" (list $ctx $name $config) }}
+{{- if $config.generatePreDeleteHook }}
 ---
 {{- include "addon.job.uninstall" (list $ctx $name "pre-delete" $config) }}
+{{- end }}
 {{- else if $ctx.Release.IsUpgrade }}
 {{- $secretName := include "addon.fullname" (list $ctx $name) | printf "%s-config" }}
 {{- if lookup "v1" "Secret" $ctx.Release.Namespace $secretName }}
diff --git a/charts/addon/templates/_job-install.tpl b/charts/addon/templates/_job-install.tpl
index 6b4f29f..364f648 100644
--- a/charts/addon/templates/_job-install.tpl
+++ b/charts/addon/templates/_job-install.tpl
@@ -53,8 +53,8 @@ template:
           - name: config
             mountPath: /config
             readOnly: true
-      {{- range $dep := $config.dependsOn }}
-      - name: wait-for-{{ $dep }}
+      {{- if $config.dependsOn }}
+      - name: wait-for-dependencies
         image: {{ printf "%s:%s" $config.image.repository (default $ctx.Chart.AppVersion $config.image.tag) }}
         imagePullPolicy: {{ $config.image.pullPolicy }}
         securityContext: {{ toYaml $config.securityContext | nindent 10 }}
@@ -63,6 +63,7 @@ template:
          - -c
          - |
              set -ex
+             {{- range $dep := $config.dependsOn }}
              {{- $labels := include "addon.job.selectorLabels" (list $ctx $dep "install") | fromYaml }}
              {{- range $i, $label := (keys $labels | sortAlpha) -}}
              {{- if $i }}
@@ -72,6 +73,7 @@ template:
              {{- end }}
              {{- end }}
              kubectl wait --for=condition=Complete job -n {{ $ctx.Release.Namespace }} -l "$LABELS" --all --timeout=-1s
+             {{- end }}
        resources: {{ toYaml $config.resources | nindent 10 }}
      {{- end }}
      {{- range $config.extraInitContainers }}
@@ -141,7 +143,11 @@ apiVersion: batch/v1
 kind: Job
 metadata:
   {{- $checksum := include "addon.job.install.spec" . | sha256sum }}
-  {{- $jobName := printf "%s-%s" (include "addon.job.name" (list $ctx $name "install")) (trunc 5 $checksum) }}
+  {{-
+    $jobName := printf "%s-%s"
+      (include "addon.job.name" (list $ctx $name "install") | trunc 57 | trimSuffix "-")
+      (trunc 5 $checksum)
+  }}
   name: {{ $jobName }}
   labels: {{ include "addon.job.labels" (list $ctx $name "install") | nindent 4 }}
 spec:
diff --git a/charts/cluster-addons/templates/_helpers.tpl b/charts/cluster-addons/templates/_helpers.tpl
index c22f69f..c5bc271 100644
--- a/charts/cluster-addons/templates/_helpers.tpl
+++ b/charts/cluster-addons/templates/_helpers.tpl
@@ -90,6 +90,9 @@ extraInitContainers:
       - "-1s"
     resources: {{ toYaml $ctx.Values.jobDefaults.resources | nindent 6 }}
 {{- end }}
+# If the addons are deployed as part of a Cluster API cluster, suppress the pre-delete hooks
+# If the cluster no longer exists, then neither do the addons!
+generatePreDeleteHook: {{ not $ctx.Values.clusterApi | toYaml }}
 {{- end }}
 
 {{/*
@@ -118,6 +121,10 @@ Determines if an addon is enabled given the name.
 {{- $ctx.Values.metricsServer.enabled | toYaml -}}
 {{- else if eq $name "monitoring" -}}
 {{- $ctx.Values.monitoring.enabled | toYaml -}}
+{{- else if eq $name "nfd" -}}
+{{- $ctx.Values.nfd.enabled | toYaml -}}
+{{- else if eq $name "nvidia-gpu-operator" -}}
+{{- $ctx.Values.nvidiaGPUOperator.enabled | toYaml -}}
 {{- else if hasKey $ctx.Values.extraAddons $name -}}
 {{- dig $name "enabled" true $ctx.Values.extraAddons | toYaml -}}
 {{- else -}}
@@ -139,6 +146,8 @@ value:
   {{- else if eq $name "monitoring" }}
   - storage
   - ingress
+  {{- else if eq $name "nvidia-gpu-operator" }}
+  - nfd
   {{- else if hasKey $ctx.Values.extraAddons $name }}
   {{- dig $name "dependsOn" list $ctx.Values.extraAddons | toYaml | nindent 2 }}
   {{- else }}
diff --git a/charts/cluster-addons/templates/nfd.yaml b/charts/cluster-addons/templates/nfd.yaml
new file mode 100644
index 0000000..931b93c
--- /dev/null
+++ b/charts/cluster-addons/templates/nfd.yaml
@@ -0,0 +1,13 @@
+{{- define "cluster-addons.nfd.config" -}}
+{{- include "cluster-addons.job.defaults" (list . "nfd") }}
+installType: helm
+helm: {{ omit .Values.nfd "enabled" | toYaml | nindent 2 }}
+{{- end }}
+
+{{-
+  include "addon.job" (list
+    .
+    "nfd"
+    "cluster-addons.nfd.config"
+  )
+}}
diff --git a/charts/cluster-addons/templates/nvidia-gpu-operator.yaml b/charts/cluster-addons/templates/nvidia-gpu-operator.yaml
new file mode 100644
index 0000000..5c9e0a8
--- /dev/null
+++ b/charts/cluster-addons/templates/nvidia-gpu-operator.yaml
@@ -0,0 +1,13 @@
+{{- define "cluster-addons.nvidia-gpu-operator.config" -}}
+{{- include "cluster-addons.job.defaults" (list . "nvidia-gpu-operator") }}
+installType: helm
+helm: {{ omit .Values.nvidiaGPUOperator "enabled" | toYaml | nindent 2 }}
+{{- end }}
+
+{{-
+  include "addon.job" (list
+    .
+    "nvidia-gpu-operator"
+    "cluster-addons.nvidia-gpu-operator.config"
+  )
+}}
diff --git a/charts/cluster-addons/templates/purge-cloud-resources.yaml b/charts/cluster-addons/templates/purge-cloud-resources.yaml
new file mode 100644
index 0000000..b821d12
--- /dev/null
+++ b/charts/cluster-addons/templates/purge-cloud-resources.yaml
@@ -0,0 +1,102 @@
+{{- if and .Values.clusterApi .Values.openstack.enabled }}
+apiVersion: batch/v1
+kind: Job
+metadata:
+  name: {{ printf "%s-%s" (include "cluster-addons.fullname" .) "purge-cloud-resources" | trunc 63 | trimSuffix "-" }}
+  labels: {{ include "cluster-addons.labels" . | nindent 4 }}
+  annotations:
+    helm.sh/hook: pre-delete
+    helm.sh/hook-delete-policy: before-hook-creation,hook-succeeded
+spec:
+  backoffLimit: {{ .Values.jobDefaults.backoffLimit }}
+  activeDeadlineSeconds: {{ .Values.jobDefaults.activeDeadlineSeconds }}
+  template:
+    metadata:
+      labels: {{ include "cluster-addons.selectorLabels" . | nindent 8 }}
+    spec:
+      {{- with .Values.jobDefaults.imagePullSecrets }}
+      imagePullSecrets: {{ toYaml . | nindent 8 }}
+      {{- end }}
+      securityContext: {{ toYaml .Values.jobDefaults.podSecurityContext | nindent 8 }}
+      restartPolicy: OnFailure
+      serviceAccountName: {{ tpl .Values.serviceAccount.name . }}
+      {{- if .Values.kubeconfigSecret.name }}
+      # Use an init container to install the kubeconfig file from the specified secret if required
+      # We don't use a regular volume for this because we need the hook not to block in the case
+      # where the secret is not available
+      initContainers:
+        - name: install-kubeconfig
+          image: {{
+            printf "%s:%s"
+              .Values.jobDefaults.image.repository
+              (default .Chart.AppVersion .Values.jobDefaults.image.tag)
+          }}
+          imagePullPolicy: {{ .Values.jobDefaults.image.pullPolicy }}
+          securityContext: {{ toYaml .Values.jobDefaults.securityContext | nindent 12 }}
+          args:
+            - /bin/bash
+            - -c
+            - |
+                set -ex
+                get_kubeconfig() {
+                  kubectl get secret {{ tpl .Values.kubeconfigSecret.name . }} \
+                    -n {{ .Release.Namespace }} \
+                    -o go-template='{{ printf "{{ index .data \"%s\" | base64decode }}" .Values.kubeconfigSecret.key }}' \
+                    > /config/auth/kubeconfig
+                }
+                get_kubeconfig || true
+          resources: {{ toYaml .Values.jobDefaults.resources | nindent 12 }}
+          volumeMounts:
+            - name: kubeconfig
+              mountPath: /config/auth
+      {{- end }}
+      containers:
+        - name: purge-cloud-resources
+          image: {{
+            printf "%s:%s"
+              .Values.jobDefaults.image.repository
+              (default .Chart.AppVersion .Values.jobDefaults.image.tag)
+          }}
+          imagePullPolicy: {{ .Values.jobDefaults.image.pullPolicy }}
+          securityContext: {{ toYaml .Values.jobDefaults.securityContext | nindent 12 }}
+          # We can only make a best effort to delete the resources as we don't want the hook to block
+          # So we bail without an error if the kubeconfig doesn't exist, the API is not reachable or
+          # the deletion fails
+          args:
+            - /bin/bash
+            - -c
+            - |
+                set -x
+                {{- if .Values.kubeconfigSecret.name }}
+                test -f "$KUBECONFIG" || exit 0
+                {{- end }}
+                kubectl version || exit 0
+                for ns in $(kubectl get ns -o jsonpath='{.items[*].metadata.name}'); do
+                  for svc in $(kubectl get svc -n "$ns" -o jsonpath='{.items[?(@.spec.type == "LoadBalancer")].metadata.name}'); do
+                    kubectl delete svc "$svc" -n "$ns" || true
+                  done
+                done
+          {{- if .Values.kubeconfigSecret.name }}
+          env:
+            - name: KUBECONFIG
+              value: /config/auth/kubeconfig
+          {{- end }}
+          resources: {{ toYaml .Values.jobDefaults.resources | nindent 12 }}
+          volumeMounts:
+            - name: kubeconfig
+              mountPath: /config/auth
+              readOnly: true
+      hostNetwork: {{ .Values.jobDefaults.hostNetwork }}
+      {{- with .Values.jobDefaults.nodeSelector }}
+      nodeSelector: {{ toYaml . | nindent 8 }}
+      {{- end }}
+      {{- with .Values.jobDefaults.affinity }}
+      affinity: {{ toYaml . | nindent 8 }}
+      {{- end }}
+      {{- with .Values.jobDefaults.tolerations }}
+      tolerations: {{ toYaml . | nindent 8 }}
+      {{- end }}
+      volumes:
+        - name: kubeconfig
+          emptyDir: {}
+{{- end }}
diff --git a/charts/cluster-addons/values.yaml b/charts/cluster-addons/values.yaml
index cf1684d..1fc0f06 100644
--- a/charts/cluster-addons/values.yaml
+++ b/charts/cluster-addons/values.yaml
@@ -47,25 +47,24 @@ serviceAccount:
   # This is treated as a template during rendering
   name: "{{ include \"cluster-addons.fullname\" . }}-deployer"
 
-# Defaults for job settings
-# In all cases, the defaults for the version of the addons chart in use are used
-# See the values for the addons chart for details
+# Default settings for jobs
 jobDefaults:
   image:
     repository: ghcr.io/stackhpc/k8s-utils
     tag: # Defaults to chart appVersion if not given
     pullPolicy: IfNotPresent
+  imagePullSecrets: []
+  backoffLimit: 1000
+  activeDeadlineSeconds: 3600
+  podSecurityContext:
+    runAsNonRoot: true
   securityContext:
     allowPrivilegeEscalation: false
   resources: {}
-  # imagePullSecrets:
-  # backoffLimit:
-  # activeDeadlineSeconds:
-  # podSecurityContext:
-  # hostNetwork:
-  # tolerations:
-  # nodeSelector:
-  # affinity:
+  hostNetwork: false
+  tolerations: []
+  nodeSelector: {}
+  affinity: {}
 
 # The available categories for dependencies and the addons that belong to them
 categories:
@@ -266,5 +265,65 @@ monitoring:
     namespace: monitoring-system
     values: {}
 
+# Settings for node feature discovery (NFD)
+nfd:
+  # Indicates if node feature discovery should be enabled
+  enabled: true
+  chart:
+    repo: https://kubernetes-sigs.github.io/node-feature-discovery/charts
+    name: node-feature-discovery
+    version: 0.10.1
+  release:
+    namespace: node-feature-discovery
+    values:
+      master:
+        extraLabelNs:
+          - nvidia.com
+      worker:
+        # Allow the NFD pods to be scheduled on master nodes
+        tolerations:
+          - key: "node-role.kubernetes.io/master"
+            operator: "Equal"
+            value: ""
+            effect: "NoSchedule"
+          - key: "nvidia.com/gpu"
+            operator: "Equal"
+            value: "present"
+            effect: "NoSchedule"
+        # We want to be able to identify nodes with high-performance hardware
+        # So the whitelisted device classes are:
+        #   02 - Network Controllers (e.g. Ethernet, Infiniband)
+        #   03 - Display Controllers (e.g. GPUs)
+        #   0b40 - Co-processors
+        #   12 - Processing Accelerators (e.g. specialised AI inference chips)
+        config:
+          sources:
+            pci:
+              deviceClassWhitelist:
+                - "02"
+                - "03"
+                - "0b40"
+                - "12"
+              deviceLabelFields:
+                - vendor
+
+# Settings for the NVIDIA GPU operator
+nvidiaGPUOperator:
+  # Indicates if the NVIDIA GPU operator should be enabled
+  # Note that because it uses node feature discovery to run only on nodes
+  # with an NVIDIA GPU available, the overhead of enabling this on clusters
+  # that do not need it now but may need it in the future is low
+  enabled: true
+  chart:
+    repo: https://nvidia.github.io/gpu-operator
+    name: gpu-operator
+    version: v1.9.1
+  release:
+    namespace: gpu-operator
+    values:
+      # Use the shared NFD
+      nfd:
+        enabled: false
+
 # Map of extra addons in the form "component name" -> "addon spec"
 extraAddons: {}
diff --git a/utils/Dockerfile b/utils/Dockerfile
index 6bc6642..737134b 100644
--- a/utils/Dockerfile
+++ b/utils/Dockerfile
@@ -4,10 +4,11 @@ ENV UTILS_UID 1001
 ENV UTILS_GID 1001
 ENV UTILS_USER utils
 ENV UTILS_GROUP utils
+ENV UTILS_HOME /home/utils
 RUN groupadd --gid $UTILS_GID $UTILS_GROUP && \
     useradd \
-      --no-create-home \
-      --no-user-group \
+      --home-dir $UTILS_HOME \
+      --create-home \
       --gid $UTILS_GID \
       --shell /sbin/nologin \
       --uid $UTILS_UID \
@@ -100,5 +101,6 @@ ENV KUBECTL_VN_LATEST v1.23
 COPY ./bin/* /usr/bin/
 
 USER $UTILS_UID
+WORKDIR $UTILS_HOME
 ENTRYPOINT ["tini", "-g", "--"]
 CMD ["bash"]