Add NFD + NVIDIA GPU operator by default

Matt Pryor 2022-02-02 13:58:58 +00:00
parent 65b7380864
commit e736e0bb8c
8 changed files with 224 additions and 16 deletions

View File

@@ -311,6 +311,8 @@ hooks:
# These should include environment variables, volume mounts etc. if they need
# to target a remote cluster using kubeconfigSecret
extraInitContainers: []
# Indicates whether a pre-delete hook should be generated for the addon
generatePreDeleteHook: true
backoffLimit: 1000
activeDeadlineSeconds: 3600
podSecurityContext:
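
For illustration only, an entry in `extraInitContainers` that targets the remote cluster might look like the sketch below; the container name, image tag, volume name and kubeconfig path are all assumptions, not values provided by the chart:

```yaml
extraInitContainers:
  - name: check-remote-cluster                 # illustrative name
    image: ghcr.io/stackhpc/k8s-utils:latest   # illustrative tag
    env:
      - name: KUBECONFIG                       # point kubectl at the remote cluster
        value: /config/auth/kubeconfig         # illustrative path
    volumeMounts:
      - name: kubeconfig                       # illustrative volume populated from kubeconfigSecret
        mountPath: /config/auth
        readOnly: true
    command: ["kubectl", "get", "nodes"]
```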
@@ -343,8 +345,10 @@ pre-upgrade hook is produced to uninstall the addon.
{{- include "addon.config.secret" (list $ctx $name $config) }}
---
{{- include "addon.job.install" (list $ctx $name $config) }}
{{- if $config.generatePreDeleteHook }}
---
{{- include "addon.job.uninstall" (list $ctx $name "pre-delete" $config) }}
{{- end }}
{{- else if $ctx.Release.IsUpgrade }}
{{- $secretName := include "addon.fullname" (list $ctx $name) | printf "%s-config" }}
{{- if lookup "v1" "Secret" $ctx.Release.Namespace $secretName }}
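
As a rough sketch of how the new flag is consumed, an addon config that opts out of the generated pre-delete hook could look like this; the addon name and the surrounding keys are illustrative, not chart defaults:

```yaml
# Hypothetical addon config passed to the addon.job template
my-addon:                        # illustrative addon name
  enabled: true
  generatePreDeleteHook: false   # no uninstall Job is rendered as a pre-delete hook
  backoffLimit: 1000
  activeDeadlineSeconds: 3600
```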

View File

@@ -53,8 +53,8 @@ template:
          - name: config
            mountPath: /config
            readOnly: true
      {{- range $dep := $config.dependsOn }}
      - name: wait-for-{{ $dep }}
      {{- if $config.dependsOn }}
      - name: wait-for-dependencies
        image: {{ printf "%s:%s" $config.image.repository (default $ctx.Chart.AppVersion $config.image.tag) }}
        imagePullPolicy: {{ $config.image.pullPolicy }}
        securityContext: {{ toYaml $config.securityContext | nindent 10 }}
@@ -63,6 +63,7 @@ template:
          - -c
          - |
              set -ex
              {{- range $dep := $config.dependsOn }}
              {{- $labels := include "addon.job.selectorLabels" (list $ctx $dep "install") | fromYaml }}
              {{- range $i, $label := (keys $labels | sortAlpha) -}}
              {{- if $i }}
@@ -72,6 +73,7 @@ template:
              {{- end }}
              {{- end }}
              kubectl wait --for=condition=Complete job -n {{ $ctx.Release.Namespace }} -l "$LABELS" --all --timeout=-1s
              {{- end }}
        resources: {{ toYaml $config.resources | nindent 10 }}
      {{- end }}
      {{- range $config.extraInitContainers }}
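
Rendered, the single init container ends up looking roughly like the sketch below for an addon that depends on nfd; the namespace, image tag and label selector are illustrative, since the real selector is built from addon.job.selectorLabels:

```yaml
initContainers:
  - name: wait-for-dependencies
    image: ghcr.io/stackhpc/k8s-utils:latest   # illustrative tag
    args:
      - /bin/bash
      - -c
      - |
        set -ex
        # one kubectl wait per dependency; --timeout=-1s means wait indefinitely
        LABELS="app.kubernetes.io/instance=my-release"   # illustrative selector
        kubectl wait --for=condition=Complete job -n cluster-addons -l "$LABELS" --all --timeout=-1s
```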
@@ -141,7 +143,11 @@ apiVersion: batch/v1
kind: Job
metadata:
  {{- $checksum := include "addon.job.install.spec" . | sha256sum }}
  {{- $jobName := printf "%s-%s" (include "addon.job.name" (list $ctx $name "install")) (trunc 5 $checksum) }}
  {{-
    $jobName := printf "%s-%s"
      (include "addon.job.name" (list $ctx $name "install") | trunc 57 | trimSuffix "-")
      (trunc 5 $checksum)
  }}
  name: {{ $jobName }}
  labels: {{ include "addon.job.labels" (list $ctx $name "install") | nindent 4 }}
spec:
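
The 57-character truncation plus the 5-character checksum keeps generated names within 63 characters (Job names are copied into the job-name label on pods, and label values are capped at 63 characters), while still forcing a new Job whenever the rendered install spec changes. A sketch of the resulting metadata, with an illustrative name and hash:

```yaml
metadata:
  # <base name truncated to 57 chars, trailing "-" trimmed> + "-" + first 5 chars of sha256(install spec)
  name: my-cluster-cluster-addons-nvidia-gpu-operator-install-3f2a1   # illustrative
```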

View File

@@ -90,6 +90,9 @@ extraInitContainers:
- "-1s"
resources: {{ toYaml $ctx.Values.jobDefaults.resources | nindent 6 }}
{{- end }}
# If the addons are deployed as part of a Cluster API cluster, suppress the pre-delete hooks
# If the cluster no longer exists, then neither do the addons!
generatePreDeleteHook: {{ not $ctx.Values.clusterApi | toYaml }}
{{- end }}
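
In other words, when the addons are deployed for a Cluster API cluster the chart flips the default for every addon; a minimal values sketch:

```yaml
# Cluster API manages the workload cluster's lifecycle, so the addon
# pre-delete hooks are suppressed: each generated addon config gets
#   generatePreDeleteHook: false
clusterApi: true
```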
{{/*
@@ -118,6 +121,10 @@ Determines if an addon is enabled given the name.
{{- $ctx.Values.metricsServer.enabled | toYaml -}}
{{- else if eq $name "monitoring" -}}
{{- $ctx.Values.monitoring.enabled | toYaml -}}
{{- else if eq $name "nfd" -}}
{{- $ctx.Values.nfd.enabled | toYaml -}}
{{- else if eq $name "nvidia-gpu-operator" -}}
{{- $ctx.Values.nvidiaGPUOperator.enabled | toYaml -}}
{{- else if hasKey $ctx.Values.extraAddons $name -}}
{{- dig $name "enabled" true $ctx.Values.extraAddons | toYaml -}}
{{- else -}}
@@ -139,6 +146,8 @@ value:
{{- else if eq $name "monitoring" }}
- storage
- ingress
{{- else if eq $name "nvidia-gpu-operator" }}
- nfd
{{- else if hasKey $ctx.Values.extraAddons $name }}
{{- dig $name "dependsOn" list $ctx.Values.extraAddons | toYaml | nindent 2 }}
{{- else }}
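
The same enabled/dependsOn lookups cover user-supplied addons under extraAddons; a hedged example that installs only after NFD. The addon name and chart location are illustrative, and it is an assumption that extra addons use the same installType/helm layout as the bundled addons in this commit:

```yaml
extraAddons:
  intel-device-plugin:                     # illustrative name
    enabled: true
    dependsOn:
      - nfd                                # install job waits for the nfd install job
    installType: helm                      # assumed to follow the bundled addon spec layout
    helm:
      chart:
        repo: https://example.org/charts   # illustrative
        name: intel-device-plugin
        version: 0.1.0
      release:
        namespace: intel-device-plugin
        values: {}
```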

View File

@@ -0,0 +1,13 @@
{{- define "cluster-addons.nfd.config" -}}
{{- include "cluster-addons.job.defaults" (list . "nfd") }}
installType: helm
helm: {{ omit .Values.nfd "enabled" | toYaml | nindent 2 }}
{{- end }}
{{-
  include "addon.job" (list
    .
    "nfd"
    "cluster-addons.nfd.config"
  )
}}

View File

@@ -0,0 +1,13 @@
{{- define "cluster-addons.nvidia-gpu-operator.config" -}}
{{- include "cluster-addons.job.defaults" (list . "nvidia-gpu-operator") }}
installType: helm
helm: {{ omit .Values.nvidiaGPUOperator "enabled" | toYaml | nindent 2 }}
{{- end }}
{{-
  include "addon.job" (list
    .
    "nvidia-gpu-operator"
    "cluster-addons.nvidia-gpu-operator.config"
  )
}}

View File

@@ -0,0 +1,102 @@
{{- if and .Values.clusterApi .Values.openstack.enabled }}
apiVersion: batch/v1
kind: Job
metadata:
  name: {{ printf "%s-%s" (include "cluster-addons.fullname" .) "purge-cloud-resources" | trunc 63 | trimSuffix "-" }}
  labels: {{ include "cluster-addons.labels" . | nindent 4 }}
  annotations:
    helm.sh/hook: pre-delete
    helm.sh/hook-delete-policy: before-hook-creation,hook-succeeded
spec:
  backoffLimit: {{ .Values.jobDefaults.backoffLimit }}
  activeDeadlineSeconds: {{ .Values.jobDefaults.activeDeadlineSeconds }}
  template:
    metadata:
      labels: {{ include "cluster-addons.selectorLabels" . | nindent 8 }}
    spec:
      {{- with .Values.jobDefaults.imagePullSecrets }}
      imagePullSecrets: {{ toYaml . | nindent 8 }}
      {{- end }}
      securityContext: {{ toYaml .Values.jobDefaults.podSecurityContext | nindent 8 }}
      restartPolicy: OnFailure
      serviceAccountName: {{ tpl .Values.serviceAccount.name . }}
      {{- if .Values.kubeconfigSecret.name }}
      # Use an init container to install the kubeconfig file from the specified secret if required
      # We don't use a regular volume for this because we need the hook not to block in the case
      # where the secret is not available
      initContainers:
        - name: install-kubeconfig
          image: {{
            printf "%s:%s"
              .Values.jobDefaults.image.repository
              (default .Chart.AppVersion .Values.jobDefaults.image.tag)
          }}
          imagePullPolicy: {{ .Values.jobDefaults.image.pullPolicy }}
          securityContext: {{ toYaml .Values.jobDefaults.securityContext | nindent 12 }}
          args:
            - /bin/bash
            - -c
            - |
                set -ex
                get_kubeconfig() {
                  kubectl get secret {{ tpl .Values.kubeconfigSecret.name . }} \
                    -n {{ .Release.Namespace }} \
                    -o go-template='{{ printf "{{ index .data \"%s\" | base64decode }}" .Values.kubeconfigSecret.key }}' \
                    > /config/auth/kubeconfig
                }
                get_kubeconfig || true
          resources: {{ toYaml .Values.jobDefaults.resources | nindent 12 }}
          volumeMounts:
            - name: kubeconfig
              mountPath: /config/auth
      {{- end }}
      containers:
        - name: purge-cloud-resources
          image: {{
            printf "%s:%s"
              .Values.jobDefaults.image.repository
              (default .Chart.AppVersion .Values.jobDefaults.image.tag)
          }}
          imagePullPolicy: {{ .Values.jobDefaults.image.pullPolicy }}
          securityContext: {{ toYaml .Values.jobDefaults.securityContext | nindent 12 }}
          # We can only make a best effort to delete the resources as we don't want the hook to block
          # So we bail without an error if the kubeconfig doesn't exist, the API is not reachable or
          # the deletion fails
          args:
            - /bin/bash
            - -c
            - |
                set -x
                {{- if .Values.kubeconfigSecret.name }}
                test -f "$KUBECONFIG" || exit 0
                {{- end }}
                kubectl version || exit 0
                for ns in $(kubectl get ns -o jsonpath='{.items[*].metadata.name}'); do
                  for svc in $(kubectl get svc -n "$ns" -o jsonpath='{.items[?(@.spec.type == "LoadBalancer")].metadata.name}'); do
                    kubectl delete svc "$svc" -n "$ns" || true
                  done
                done
          {{- if .Values.kubeconfigSecret.name }}
          env:
            - name: KUBECONFIG
              value: /config/auth/kubeconfig
          {{- end }}
          resources: {{ toYaml .Values.jobDefaults.resources | nindent 12 }}
          volumeMounts:
            - name: kubeconfig
              mountPath: /config/auth
              readOnly: true
      hostNetwork: {{ .Values.jobDefaults.hostNetwork }}
      {{- with .Values.jobDefaults.nodeSelector }}
      nodeSelector: {{ toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.jobDefaults.affinity }}
      affinity: {{ toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.jobDefaults.tolerations }}
      tolerations: {{ toYaml . | nindent 8 }}
      {{- end }}
      volumes:
        - name: kubeconfig
          emptyDir: {}
{{- end }}
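
For this hook to render and to reach the workload cluster, the release needs roughly the values sketched below; the secret name is illustrative, and the key value mirrors the usual Cluster API kubeconfig secret convention (treat both as assumptions):

```yaml
clusterApi: true                # the addons belong to a Cluster API managed cluster
openstack:
  enabled: true                 # the OpenStack integrations may have created LoadBalancer resources
kubeconfigSecret:
  name: my-cluster-kubeconfig   # illustrative secret holding the workload cluster kubeconfig
  key: value                    # key within the secret (assumption)
```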

View File

@@ -47,25 +47,24 @@ serviceAccount:
  # This is treated as a template during rendering
  name: "{{ include \"cluster-addons.fullname\" . }}-deployer"
# Defaults for job settings
# In all cases, the defaults for the version of the addons chart in use are used
# See the values for the addons chart for details
# Default settings for jobs
jobDefaults:
  image:
    repository: ghcr.io/stackhpc/k8s-utils
    tag: # Defaults to chart appVersion if not given
    pullPolicy: IfNotPresent
  imagePullSecrets: []
  backoffLimit: 1000
  activeDeadlineSeconds: 3600
  podSecurityContext:
    runAsNonRoot: true
  securityContext:
    allowPrivilegeEscalation: false
  resources: {}
  # imagePullSecrets:
  # backoffLimit:
  # activeDeadlineSeconds:
  # podSecurityContext:
  # hostNetwork:
  # tolerations:
  # nodeSelector:
  # affinity:
  hostNetwork: false
  tolerations: []
  nodeSelector: {}
  affinity: {}
# The available categories for dependencies and the addons that belong to them
categories:
@@ -266,5 +265,65 @@ monitoring:
    namespace: monitoring-system
    values: {}
# Settings for node feature discovery (NFD)
nfd:
  # Indicates if node feature discovery should be enabled
  enabled: true
  chart:
    repo: https://kubernetes-sigs.github.io/node-feature-discovery/charts
    name: node-feature-discovery
    version: 0.10.1
  release:
    namespace: node-feature-discovery
    values:
      master:
        extraLabelNs:
          - nvidia.com
      worker:
        # Allow the NFD pods to be scheduled on master nodes
        tolerations:
          - key: "node-role.kubernetes.io/master"
            operator: "Equal"
            value: ""
            effect: "NoSchedule"
          - key: "nvidia.com/gpu"
            operator: "Equal"
            value: "present"
            effect: "NoSchedule"
        # We want to be able to identify nodes with high-performance hardware
        # So the whitelisted device classes are:
        #   02   - Network Controllers (e.g. Ethernet, Infiniband)
        #   03   - Display Controllers (e.g. GPUs)
        #   0b40 - Co-processors
        #   12   - Processing Accelerators (e.g. specialised AI inference chips)
        config:
          sources:
            pci:
              deviceClassWhitelist:
                - "02"
                - "03"
                - "0b40"
                - "12"
              deviceLabelFields:
                - vendor
# Settings for the NVIDIA GPU operator
nvidiaGPUOperator:
  # Indicates if the NVIDIA GPU operator should be enabled
  # Note that because it uses node feature discovery to run only on nodes
  # with an NVIDIA GPU available, the overhead of enabling this on clusters
  # that do not need it now but may need it in the future is low
  enabled: true
  chart:
    repo: https://nvidia.github.io/gpu-operator
    name: gpu-operator
    version: v1.9.1
  release:
    namespace: gpu-operator
    values:
      # Use the shared NFD
      nfd:
        enabled: false
# Map of extra addons in the form "component name" -> "addon spec"
extraAddons: {}
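
To make the whitelist concrete: with deviceLabelFields restricted to vendor, NFD advertises the presence of whitelisted PCI devices per vendor ID, which is what the GPU operator then schedules against, and the extraLabelNs entry lets labels in the nvidia.com namespace pass through the NFD master. A hedged example of the labels a GPU node might end up with, where 10de is the NVIDIA PCI vendor ID and the exact label names depend on the NFD and GPU operator versions:

```yaml
# Illustrative node labels after NFD has run on a GPU node
metadata:
  labels:
    feature.node.kubernetes.io/pci-10de.present: "true"   # whitelisted device from vendor 10de found
    nvidia.com/gpu.present: "true"                        # example label in the allowed nvidia.com namespace
```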

View File

@@ -4,10 +4,11 @@ ENV UTILS_UID 1001
ENV UTILS_GID 1001
ENV UTILS_USER utils
ENV UTILS_GROUP utils
ENV UTILS_HOME /home/utils
RUN groupadd --gid $UTILS_GID $UTILS_GROUP && \
    useradd \
      --no-create-home \
      --no-user-group \
      --home-dir $UTILS_HOME \
      --create-home \
      --gid $UTILS_GID \
      --shell /sbin/nologin \
      --uid $UTILS_UID \
@@ -100,5 +101,6 @@ ENV KUBECTL_VN_LATEST v1.23
COPY ./bin/* /usr/bin/
USER $UTILS_UID
WORKDIR $UTILS_HOME
ENTRYPOINT ["tini", "-g", "--"]
CMD ["bash"]