# magnum-capi-helm-charts/charts/openstack-cluster/values.yaml

# Settings common to both the cluster and the addons
global:
  # Kubernetes version for the cluster
  # Should match the kubelet and kubeadm version in the machine image
  kubernetesVersion: null

# Name of a pre-existing secret that holds a clouds.yaml and, optionally, a cacert
cloudCredentialsSecretName: null
# OR
# Inline content for the clouds.yaml file
# Keeping this at the top level means a clouds.yaml file from OpenStack can be used as a values file
clouds: null
# PEM-encoded CA certificate for the specified cloud
cloudCACert: null

# Name of the cloud to select from the specified clouds.yaml
cloudName: openstack

# Name of the image for cluster machines
# Used when machines are created with ephemeral root disks
machineImage: null
# ID of the image for cluster machines
# Required when machines are created with volumes as root disks
machineImageId: null

# Name of the SSH key to inject into cluster machines
machineSSHKeyName: null

# Settings for the Kubernetes cluster network
kubeNetwork:
  # The defaults take the private network range 172.16.0.0/12 and split it
  # into two equally-sized /13 blocks, one for pods and one for services
  # Each block holds ~500,000 addresses
  pods:
    cidrBlocks:
      - '172.16.0.0/13'
  services:
    cidrBlocks:
      - '172.24.0.0/13'
  serviceDomain: cluster.local

# Settings for the OpenStack networking for the cluster
clusterNetworking:
  # Custom nameservers to use for the hosts
  dnsNameservers: null
  # Indicates if security groups should be managed by the cluster
  manageSecurityGroups: true
  # Indicates if the managed security groups should allow all in-cluster traffic
  # This is true by default
  # NOTE(review): this comment used to name Cilium as the default CNI, but
  # addons.cni.type defaults to calico in this file - confirm that allowing all
  # in-cluster traffic is still the intended default for that CNI
  allowAllInClusterTraffic: true
  # The ID of the external network to use
  # If not given, the external network will be detected
  externalNetworkId: null
  # Details of the internal network to use
  internalNetwork:
    # Filter to find an existing network for the cluster internal network
    # See Cluster API documentation for details
    networkFilter:
      # id: e63ca1a0-f69d-4fbf-b306-310857b1afe5
      # name: tenant-internal-net
    # Filter to find an existing subnet for the cluster internal network
    # See Cluster API documentation for details
    subnetFilter: null
    # The CIDR to use if creating a cluster network
    # This is only used if neither networkFilter nor subnetFilter is given
    nodeCidr: 192.168.3.0/24

# Settings for registry mirrors
# A map of registry host to a list of mirror URLs, as in the commented example below
registryMirrors: {}
  # docker.io: ["https://registry.my.domain/v2/dockerhub-public"]

# Settings for node-level registry auth
# A map of registry host to username/password credentials, as in the commented example below
registryAuth: {}
  # registry-1.docker.io:
  #   username: "<username>"
  #   password: "<password>"

# A map of trusted CAs to add to the system trust on cluster nodes
# Each entry maps a name to PEM-encoded certificate data, as in the commented example below
trustedCAs: {}
  # custom-ca: |
  #   -----BEGIN CERTIFICATE-----
  #   ...certificate data...
  #   -----END CERTIFICATE-----

# List of additional packages to install on cluster nodes
# Each item is a package name, as in the commented example below
additionalPackages: []
  # - nfs-common

# Settings for the Kubernetes API server
apiServer:
  # Whether a load balancer should be deployed for the API server
  enableLoadBalancer: true
  # Whether a floating IP should be associated with the API server
  associateFloatingIP: true
  # A specific floating IP to associate with the API server
  # When not given, a new IP is allocated if one is required
  floatingIP: null
  # A specific fixed IP to associate with the API server
  # When enableLoadBalancer is true, this becomes the VIP of the load balancer
  # When enableLoadBalancer and associateFloatingIP are both false, this should be
  # the IP of a pre-allocated port to be used as the VIP
  fixedIP: null
  # Port for the API server
  port: 6443

# Settings for the control plane
controlPlane:
  # Failure domains for the control plane nodes
  # When provided, this should be a list of availability zones
  # Only used when omitFailureDomain = false
  failureDomains: null
  # Whether the failure domain should be omitted from control plane nodes
  omitFailureDomain: true
  # Number of control plane machines to deploy
  # Use more than 1 for high-availability
  # Use an odd number for etcd quorum - usually 3, or 5 for very large clusters
  machineCount: 3
  # Flavor for the control plane machines
  machineFlavor: null
  # Networks and ports for the control plane nodes
  # When neither networks nor ports are given, the cluster internal network is used
  # See https://github.com/kubernetes-sigs/cluster-api-provider-openstack/blob/master/docs/book/src/clusteropenstack/configuration.md#network-filters
  machineNetworking:
    networks: null
    ports: null
  # Root volume spec for the control plane machines
  # When not given, the ephemeral root disk from the flavor is used
  machineRootVolume:
    # # The size of the disk to use
    # diskSize:
    # # The volume type to use
    # # If not specified, the default volume type is used
    # volumeType:
    # # The volume availability zone to use
    # # If not specified, the machine availability zone is used
    # availabilityZone:
  # How long to wait for a node to finish draining before it can be removed
  nodeDrainTimeout: 5m
  # Rollout strategy for the control plane nodes
  # The default lets the control plane start provisioning new nodes
  # without tearing down old ones first
  rolloutStrategy:
    type: RollingUpdate
    rollingUpdate:
      # For the control plane, only 0 or 1 is allowed
      maxSurge: 1
  # kubeadm config specification for the control plane
  # The default is a simple configuration that enables the external cloud provider
  kubeadmConfigSpec:
    initConfiguration:
      nodeRegistration:
        name: "{{ local_hostname }}"
        kubeletExtraArgs:
          cloud-provider: external
    # Besides enabling an external cloud provider, the bind addresses for the
    # etcd metrics, controller-manager, scheduler and kube-proxy are set to 0.0.0.0
    # so that Prometheus can reach them to collect metrics
    clusterConfiguration:
      etcd:
        local:
          extraArgs:
            listen-metrics-urls: http://0.0.0.0:2381
      apiServer:
        extraArgs:
          cloud-provider: external
      controllerManager:
        extraArgs:
          cloud-provider: external
          bind-address: 0.0.0.0
      scheduler:
        extraArgs:
          bind-address: 0.0.0.0
    joinConfiguration:
      nodeRegistration:
        name: "{{ local_hostname }}"
        kubeletExtraArgs:
          cloud-provider: external
    kubeProxyConfiguration:
      metricsBindAddress: 0.0.0.0:10249
  # Machine health check used for auto-healing of the control plane
  # See https://cluster-api.sigs.k8s.io/tasks/healthcheck.html
  healthCheck:
    # Whether the machine health check should be enabled
    enabled: true
    # Spec for the health check
    spec:
      # Unhealthy control plane nodes are always remediated by default
      maxUnhealthy: '100%'
      # A control plane node that has not been Ready for more than
      # 5 mins is considered unhealthy by default
      unhealthyConditions:
        - type: Ready
          status: Unknown
          timeout: 300s
        - type: Ready
          status: 'False'
          timeout: 300s

# Defaults for node groups
# Any of these can be overridden in the specification for an individual node group
nodeGroupDefaults:
  # Whether the node group should be autoscaled
  autoscale: false
  # Failure domain for the node group
  failureDomain: null
  # Flavor for machines in the node group
  machineFlavor: null
  # Default networks and ports for worker nodes
  # When neither networks nor ports are given, the cluster internal network is used
  # See https://github.com/kubernetes-sigs/cluster-api-provider-openstack/blob/master/docs/book/src/clusteropenstack/configuration.md#network-filters
  machineNetworking:
    networks: null
    ports: null
  # Root volume spec for machines in the node group
  # When not given, the ephemeral root disk from the flavor is used
  machineRootVolume:
    # # The size of the disk to use
    # diskSize:
    # # The volume type to use
    # # If not specified, the default volume type is used
    # volumeType:
    # # The volume availability zone to use
    # # If not specified, the machine availability zone is used
    # availabilityZone:
  # How long to wait for a node to finish draining before it can be removed
  nodeDrainTimeout: 5m
  # Rollout strategy for the node group
  # The default performs a rolling update within the existing resource envelope
  # of the node group, even if the node group temporarily drops to zero nodes
  rolloutStrategy:
    type: RollingUpdate
    rollingUpdate:
      # Maximum number of node group machines that may be unavailable during the update
      # Either an absolute number or a percentage of the desired count
      maxUnavailable: 1
      # Maximum number of machines that may be scheduled above the desired count for
      # the group during an update
      # Either an absolute number or a percentage of the desired count
      maxSurge: 0
      # One of Random, Newest, Oldest
      deletePolicy: Random
  # Default kubeadm config specification for worker nodes
  # Merged with any configuration given for specific node groups
  # The default is a simple configuration that enables the external cloud provider
  kubeadmConfigSpec:
    joinConfiguration:
      nodeRegistration:
        name: "{{ local_hostname }}"
        kubeletExtraArgs:
          cloud-provider: external
  # Default machine health check for worker nodes
  # See https://cluster-api.sigs.k8s.io/tasks/healthcheck.html
  # Note that maxUnhealthy or unhealthyRange are evaluated per node group
  healthCheck:
    # Whether the machine health check should be enabled
    enabled: true
    # Spec for the health check
    spec:
      # Unhealthy worker nodes are always remediated by default
      maxUnhealthy: '100%'
      # Remediate a node that takes longer than 10 mins to start up
      nodeStartupTimeout: 10m
      # A worker node that has not been Ready for more than 5 mins
      # is considered unhealthy by default
      unhealthyConditions:
        - type: Ready
          status: Unknown
          timeout: 300s
        - type: Ready
          status: 'False'
          timeout: 300s

# Worker node groups for the cluster
nodeGroups:
  # Name of the node group
  - name: md-0
    # Number of machines in the node group when autoscale is false
    machineCount: 3
    # Minimum and maximum number of machines in the node group when autoscale is true
    # machineCountMin: 3
    # machineCountMax: 3

# Configuration for the cluster autoscaler
autoscaler:
  # The image to use for the autoscaler component
  image:
    # Pulled from registry.k8s.io rather than the legacy k8s.gcr.io registry,
    # which the Kubernetes project has frozen in favour of registry.k8s.io
    repository: registry.k8s.io/autoscaling/cluster-autoscaler
    pullPolicy: IfNotPresent
    # The tag depends on the Kubernetes version on the target cluster, but can be overridden
    tag: null
    # These are the current latest versions for each Kubernetes minor version
    # If a tag is not specified here for the target Kubernetes version, vX.Y.0 is used
    # The keys are quoted so that they stay strings rather than being read as floats
    tags:
      "1.25": v1.25.0
      "1.24": v1.24.0
      "1.23": v1.23.1
      "1.22": v1.22.3
  imagePullSecrets: []
  # Pod-level security context
  podSecurityContext:
    runAsNonRoot: true
    runAsUser: 1001
  # Container-level security context
  securityContext:
    allowPrivilegeEscalation: false
    capabilities:
      drop: [ALL]
    readOnlyRootFilesystem: true
  # Resource requests and limits for pods
  resources: {}
  # Node selector for pods
  nodeSelector: {}
  # Tolerations for pods
  tolerations: []
  # Affinity rules for pods
  affinity: {}

# Configuration for cluster addons
addons:
  # Indicates if cluster addons should be deployed
  # Individual addons below also have their own enabled flags
  enabled: true

  # The label to use to indicate that a configmap or secret should be watched
  watchLabel: addons.stackhpc.com/watch

  # Settings for hook jobs
  hooks:
    image:
      repository: ghcr.io/stackhpc/k8s-utils
      # Defaults to chart appVersion if not given
      tag: null
      pullPolicy: IfNotPresent
    imagePullSecrets: []
    backoffLimit: 1000
    activeDeadlineSeconds: 3600
    podSecurityContext:
      runAsNonRoot: true
    securityContext:
      allowPrivilegeEscalation: false
      capabilities:
        drop:
          - ALL
      readOnlyRootFilesystem: true
    resources: {}
    hostNetwork: false
    tolerations: []
    nodeSelector: {}
    affinity: {}

  # Settings for the CNI addon
  cni:
    # Whether a CNI should be deployed
    enabled: true
    # Which CNI to deploy - supported values are calico or cilium
    type: calico
    # Settings for the Calico CNI
    # See https://projectcalico.docs.tigera.io/getting-started/kubernetes/helm
    calico:
      chart:
        repo: https://projectcalico.docs.tigera.io/charts
        name: tigera-operator
        version: 'v3.23.3'
      release:
        namespace: tigera-operator
        values: {}
    # Settings for the Cilium CNI
    # See https://docs.cilium.io/en/stable/gettingstarted/k8s-install-helm/ for details
    cilium:
      chart:
        repo: https://helm.cilium.io/
        name: cilium
        version: '1.11.1'
      release:
        namespace: kube-system
        values: {}

  # Settings for the OpenStack integrations
  openstack:
    # Whether the OpenStack integrations should be enabled
    enabled: true
    # Target namespace for the OpenStack integrations
    targetNamespace: openstack-system
    # cloud-config options for the OpenStack integrations
    # The [Global] section is configured to use the target cloud
    # See https://github.com/kubernetes/cloud-provider-openstack/blob/master/docs/openstack-cloud-controller-manager/using-openstack-cloud-controller-manager.md#config-openstack-cloud-controller-manager
    # and https://github.com/kubernetes/cloud-provider-openstack/blob/master/docs/cinder-csi-plugin/using-cinder-csi-plugin.md#block-storage
    cloudConfig:
      # Volume AZs are ignored for Cinder by default, as most clouds have a single globally-attachable Cinder AZ
      BlockStorage:
        ignore-volume-az: true
    # Settings for the Cloud Controller Manager (CCM)
    ccm:
      # Whether the OpenStack CCM should be enabled
      # By default, the CCM is enabled if the OpenStack integrations are enabled
      # See https://github.com/kubernetes/cloud-provider-openstack/blob/master/charts/openstack-cloud-controller-manager/values.yaml
      enabled: true
      chart:
        repo: https://kubernetes.github.io/cloud-provider-openstack
        name: openstack-cloud-controller-manager
        version: '1.3.0'
      values: {}
    # Settings for the Cinder CSI plugin
    csiCinder:
      # Whether the Cinder CSI should be enabled
      # By default, it is enabled if the OpenStack integrations are enabled
      # See https://github.com/kubernetes/cloud-provider-openstack/blob/master/charts/cinder-csi-plugin/values.yaml
      enabled: true
      chart:
        repo: https://kubernetes.github.io/cloud-provider-openstack
        name: openstack-cinder-csi
        version: '2.2.0'
      values: {}
      # Variables affecting the definition of the storage class
      storageClass:
        # Whether the storage class should be enabled
        enabled: true
        # Name of the storage class
        name: csi-cinder
        # Whether the storage class should be annotated as the default storage class
        isDefault: true
        # Reclaim policy for the storage class
        reclaimPolicy: Delete
        # Whether volume expansion is allowed
        allowVolumeExpansion: true
        # Cinder availability zone for volumes provisioned by the storage class
        availabilityZone: nova
        # Cinder volume type for volumes provisioned by the storage class
        # When not given, the default volume type will be used
        volumeType: null
        # Allowed topologies for the storage class
        allowedTopologies: null

  # Settings for the metrics server
  # https://github.com/kubernetes-sigs/metrics-server#helm-chart
  metricsServer:
    # Whether the metrics server should be deployed
    enabled: true
    chart:
      repo: https://kubernetes-sigs.github.io/metrics-server
      name: metrics-server
      version: '3.8.2'
    release:
      namespace: kube-system
      values: {}

  # Settings for the Kubernetes dashboard
  # https://github.com/kubernetes/dashboard/tree/master/charts/helm-chart/kubernetes-dashboard
  kubernetesDashboard:
    # Whether the Kubernetes dashboard should be enabled
    enabled: false
    chart:
      repo: https://kubernetes.github.io/dashboard
      name: kubernetes-dashboard
      version: '5.10.0'
    release:
      namespace: kubernetes-dashboard
      values: {}

  # Settings for ingress controllers
  ingress:
    # Settings for the Nginx ingress controller
    # https://github.com/kubernetes/ingress-nginx/tree/main/charts/ingress-nginx#configuration
    nginx:
      # Whether the Nginx ingress controller should be enabled
      enabled: false
      chart:
        repo: https://kubernetes.github.io/ingress-nginx
        name: ingress-nginx
        version: '4.2.5'
      release:
        namespace: ingress-nginx
        values: {}

  # Settings for cluster monitoring
  monitoring:
    # Whether the cluster monitoring should be enabled
    enabled: false
    kubePrometheusStack:
      chart:
        repo: https://prometheus-community.github.io/helm-charts
        name: kube-prometheus-stack
        version: '40.1.0'
      release:
        namespace: monitoring-system
        values: {}
    lokiStack:
      enabled: true
      chart:
        repo: https://grafana.github.io/helm-charts
        name: loki-stack
        version: '2.8.2'
      release:
        namespace: monitoring-system
        values: {}

  # Settings for node feature discovery
  # https://github.com/kubernetes-sigs/node-feature-discovery/tree/master/deployment/helm/node-feature-discovery
  nodeFeatureDiscovery:
    # Whether node feature discovery should be enabled
    enabled: true
    chart:
      repo: https://kubernetes-sigs.github.io/node-feature-discovery/charts
      name: node-feature-discovery
      version: '0.11.2'
    release:
      namespace: node-feature-discovery
      values: {}

  # Settings for the NVIDIA GPU operator
  nvidiaGPUOperator:
    # Whether the NVIDIA GPU operator should be enabled
    # It relies on node feature discovery to run only on nodes with an NVIDIA
    # GPU available, so the overhead of enabling it on clusters that do not
    # need it now but may need it in the future is low
    enabled: true
    chart:
      repo: https://nvidia.github.io/gpu-operator
      name: gpu-operator
      version: 'v1.11.1'
    release:
      namespace: gpu-operator
      values: {}

  # Settings for the Mellanox network operator
  mellanoxNetworkOperator:
    # Whether the network operator should be enabled
    # It relies on node feature discovery to run only on nodes with a Mellanox
    # NIC available, so the overhead of enabling it on clusters that do not
    # need it now but may need it in the future is low
    enabled: true
    chart:
      repo: https://mellanox.github.io/network-operator
      name: network-operator
      version: '1.3.0'
    release:
      namespace: network-operator
      values: {}