---

# The name of an existing secret containing a clouds.yaml and optional cacert
cloudCredentialsSecretName:
# OR
# Content for the clouds.yaml file
# Having this as a top-level item allows a clouds.yaml file from OpenStack to be used as a values file
clouds:
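  # The structure mirrors a standard clouds.yaml, e.g. for an application credential
  # (all values below are illustrative placeholders, not defaults):
  # openstack:
  #   auth:
  #     auth_url: https://keystone.example.com:5000
  #     application_credential_id: "<application credential ID>"
  #     application_credential_secret: "<application credential secret>"
  #   region_name: RegionOne
  #   interface: public
  #   identity_api_version: 3
  #   auth_type: v3applicationcredential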
# The PEM-encoded CA certificate for the specified cloud
cloudCACert:

# The name of the cloud to use from the specified clouds.yaml
cloudName: openstack

# The Kubernetes version of the cluster
# This should match the version of kubelet and kubeadm in the image
kubernetesVersion:

# The name of the image to use for cluster machines
machineImage:
# OR
# The ID of the image to use for cluster machines
machineImageId:

# The name of the SSH key to inject into cluster machines
machineSSHKeyName:

# The prefix used for project labels and annotations
projectPrefix: capi.stackhpc.com

# Any extra annotations to add to the cluster
clusterAnnotations: {}

# Values for the Kubernetes cluster network
kubeNetwork:
  # By default, use the private network range 172.16.0.0/12 for the cluster network
  # We split it into two equally-sized blocks for pods and services
  # This gives ~500,000 addresses in each block
  pods:
    cidrBlocks:
      - 172.16.0.0/13
  services:
    cidrBlocks:
      - 172.24.0.0/13
  serviceDomain: cluster.local

# Settings for the OpenStack networking for the cluster
clusterNetworking:
  # Custom nameservers to use for the hosts
  dnsNameservers:
  # Indicates if security groups should be managed by the cluster
  manageSecurityGroups: true
  # Indicates if the managed security groups should allow all in-cluster traffic
  # The default CNI installed by the addons is Cilium, so this is true by default
  allowAllInClusterTraffic: true
  # The ID of the external network to use
  # If not given, the external network will be detected
  externalNetworkId:
  # Details of the internal network to use
  internalNetwork:
    # Filter to find an existing network for the cluster internal network
    # See Cluster API documentation for details
    networkFilter:
      # id: e63ca1a0-f69d-4fbf-b306-310857b1afe5
      # name: tenant-internal-net
    # Filter to find an existing subnet for the cluster internal network
    # See Cluster API documentation for details
    subnetFilter:
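      # As with networkFilter above, an existing subnet can be selected by ID or name,
      # for example (illustrative value):
      # name: tenant-internal-subnet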
    # The CIDR to use if creating a cluster network
    # This is only used if neither networkFilter nor subnetFilter is given
    nodeCidr: 192.168.3.0/24

# Settings for registry mirrors
# When a mirror is set, it will be tried for images but will fall back to the
# upstream registry if the image pull fails
# By default, use the mirrors hosted under quay.io/azimuth where possible
registryMirrors:
  # docker.io:
  #   upstream: https://registry-1.docker.io
  #   mirrors:
  #     - url: https://registry.my.domain/v2/dockerhub-public
  #       capabilities: ["pull", "resolve"]
  docker.io:
    - https://quay.io/v2/azimuth/docker.io
  ghcr.io:
    - https://quay.io/v2/azimuth/ghcr.io
  nvcr.io:
    - https://quay.io/v2/azimuth/nvcr.io
  quay.io:
    - https://quay.io/v2/azimuth/quay.io
  registry.k8s.io:
    - https://quay.io/v2/azimuth/registry.k8s.io

# A map of trusted CAs to add to the system trust on cluster nodes
trustedCAs: {}
  # custom-ca: |
  #   -----BEGIN CERTIFICATE-----
  #   ...certificate data...
  #   -----END CERTIFICATE-----

# List of additional packages to install on cluster nodes
additionalPackages: []
  # - nfs-common

# Settings for etcd
etcd:
  # The data directory to use for etcd
  # When a block device is specified, it is mounted at the parent directory, e.g. /var/lib/etcd
  # This is to avoid etcd complaining about the lost+found directory
  dataDir: /var/lib/etcd
  # Any extra command line arguments to pass to etcd
  extraArgs:
    # Set timeouts so that etcd tolerates 'slowness' (network + disks) better
    # This is at the expense of taking longer to detect a leader failure
    # https://etcd.io/docs/v3.5/tuning/#time-parameters
    heartbeat-interval: "500"  # defaults to 100ms in etcd 3.5
    election-timeout: "5000"  # defaults to 1000ms in etcd 3.5
    # Set a slightly larger space quota than the default (default is 2GB)
    quota-backend-bytes: "4294967296"
    # Listen for metrics on 0.0.0.0 so Prometheus can collect them
    listen-metrics-urls: http://0.0.0.0:2381
  # The block device configuration for etcd
  # If not specified, the root device is used
  blockDevice:
    # # The size of the block device
    # size: 20
    # # The type of the block device
    # # If set to "Volume", which is the default, then a Cinder volume is used to back the block device
    # # If set to "Local", local ephemeral storage is used to back the block device
    # type: Volume
    # # The volume type to use
    # # If not specified, the default volume type is used
    # volumeType:
    # # The volume availability zone to use
    # # If not specified, the machine availability zone is used
    # availabilityZone:

# Settings for the Kubernetes API server
apiServer:
  # Indicates whether to deploy a load balancer for the API server
  enableLoadBalancer: true
  # The load balancer provider to use (defaults to amphora)
  loadBalancerProvider:
  # Restrict load balancer access to selected IPs
  # allowedCidrs:
  #   - 192.168.0.0/16   # needed for cluster to init
  #   - 10.10.0.0/16     # IPv4 Internal Network
  #   - 123.123.123.123  # some other IPs
  # Indicates whether to associate a floating IP with the API server
  associateFloatingIP: true
  # The specific floating IP to associate with the API server
  # If not given, a new IP will be allocated if required
  floatingIP:
  # The specific fixed IP to associate with the API server
  # If enableLoadBalancer is true, this will become the VIP of the load balancer
  # If enableLoadBalancer and associateFloatingIP are both false, this should be
  # the IP of a pre-allocated port to be used as the VIP
  fixedIP:
  # The port to use for the API server
  port: 6443

# The OS distro used for cluster nodes (ubuntu, flatcar, etc.)
osDistro: ubuntu

# # API server authentication/authorization webhook. Set this to
# # integrate into KubeadmControlPlane and KubeadmConfigTemplate
# # possible values: k8s-keystone-auth
# authWebhook: k8s-keystone-auth

# Settings for the control plane
controlPlane:
  # The failure domains to use for control plane nodes
  # If given, should be a list of availability zones
  # Only used when omitFailureDomain = false
  failureDomains:
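    # For example, a list of availability zone names (illustrative values):
    # - az-1
    # - az-2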
  # Indicates whether the failure domain should be omitted from control plane nodes
  omitFailureDomain: true
  # The number of control plane machines to deploy
  # For high-availability, this should be greater than 1
  # For etcd quorum, it should be odd - usually 3, or 5 for very large clusters
  machineCount: 3
  # The Kubernetes version for the control plane
  kubernetesVersion:
  # The image to use for control plane machines
  machineImage:
  # The ID of the image to use for the control plane
  machineImageId:
  # The flavor to use for control plane machines
  machineFlavor:
  # The ports for control plane nodes
  # If no ports are given, the cluster internal network is used
  # See https://github.com/kubernetes-sigs/cluster-api-provider-openstack/blob/master/docs/book/src/clusteropenstack/configuration.md#network-filters
  machineNetworking:
    ports:
  # The root volume spec for control plane machines
  machineRootVolume:
    # The size of the disk to use
    # If not given, the ephemeral root disk from the flavor is used
    diskSize:
    # The volume type to use
    # If not specified, the default volume type is used
    # volumeType:
    # The volume availability zone to use
    # If not specified, the machine availability zone is used
    # availabilityZone:
  # The ID of the server group to use for control plane machines
  serverGroupId:
  # Labels to apply to the node objects in Kubernetes that correspond to control plane machines
  nodeLabels:
    # my.company.org/label: value
  # Additional block devices for control plane machines
  additionalBlockDevices: {}
    # # The key is the name for the block device in the context of the machine
    # # It is also used to tag the block device, so that the volume can be identified in instance metadata
    # scratch:
    #   # The size of the block device
    #   size: 20
    #   # The type of the block device
    #   # If set to "Volume", which is the default, then a Cinder volume is used to back the block device
    #   # If set to "Local", local ephemeral storage is used to back the block device
    #   # In both cases, the lifecycle of the device is the same as the machine
    #   type: Volume
    #   # The volume type to use
    #   # If not specified, the default volume type is used
    #   volumeType:
    #   # The volume availability zone to use
    #   # If not specified, the machine availability zone is used
    #   availabilityZone:
  # Indicates whether control plane machines should use config drive or not
  machineConfigDrive: false
  # The time to wait for a node to finish draining before it can be removed
  nodeDrainTimeout: 5m0s
  # The time to wait for a node to detach all volumes before it can be removed
  nodeVolumeDetachTimeout: 5m0s
  # The time to wait for the node resource to be deleted in Kubernetes when a
  # machine is marked for deletion
  nodeDeletionTimeout: 5m0s
  # The remediation strategy for the control plane nodes
  # We set these so that we don't keep remediating an unhealthy control plane forever
  remediationStrategy:
    # The maximum number of times that a remediation will be retried
    maxRetry: 3
    # The amount of time that a node created as a remediation has to become healthy
    # before the remediation is retried
    retryPeriod: 20m
    # The length of time that a node must be healthy before any future problems are
    # considered unrelated to the previous ones (i.e. the retry count is reset)
    minHealthyPeriod: 1h
  # The rollout strategy to use for the control plane nodes
  # By default, the strategy allows the control plane to begin provisioning new nodes
  # without first tearing down old ones
  rolloutStrategy:
    type: RollingUpdate
    rollingUpdate:
      # For the control plane, this can only be 0 or 1
      maxSurge: 1
  # The kubeadm config specification for the control plane
  # By default, this uses a simple configuration that enables the external cloud provider
  kubeadmConfigSpec:
    initConfiguration:
      nodeRegistration:
        name: '{{ local_hostname }}'
        kubeletExtraArgs:
          cloud-provider: external
    # As well as enabling an external cloud provider, we set the bind addresses for the
    # controller-manager, scheduler and kube-proxy to 0.0.0.0 so that Prometheus can reach
    # them to collect metrics
    clusterConfiguration:
      apiServer:
        extraArgs:
          cloud-provider: external
      controllerManager:
        extraArgs:
          cloud-provider: external
          bind-address: 0.0.0.0
      scheduler:
        extraArgs:
          bind-address: 0.0.0.0
    joinConfiguration:
      nodeRegistration:
        name: '{{ local_hostname }}'
        kubeletExtraArgs:
          cloud-provider: external
    kubeProxyConfiguration:
      metricsBindAddress: 0.0.0.0:10249
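  # Because Helm merges maps in user-supplied values over these defaults, additional kubeadm
  # options can usually be layered on from a separate values file without repeating the block
  # above, for example extra API server certificate SANs (illustrative hostname):
  # controlPlane:
  #   kubeadmConfigSpec:
  #     clusterConfiguration:
  #       apiServer:
  #         certSANs:
  #           - kubernetes.example.org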
  # The machine health check for auto-healing of the control plane
  # See https://cluster-api.sigs.k8s.io/tasks/healthcheck.html
  healthCheck:
    # Indicates if the machine health check should be enabled
    enabled: true
    # The spec for the health check
    spec:
      # By default, don't remediate control plane nodes when more than one is unhealthy
      maxUnhealthy: 1
      # If a node takes longer than 30 mins to start up, remediate it
      nodeStartupTimeout: 30m0s
      # By default, consider a control plane node that has not been Ready
      # for more than 5 mins unhealthy
      unhealthyConditions:
        - type: Ready
          status: Unknown
          timeout: 5m0s
        - type: Ready
          status: "False"
          timeout: 5m0s

# Defaults for node groups
# Each of these can be overridden in the specification for an individual node group
nodeGroupDefaults:
  # Indicates if the node group should be autoscaled
  autoscale: false
  # The failure domain for the node group
  failureDomain:
  # The flavor to use for machines in the node group
  machineFlavor:
  # The name of the default image to use for node group machines
  machineImage:
  # The ID of the image to use for node group machines
  machineImageId:
  # The Kubernetes version for node group machines
  kubernetesVersion:
  # The default networks and ports for worker nodes
  # If no ports are given, the cluster internal network is used
  # See https://github.com/kubernetes-sigs/cluster-api-provider-openstack/blob/master/docs/book/src/clusteropenstack/configuration.md#network-filters
  machineNetworking:
    ports:
  # The root volume spec for machines in the node group
  machineRootVolume:
    # The size of the disk to use
    # If not given, the ephemeral root disk from the flavor is used
    diskSize:
    # The volume type to use
    # If not specified, the default volume type is used
    # volumeType:
    # The volume availability zone to use
    # If not specified, the machine availability zone is used
    # availabilityZone:
  # The ID of the server group to use for machines in the node group
  serverGroupId:
  # Labels to apply to the node objects in Kubernetes that correspond to machines in the node group
  # By default, nodes get the label "capi.stackhpc.com/node-group="
  nodeLabels:
    # my.company.org/label: value
  # Additional block devices for node group machines
  # Options are the same as for controlPlane.additionalBlockDevices above
  additionalBlockDevices: {}
  # Indicates whether machines in the node group should use config drive or not
  machineConfigDrive: false
  # The time to wait for a node to finish draining before it can be removed
  nodeDrainTimeout: 5m0s
  # The time to wait for a node to detach all volumes before it can be removed
  nodeVolumeDetachTimeout: 5m0s
  # The time to wait for the node resource to be deleted in Kubernetes when a
  # machine is marked for deletion
  nodeDeletionTimeout: 5m0s
  # The rollout strategy to use for the node group
  # By default, this is set to do a rolling update within the existing resource envelope
  # of the node group, even if that means the node group temporarily has zero nodes
  rolloutStrategy:
    type: RollingUpdate
    rollingUpdate:
      # The maximum number of node group machines that can be unavailable during the update
      # Can be an absolute number or a percentage of the desired count
      maxUnavailable: 1
      # The maximum number of machines that can be scheduled above the desired count for
      # the group during an update
      # Can be an absolute number or a percentage of the desired count
      maxSurge: 0
      # One of Random, Newest, Oldest
      deletePolicy: Random
  # The default kubeadm config specification for worker nodes
  # This will be merged with any configuration given for specific node groups
  # By default, this uses a simple configuration that enables the external cloud provider
  kubeadmConfigSpec:
    joinConfiguration:
      nodeRegistration:
        name: '{{ local_hostname }}'
        kubeletExtraArgs:
          cloud-provider: external
  # The default machine health check for worker nodes
  # See https://cluster-api.sigs.k8s.io/tasks/healthcheck.html
  # Note that maxUnhealthy or unhealthyRange are evaluated per node group
  healthCheck:
    # Indicates if the machine health check should be enabled
    enabled: true
    # The spec for the health check
    spec:
      # By default, remediate unhealthy workers as long as they are less than 40% of
      # the total number of workers in the node group
      maxUnhealthy: 40%
      # If a node takes longer than 30 mins to start up, remediate it
      nodeStartupTimeout: 30m0s
      # By default, consider a worker node that has not been Ready for
      # more than 5 mins unhealthy
      unhealthyConditions:
        - type: Ready
          status: Unknown
          timeout: 5m0s
        - type: Ready
          status: "False"
          timeout: 5m0s

# The worker node groups for the cluster
nodeGroups:
  - # The name of the node group
    name: md-0
    # The number of machines in the node group if autoscale is false
    machineCount: 3
    # The minimum and maximum number of machines in the node group if autoscale is true
    # machineCountMin: 3
    # machineCountMax: 3
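  # Additional groups can be appended to this list, and any of the keys from
  # nodeGroupDefaults can be overridden per group - for example, an autoscaled
  # group with its own flavor (illustrative values):
  # - name: md-1
  #   autoscale: true
  #   machineCountMin: 1
  #   machineCountMax: 5
  #   machineFlavor: <another flavor name>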
# Configuration for the cluster autoscaler
autoscaler:
  # The image to use for the autoscaler component
  image:
    repository: registry.k8s.io/autoscaling/cluster-autoscaler
    pullPolicy: IfNotPresent
    tag: v1.29.0
  imagePullSecrets: []
  # Any extra args for the autoscaler
  extraArgs:
    # Make sure logs go to stderr
    logtostderr: true
    stderrthreshold: info
    # Output at a decent log level
    v: 4
    # Cordon nodes before terminating them so new pods are not scheduled there
    cordon-node-before-terminating: "true"
    # When scaling up, choose the node group that will result in the least idle CPU after scale-up
    expander: least-waste,random
    # Allow pods in kube-system to prevent a node from being deleted
    skip-nodes-with-system-pods: "true"
    # Allow pods with emptyDirs to be evicted
    skip-nodes-with-local-storage: "false"
    # Allow pods with custom controllers to be evicted
    skip-nodes-with-custom-controller-pods: "false"
  # Pod-level security context
  podSecurityContext:
    runAsNonRoot: true
    runAsUser: 1001
  # Container-level security context
  securityContext:
    allowPrivilegeEscalation: false
    capabilities:
      drop: [ALL]
    readOnlyRootFilesystem: true
  # Resource requests and limits for pods
  resources: {}
  # Node selector for pods
  nodeSelector: {}
  # Tolerations for pods
  tolerations: []
  # Topology spread constraints for pods
  topologySpreadConstraints: []
  # Affinity rules for pods
  affinity: {}

# Configuration for cluster addons
addons:
  # Indicates if cluster addons should be deployed
  enabled: true
  # Enable the OpenStack integrations by default
  openstack:
    enabled: true
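# A cluster can then be deployed by passing a clouds.yaml and an overrides file based on the
# values above to Helm, along these lines (the chart reference is illustrative - see the
# repository README for the exact repository and chart name):
#   helm upgrade my-cluster <repo>/openstack-cluster --install -f ./clouds.yaml -f ./overrides.yaml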