# Configuration that is shared between the cluster and the addons
global:
  # The Kubernetes version of the cluster
  # This should match the version of kubelet and kubeadm in the image
  kubernetesVersion:

# The name of an existing secret containing a clouds.yaml and optional cacert
cloudCredentialsSecretName:
# OR
# Content for the clouds.yaml file
# Having this as a top-level item allows a clouds.yaml file from OpenStack to be used as a values file
clouds:
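# As an illustration only, the structure mirrors a standard clouds.yaml, e.g. for an
# application credential (every value below is a placeholder):
# clouds:
#   openstack:
#     auth:
#       auth_url: https://keystone.my.cloud:5000
#       application_credential_id: "<id>"
#       application_credential_secret: "<secret>"
#     region_name: RegionOne
#     interface: public
#     identity_api_version: 3
#     auth_type: v3applicationcredential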
# The PEM-encoded CA certificate for the specified cloud
cloudCACert:
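# As an illustration only, this is expected to be a PEM block, e.g.
# cloudCACert: |
#   -----BEGIN CERTIFICATE-----
#   ...certificate data...
#   -----END CERTIFICATE-----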

# The name of the cloud to use from the specified clouds.yaml
cloudName: openstack

# The name of the image to use for cluster machines
# This is used when creating machines using ephemeral root disks
machineImage:
# The ID of the image to use for cluster machines
# This is required when creating machines with volumes as root disks
machineImageId:

# The name of the SSH key to inject into cluster machines
machineSSHKeyName:

# Values for the Kubernetes cluster network
kubeNetwork:
  # By default, use the private network range 172.16.0.0/12 for the cluster network
  # We split it into two equally-sized blocks for pods and services
  # This gives ~500,000 addresses in each block
  pods:
    cidrBlocks:
      - 172.16.0.0/13
  services:
    cidrBlocks:
      - 172.24.0.0/13
  serviceDomain: cluster.local

# Settings for the OpenStack networking for the cluster
clusterNetworking:
  # Custom nameservers to use for the hosts
  dnsNameservers:
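  # For example (illustrative resolver addresses):
  #   - 8.8.8.8
  #   - 8.8.4.4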
  # Indicates if security groups should be managed by the cluster
  manageSecurityGroups: true
  # Indicates if the managed security groups should allow all in-cluster traffic
  # The default CNI installed by the addons is Calico (see addons.cni.type), so this is true by default
  allowAllInClusterTraffic: true
  # The ID of the external network to use
  # If not given, the external network will be detected
  externalNetworkId:
  # Details of the internal network to use
  internalNetwork:
    # Filter to find an existing network for the cluster internal network
    # See Cluster API documentation for details
    networkFilter:
      # id: e63ca1a0-f69d-4fbf-b306-310857b1afe5
      # name: tenant-internal-net
    # Filter to find an existing subnet for the cluster internal network
    # See Cluster API documentation for details
    subnetFilter:
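      # As with networkFilter above, e.g. (name is hypothetical):
      # name: tenant-internal-subnet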
    # The CIDR to use if creating a cluster network
    # This is only used if neither networkFilter nor subnetFilter is given
    nodeCidr: 192.168.3.0/24

# Settings for registry mirrors
registryMirrors: {}
  # docker.io: ["https://registry.my.domain/v2/dockerhub-public"]

# Settings for node-level registry auth
registryAuth: {}
  # registry-1.docker.io:
  #   username: "<username>"
  #   password: "<password>"

# A map of trusted CAs to add to the system trust on cluster nodes
trustedCAs: {}
  # custom-ca: |
  #   -----BEGIN CERTIFICATE-----
  #   ...certificate data...
  #   -----END CERTIFICATE-----

# List of additional packages to install on cluster nodes
additionalPackages: []
  # - nfs-common

# Settings for the Kubernetes API server
apiServer:
  # Indicates whether to deploy a load balancer for the API server
  enableLoadBalancer: true
  # Indicates whether to associate a floating IP with the API server
  associateFloatingIP: true
  # The specific floating IP to associate with the API server
  # If not given, a new IP will be allocated if required
  floatingIP:
  # The specific fixed IP to associate with the API server
  # If enableLoadBalancer is true, this will become the VIP of the load balancer
  # If enableLoadBalancer and associateFloatingIP are both false, this should be
  # the IP of a pre-allocated port to be used as the VIP
  fixedIP:
  # The port to use for the API server
  port: 6443

# Settings for the control plane
controlPlane:
  # The failure domains to use for control plane nodes
  # If given, should be a list of availability zones
  # Only used when omitFailureDomain = false
  failureDomains:
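  # For example (availability zone names are cloud-specific):
  #   - nova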
  # Indicates whether the failure domain should be omitted from control plane nodes
  omitFailureDomain: true
  # The number of control plane machines to deploy
  # For high availability, this should be greater than 1
  # For etcd quorum, it should be odd - usually 3, or 5 for very large clusters
  machineCount: 3
  # The flavor to use for control plane machines
  machineFlavor:
  # The networks and ports for control plane nodes
  # If neither networks nor ports are given, the cluster internal network is used
  # See https://github.com/kubernetes-sigs/cluster-api-provider-openstack/blob/master/docs/book/src/clusteropenstack/configuration.md#network-filters
  machineNetworking:
    networks:
    ports:
  # The root volume spec for control plane machines
  # If not given, the ephemeral root disk from the flavor is used
  machineRootVolume:
    # # The size of the disk to use
    # diskSize:
    # # The volume type to use
    # # If not specified, the default volume type is used
    # volumeType:
    # # The volume availability zone to use
    # # If not specified, the machine availability zone is used
    # availabilityZone:
  # The time to wait for a node to finish draining before it can be removed
  nodeDrainTimeout: 5m
  # The rollout strategy to use for the control plane nodes
  # By default, the strategy allows the control plane to begin provisioning new nodes
  # without first tearing down old ones
  rolloutStrategy:
    type: RollingUpdate
    rollingUpdate:
      # For the control plane, this can only be 0 or 1
      maxSurge: 1
  # The kubeadm config specification for the control plane
  # By default, this uses a simple configuration that enables the external cloud provider
  kubeadmConfigSpec:
    initConfiguration:
      nodeRegistration:
        name: '{{ local_hostname }}'
        kubeletExtraArgs:
          cloud-provider: external
    # As well as enabling an external cloud provider, we set the bind addresses for the
    # etcd metrics, controller-manager, scheduler and kube-proxy to 0.0.0.0 so that Prometheus
    # can reach them to collect metrics
    clusterConfiguration:
      etcd:
        local:
          extraArgs:
            listen-metrics-urls: http://0.0.0.0:2381
      apiServer:
        extraArgs:
          cloud-provider: external
      controllerManager:
        extraArgs:
          cloud-provider: external
          bind-address: 0.0.0.0
      scheduler:
        extraArgs:
          bind-address: 0.0.0.0
    joinConfiguration:
      nodeRegistration:
        name: '{{ local_hostname }}'
        kubeletExtraArgs:
          cloud-provider: external
    kubeProxyConfiguration:
      metricsBindAddress: 0.0.0.0:10249
  # The machine health check for auto-healing of the control plane
  # See https://cluster-api.sigs.k8s.io/tasks/healthcheck.html
  healthCheck:
    # Indicates if the machine health check should be enabled
    enabled: true
    # The spec for the health check
    spec:
      # By default, unhealthy control plane nodes are always remediated
      maxUnhealthy: 100%
      # By default, consider a control plane node that has not been Ready
      # for more than 5 mins unhealthy
      unhealthyConditions:
        - type: Ready
          status: Unknown
          timeout: 300s
        - type: Ready
          status: "False"
          timeout: 300s

# Defaults for node groups
# Each of these can be overridden in the specification for an individual node group
nodeGroupDefaults:
  # Indicates if the node group should be autoscaled
  autoscale: false
  # The failure domain for the node group
  failureDomain:
  # The flavor to use for machines in the node group
  machineFlavor:
  # The default networks and ports for worker nodes
  # If neither networks nor ports are given, the cluster internal network is used
  # See https://github.com/kubernetes-sigs/cluster-api-provider-openstack/blob/master/docs/book/src/clusteropenstack/configuration.md#network-filters
  machineNetworking:
    networks:
    ports:
  # The root volume spec for machines in the node group
  # If not given, the ephemeral root disk from the flavor is used
  machineRootVolume:
    # # The size of the disk to use
    # diskSize:
    # # The volume type to use
    # # If not specified, the default volume type is used
    # volumeType:
    # # The volume availability zone to use
    # # If not specified, the machine availability zone is used
    # availabilityZone:
  # The time to wait for a node to finish draining before it can be removed
  nodeDrainTimeout: 5m
  # The rollout strategy to use for the node group
  # By default, this is set to do a rolling update within the existing resource envelope
  # of the node group, even if that means the node group temporarily has zero nodes
  rolloutStrategy:
    type: RollingUpdate
    rollingUpdate:
      # The maximum number of node group machines that can be unavailable during the update
      # Can be an absolute number or a percentage of the desired count
      maxUnavailable: 1
      # The maximum number of machines that can be scheduled above the desired count for
      # the group during an update
      # Can be an absolute number or a percentage of the desired count
      maxSurge: 0
      # One of Random, Newest, Oldest
      deletePolicy: Random
  # The default kubeadm config specification for worker nodes
  # This will be merged with any configuration given for specific node groups
  # By default, this uses a simple configuration that enables the external cloud provider
  kubeadmConfigSpec:
    joinConfiguration:
      nodeRegistration:
        name: '{{ local_hostname }}'
        kubeletExtraArgs:
          cloud-provider: external
  # The default machine health check for worker nodes
  # See https://cluster-api.sigs.k8s.io/tasks/healthcheck.html
  # Note that maxUnhealthy or unhealthyRange is evaluated per node group
  healthCheck:
    # Indicates if the machine health check should be enabled
    enabled: true
    # The spec for the health check
    spec:
      # By default, unhealthy worker nodes are always remediated
      maxUnhealthy: 100%
      # If a node takes longer than 10 mins to start up, remediate it
      nodeStartupTimeout: 10m
      # By default, consider a worker node that has not been Ready for
      # more than 5 mins unhealthy
      unhealthyConditions:
        - type: Ready
          status: Unknown
          timeout: 300s
        - type: Ready
          status: "False"
          timeout: 300s

# The worker node groups for the cluster
nodeGroups:
  - # The name of the node group
    name: md-0
    # The number of machines in the node group if autoscale is false
    machineCount: 3
    # The minimum and maximum number of machines in the node group if autoscale is true
    # machineCountMin: 3
    # machineCountMax: 3
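  # As an illustration, an additional autoscaled node group might look like this
  # (the name and flavor are hypothetical):
  # - name: md-autoscale
  #   autoscale: true
  #   machineCountMin: 1
  #   machineCountMax: 5
  #   machineFlavor: my-worker-flavor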

# Configuration for the cluster autoscaler
autoscaler:
  # The image to use for the autoscaler component
  image:
    repository: k8s.gcr.io/autoscaling/cluster-autoscaler
    pullPolicy: IfNotPresent
    # The tag depends on the Kubernetes version on the target cluster, but can be overridden
    tag:
    # These are the current latest versions for each Kubernetes minor version
    # If a tag is not specified here for the target Kubernetes version, vX.Y.0 is used
    tags:
      "1.25": v1.25.0
      "1.24": v1.24.0
      "1.23": v1.23.1
      "1.22": v1.22.3
  imagePullSecrets: []
  # Pod-level security context
  podSecurityContext:
    runAsNonRoot: true
    runAsUser: 1001
  # Container-level security context
  securityContext:
    allowPrivilegeEscalation: false
    capabilities:
      drop: [ALL]
    readOnlyRootFilesystem: true
  # Resource requests and limits for pods
  resources: {}
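  # For example, to constrain the autoscaler pod (values are illustrative):
  # resources:
  #   requests:
  #     cpu: 100m
  #     memory: 300Mi
  #   limits:
  #     memory: 300Mi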
  # Node selector for pods
  nodeSelector: {}
  # Tolerations for pods
  tolerations: []
  # Affinity rules for pods
  affinity: {}

# Configuration for cluster addons
addons:
  # Indicates if cluster addons should be deployed
  enabled: true

  # The label to use to indicate that a configmap or secret should be watched
  watchLabel: addons.stackhpc.com/watch

  # Settings for hook jobs
  hooks:
    image:
      repository: ghcr.io/stackhpc/k8s-utils
      tag:  # Defaults to chart appVersion if not given
      pullPolicy: IfNotPresent
    imagePullSecrets: []
    backoffLimit: 1000
    activeDeadlineSeconds: 3600
    podSecurityContext:
      runAsNonRoot: true
    securityContext:
      allowPrivilegeEscalation: false
      capabilities:
        drop: [ALL]
      readOnlyRootFilesystem: true
    resources: {}
    hostNetwork: false
    tolerations: []
    nodeSelector: {}
    affinity: {}

  # Settings for the CNI addon
  cni:
    # Indicates if a CNI should be deployed
    enabled: true
    # The CNI to deploy - supported values are calico or cilium
    type: calico
    # Settings for the calico CNI
    # See https://projectcalico.docs.tigera.io/getting-started/kubernetes/helm
    calico:
      chart:
        repo: https://projectcalico.docs.tigera.io/charts
        name: tigera-operator
        version: v3.23.3
      release:
        namespace: tigera-operator
        values: {}
    # Settings for the Cilium CNI
    # See https://docs.cilium.io/en/stable/gettingstarted/k8s-install-helm/ for details
    cilium:
      chart:
        repo: https://helm.cilium.io/
        name: cilium
        version: 1.11.1
      release:
        namespace: kube-system
        values: {}

  # Settings for the OpenStack integrations
  openstack:
    # Indicates if the OpenStack integrations should be enabled
    enabled: true
    # The target namespace for the OpenStack integrations
    targetNamespace: openstack-system
    # cloud-config options for the OpenStack integrations
    # The [Global] section is configured to use the target cloud
    # See https://github.com/kubernetes/cloud-provider-openstack/blob/master/docs/openstack-cloud-controller-manager/using-openstack-cloud-controller-manager.md#config-openstack-cloud-controller-manager
    # and https://github.com/kubernetes/cloud-provider-openstack/blob/master/docs/cinder-csi-plugin/using-cinder-csi-plugin.md#block-storage
    cloudConfig:
      # By default, ignore volume AZs for Cinder as most clouds have a single globally-attachable Cinder AZ
      BlockStorage:
        ignore-volume-az: true
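      # Other cloud-config sections can be set in the same way; as an illustration only,
      # enabling Octavia health monitors for LoadBalancer services might look like this
      # (see the cloud-provider-openstack docs linked above for the available options):
      # LoadBalancer:
      #   create-monitor: true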
    # Settings for the Cloud Controller Manager (CCM)
    ccm:
      # Indicates if the OpenStack CCM should be enabled
      # By default, the CCM is enabled if the OpenStack integrations are enabled
      # See https://github.com/kubernetes/cloud-provider-openstack/blob/master/charts/openstack-cloud-controller-manager/values.yaml
      enabled: true
      chart:
        repo: https://kubernetes.github.io/cloud-provider-openstack
        name: openstack-cloud-controller-manager
        version: 1.3.0
      values: {}
    # Settings for the Cinder CSI plugin
    csiCinder:
      # Indicates if the Cinder CSI should be enabled
      # By default, it is enabled if the OpenStack integrations are enabled
      # See https://github.com/kubernetes/cloud-provider-openstack/blob/master/charts/cinder-csi-plugin/values.yaml
      enabled: true
      chart:
        repo: https://kubernetes.github.io/cloud-provider-openstack
        name: openstack-cinder-csi
        version: 2.2.0
      values: {}
      # Variables affecting the definition of the storage class
      storageClass:
        # Indicates if the storage class should be enabled
        enabled: true
        # The name of the storage class
        name: csi-cinder
        # Indicates if the storage class should be annotated as the default storage class
        isDefault: true
        # The reclaim policy for the storage class
        reclaimPolicy: Delete
        # Indicates if volume expansion is allowed
        allowVolumeExpansion: true
        # The Cinder availability zone to use for volumes provisioned by the storage class
        availabilityZone: nova
        # The Cinder volume type to use for volumes provisioned by the storage class
        # If not given, the default volume type will be used
        volumeType:
        # The allowed topologies for the storage class
        allowedTopologies:
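          # For example, to restrict volumes to a single zone using the Cinder CSI
          # topology key (the zone name is illustrative):
          # - matchLabelExpressions:
          #     - key: topology.cinder.csi.openstack.org/zone
          #       values:
          #         - nova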

  # Settings for the metrics server
  # https://github.com/kubernetes-sigs/metrics-server#helm-chart
  metricsServer:
    # Indicates if the metrics server should be deployed
    enabled: true
    chart:
      repo: https://kubernetes-sigs.github.io/metrics-server
      name: metrics-server
      version: 3.8.2
    release:
      namespace: kube-system
      values: {}

  # Settings for the Kubernetes dashboard
  # https://github.com/kubernetes/dashboard/tree/master/charts/helm-chart/kubernetes-dashboard
  kubernetesDashboard:
    # Indicates if the Kubernetes dashboard should be enabled
    enabled: false
    chart:
      repo: https://kubernetes.github.io/dashboard
      name: kubernetes-dashboard
      version: 5.10.0
    release:
      namespace: kubernetes-dashboard
      values: {}

  # Settings for ingress controllers
  ingress:
    # Settings for the Nginx ingress controller
    # https://github.com/kubernetes/ingress-nginx/tree/main/charts/ingress-nginx#configuration
    nginx:
      # Indicates if the Nginx ingress controller should be enabled
      enabled: false
      chart:
        repo: https://kubernetes.github.io/ingress-nginx
        name: ingress-nginx
        version: 4.2.5
      release:
        namespace: ingress-nginx
        values: {}

  # Settings for cluster monitoring
  monitoring:
    # Indicates if the cluster monitoring should be enabled
    enabled: false
    kubePrometheusStack:
      chart:
        repo: https://prometheus-community.github.io/helm-charts
        name: kube-prometheus-stack
        version: 40.1.0
      release:
        namespace: monitoring-system
        values: {}
    lokiStack:
      enabled: true
      chart:
        repo: https://grafana.github.io/helm-charts
        name: loki-stack
        version: 2.8.2
      release:
        namespace: monitoring-system
        values: {}

  # Settings for node feature discovery
  # https://github.com/kubernetes-sigs/node-feature-discovery/tree/master/deployment/helm/node-feature-discovery
  nodeFeatureDiscovery:
    # Indicates if node feature discovery should be enabled
    enabled: true
    chart:
      repo: https://kubernetes-sigs.github.io/node-feature-discovery/charts
      name: node-feature-discovery
      version: 0.11.2
    release:
      namespace: node-feature-discovery
      values: {}

  # Settings for the NVIDIA GPU operator
  nvidiaGPUOperator:
    # Indicates if the NVIDIA GPU operator should be enabled
    # Note that because it uses node feature discovery to run only on nodes
    # with an NVIDIA GPU available, the overhead of enabling this on clusters
    # that do not need it now but may need it in the future is low
    enabled: true
    chart:
      repo: https://nvidia.github.io/gpu-operator
      name: gpu-operator
      version: v1.11.1
    release:
      namespace: gpu-operator
      values: {}

  # Settings for the Mellanox network operator
  mellanoxNetworkOperator:
    # Indicates if the network operator should be enabled
    # Note that because it uses node feature discovery to run only on nodes
    # with a Mellanox NIC available, the overhead of enabling this on clusters
    # that do not need it now but may need it in the future is low
    enabled: true
    chart:
      repo: https://mellanox.github.io/network-operator
      name: network-operator
      version: 1.3.0
    release:
      namespace: network-operator
      values: {}