2022-10-12 16:30:17 +01:00

536 lines
19 KiB
YAML
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# Configuration that is shared between the cluster and the addons
global:
  # The Kubernetes version of the cluster
  # This should match the version of kubelet and kubeadm in the image
  kubernetesVersion:
# Cloud credentials: either reference an existing secret OR supply the content inline
#
# The name of an existing secret containing a clouds.yaml and, optionally, a cacert
cloudCredentialsSecretName:
# OR, instead of referencing an existing secret:
# Content for the clouds.yaml file
# Having this as a top-level item allows a clouds.yaml file from OpenStack to be used
# directly as a values file
clouds:
# The PEM-encoded CA certificate for the specified cloud
cloudCACert:
# The name of the cloud to use from the specified clouds.yaml
cloudName: openstack
# The name of the image to use for cluster machines
# This is used when creating machines with ephemeral root disks
machineImage:
# The ID of the image to use for cluster machines
# This is required when creating machines with volumes as root disks
# (see the machineRootVolume settings for the control plane and node groups)
machineImageId:
# The name of the SSH key to inject into cluster machines
machineSSHKeyName:
# Values for the Kubernetes cluster network
kubeNetwork:
  # By default, use the private network range 172.16.0.0/12 for the cluster network
  # We split it into two equally-sized /13 blocks, one for pods and one for services
  # This gives ~500,000 addresses (2^19 = 524,288) in each block
  pods:
    cidrBlocks:
      - 172.16.0.0/13
  services:
    cidrBlocks:
      - 172.24.0.0/13
  serviceDomain: cluster.local
# Settings for the OpenStack networking for the cluster
clusterNetworking:
  # Custom nameservers to use for the hosts
  dnsNameservers:
  # Indicates if security groups should be managed by the cluster
  manageSecurityGroups: true
  # Indicates if the managed security groups should allow all in-cluster traffic
  # The addons can deploy Calico (the default) or Cilium (see addons.cni.type),
  # so this is true by default to permit all in-cluster traffic for either CNI
  allowAllInClusterTraffic: true
  # The ID of the external network to use
  # If not given, the external network will be detected
  externalNetworkId:
  # Details of the internal network to use
  internalNetwork:
    # Filter to find an existing network for the cluster internal network
    # See Cluster API documentation for details
    networkFilter:
      # id: e63ca1a0-f69d-4fbf-b306-310857b1afe5
      # name: tenant-internal-net
    # Filter to find an existing subnet for the cluster internal network
    # See Cluster API documentation for details
    subnetFilter:
    # The CIDR to use if creating a cluster network
    # This is only used if neither networkFilter nor subnetFilter is given
    nodeCidr: 192.168.3.0/24
# Settings for registry mirrors
# The commented example maps a registry name to a list of mirror URLs
registryMirrors: {}
#   docker.io: ["https://registry.my.domain/v2/dockerhub-public"]
# Settings for node-level registry auth
# The commented example gives a username and password for one registry
registryAuth: {}
#   registry-1.docker.io:
#     username: "<username>"
#     password: "<password>"
# A map of trusted CAs to add to the system trust on cluster nodes
# The commented example maps a name to a PEM-encoded certificate
trustedCAs: {}
#   custom-ca: |
#     -----BEGIN CERTIFICATE-----
#     ...certificate data...
#     -----END CERTIFICATE-----
# List of additional packages to install on cluster nodes
additionalPackages: []
#   - nfs-common
# Settings for the Kubernetes API server
apiServer:
  # Indicates whether to deploy a load balancer for the API server
  enableLoadBalancer: true
  # Indicates whether to associate a floating IP with the API server
  associateFloatingIP: true
  # The specific floating IP to associate with the API server
  # If not given, a new IP will be allocated if required
  floatingIP:
  # The specific fixed IP to associate with the API server
  # If enableLoadBalancer is true, this will become the VIP of the load balancer
  # If enableLoadBalancer and associateFloatingIP are both false, this should be
  # the IP of a pre-allocated port to be used as the VIP
  fixedIP:
  # The port to use for the API server
  port: 6443
# Settings for the control plane
controlPlane:
  # The failure domains to use for control plane nodes
  # If given, should be a list of availability zones
  # Only used when omitFailureDomain = false
  failureDomains:
  # Indicates whether the failure domain should be omitted from control plane nodes
  omitFailureDomain: true
  # The number of control plane machines to deploy
  # For high-availability, this should be greater than 1
  # For etcd quorum, it should be odd - usually 3, or 5 for very large clusters
  machineCount: 3
  # The flavor to use for control plane machines
  machineFlavor:
  # The networks and ports for control plane nodes
  # If neither networks nor ports are given, the cluster internal network is used
  # See https://github.com/kubernetes-sigs/cluster-api-provider-openstack/blob/master/docs/book/src/clusteropenstack/configuration.md#network-filters
  machineNetworking:
    networks:
    ports:
  # The root volume spec for control plane machines
  # If not given, the ephemeral root disk from the flavor is used
  machineRootVolume:
    # # The size of the disk to use
    # diskSize:
    # # The volume type to use
    # # If not specified, the default volume type is used
    # volumeType:
    # # The volume availability zone to use
    # # If not specified, the machine availability zone is used
    # availabilityZone:
  # The time to wait for a node to finish draining before it can be removed
  nodeDrainTimeout: 5m
  # The rollout strategy to use for the control plane nodes
  # By default, the strategy allows the control plane to begin provisioning new nodes
  # without first tearing down old ones
  rolloutStrategy:
    type: RollingUpdate
    rollingUpdate:
      # For the control plane, this can only be 0 or 1
      maxSurge: 1
  # The kubeadm config specification for the control plane
  # By default, this uses a simple configuration that enables the external cloud provider
  kubeadmConfigSpec:
    initConfiguration:
      nodeRegistration:
        name: '{{ local_hostname }}'
        kubeletExtraArgs:
          cloud-provider: external
    # As well as enabling an external cloud provider, we set the bind addresses for the
    # etcd metrics, controller-manager, scheduler and kube-proxy to 0.0.0.0 so that Prometheus
    # can reach them to collect metrics
    clusterConfiguration:
      etcd:
        local:
          extraArgs:
            listen-metrics-urls: http://0.0.0.0:2381
      apiServer:
        extraArgs:
          cloud-provider: external
      controllerManager:
        extraArgs:
          cloud-provider: external
          bind-address: 0.0.0.0
      scheduler:
        extraArgs:
          bind-address: 0.0.0.0
    joinConfiguration:
      nodeRegistration:
        name: '{{ local_hostname }}'
        kubeletExtraArgs:
          cloud-provider: external
    # The kube-proxy metrics bind address referred to in the note above
    # NOTE(review): assumed to sit directly under kubeadmConfigSpec, alongside
    # clusterConfiguration and joinConfiguration - confirm against the chart templates
    kubeProxyConfiguration:
      metricsBindAddress: 0.0.0.0:10249
  # The machine health check for auto-healing of the control plane
  # See https://cluster-api.sigs.k8s.io/tasks/healthcheck.html
  healthCheck:
    # Indicates if the machine health check should be enabled
    enabled: true
    # The spec for the health check
    spec:
      # By default, unhealthy control plane nodes are always remediated
      maxUnhealthy: 100%
      # By default, consider a control plane node that has not been Ready
      # for more than 5 mins unhealthy
      unhealthyConditions:
        - type: Ready
          status: Unknown
          timeout: 300s
        - type: Ready
          status: "False"
          timeout: 300s
# Defaults for node groups
# Each of these can be overridden in the specification for an individual node group
nodeGroupDefaults:
  # Indicates if the node group should be autoscaled
  autoscale: false
  # The failure domain for the node group
  failureDomain:
  # The flavor to use for machines in the node group
  machineFlavor:
  # The default networks and ports for worker nodes
  # If neither networks nor ports are given, the cluster internal network is used
  # See https://github.com/kubernetes-sigs/cluster-api-provider-openstack/blob/master/docs/book/src/clusteropenstack/configuration.md#network-filters
  machineNetworking:
    networks:
    ports:
  # The root volume spec for machines in the node group
  # If not given, the ephemeral root disk from the flavor is used
  machineRootVolume:
    # # The size of the disk to use
    # diskSize:
    # # The volume type to use
    # # If not specified, the default volume type is used
    # volumeType:
    # # The volume availability zone to use
    # # If not specified, the machine availability zone is used
    # availabilityZone:
  # The time to wait for a node to finish draining before it can be removed
  nodeDrainTimeout: 5m
  # The rollout strategy to use for the node group
  # By default, this is set to do a rolling update within the existing resource envelope
  # of the node group, even if that means the node group temporarily has zero nodes
  rolloutStrategy:
    type: RollingUpdate
    rollingUpdate:
      # The maximum number of node group machines that can be unavailable during the update
      # Can be an absolute number or a percentage of the desired count
      maxUnavailable: 1
      # The maximum number of machines that can be scheduled above the desired count for
      # the group during an update
      # Can be an absolute number or a percentage of the desired count
      maxSurge: 0
      # One of Random, Newest, Oldest
      deletePolicy: Random
  # The default kubeadm config specification for worker nodes
  # This will be merged with any configuration given for specific node groups
  # By default, this uses a simple configuration that enables the external cloud provider
  kubeadmConfigSpec:
    joinConfiguration:
      nodeRegistration:
        name: '{{ local_hostname }}'
        kubeletExtraArgs:
          cloud-provider: external
  # The default machine health check for worker nodes
  # See https://cluster-api.sigs.k8s.io/tasks/healthcheck.html
  # Note that maxUnhealthy or unhealthyRange are evaluated per node group
  healthCheck:
    # Indicates if the machine health check should be enabled
    enabled: true
    # The spec for the health check
    spec:
      # By default, unhealthy worker nodes are always remediated
      maxUnhealthy: 100%
      # If a node takes longer than 10 mins to start up, remediate it
      nodeStartupTimeout: 10m
      # By default, consider a worker node that has not been Ready for
      # more than 5 mins unhealthy
      unhealthyConditions:
        - type: Ready
          status: Unknown
          timeout: 300s
        - type: Ready
          status: "False"
          timeout: 300s
# The worker node groups for the cluster
# Each list item describes one node group
nodeGroups:
  - # The name of the node group
    name: md-0
    # The number of machines in the node group if autoscale is false
    machineCount: 3
    # The minimum and maximum number of machines in the node group if autoscale is true
    # machineCountMin: 3
    # machineCountMax: 3
# Configuration for the cluster autoscaler
autoscaler:
  # The image to use for the autoscaler component
  image:
    # NOTE(review): k8s.gcr.io has been superseded by registry.k8s.io upstream -
    # consider switching once any registry mirrors have been updated to match
    repository: k8s.gcr.io/autoscaling/cluster-autoscaler
    pullPolicy: IfNotPresent
    # The tag depends on the Kubernetes version on the target cluster, but can be overridden
    tag:
    # These are the current latest versions for each Kubernetes minor version
    # If a tag is not specified here for the target Kubernetes version, vX.Y.0 is used
    # The keys are quoted so that they stay strings rather than being read as floats
    tags:
      "1.25": v1.25.0
      "1.24": v1.24.0
      "1.23": v1.23.1
      "1.22": v1.22.3
  imagePullSecrets: []
  # Pod-level security context
  podSecurityContext:
    runAsNonRoot: true
    runAsUser: 1001
  # Container-level security context
  securityContext:
    allowPrivilegeEscalation: false
    capabilities:
      drop: [ALL]
    readOnlyRootFilesystem: true
  # Resource requests and limits for pods
  resources: {}
  # Node selector for pods
  nodeSelector: {}
  # Tolerations for pods
  tolerations: []
  # Affinity rules for pods
  affinity: {}
# Configuration for cluster addons
# NOTE(review): the nesting in this section was reconstructed from the comments and key
# names; in particular "values" is assumed to belong to "release" wherever a release
# block is present - confirm against the addon templates
addons:
  # Indicates if cluster addons should be deployed
  enabled: true
  # The label to use to indicate that a configmap or secret should be watched
  watchLabel: addons.stackhpc.com/watch
  # Settings for hook jobs
  hooks:
    image:
      repository: ghcr.io/stackhpc/k8s-utils
      tag:  # Defaults to chart appVersion if not given
      pullPolicy: IfNotPresent
    imagePullSecrets: []
    backoffLimit: 1000
    activeDeadlineSeconds: 3600
    podSecurityContext:
      runAsNonRoot: true
    securityContext:
      allowPrivilegeEscalation: false
      capabilities:
        drop: [ALL]
      readOnlyRootFilesystem: true
    resources: {}
    hostNetwork: false
    tolerations: []
    nodeSelector: {}
    affinity: {}
  # Settings for the CNI addon
  cni:
    # Indicates if a CNI should be deployed
    enabled: true
    # The CNI to deploy - supported values are calico or cilium
    type: calico
    # Settings for the calico CNI
    # See https://projectcalico.docs.tigera.io/getting-started/kubernetes/helm
    calico:
      chart:
        repo: https://projectcalico.docs.tigera.io/charts
        name: tigera-operator
        version: v3.23.3
      release:
        namespace: tigera-operator
        values: {}
    # Settings for the Cilium CNI
    # See https://docs.cilium.io/en/stable/gettingstarted/k8s-install-helm/ for details
    cilium:
      chart:
        repo: https://helm.cilium.io/
        name: cilium
        version: 1.11.1
      release:
        namespace: kube-system
        values: {}
  # Settings for the OpenStack integrations
  openstack:
    # Indicates if the OpenStack integrations should be enabled
    enabled: true
    # The target namespace for the OpenStack integrations
    targetNamespace: openstack-system
    # cloud-config options for the OpenStack integrations
    # The [Global] section is configured to use the target cloud
    # See https://github.com/kubernetes/cloud-provider-openstack/blob/master/docs/openstack-cloud-controller-manager/using-openstack-cloud-controller-manager.md#config-openstack-cloud-controller-manager
    # and https://github.com/kubernetes/cloud-provider-openstack/blob/master/docs/cinder-csi-plugin/using-cinder-csi-plugin.md#block-storage
    cloudConfig:
      # By default, ignore volume AZs for Cinder as most clouds have a single globally-attachable Cinder AZ
      BlockStorage:
        ignore-volume-az: true
    # Settings for the Cloud Controller Manager (CCM)
    ccm:
      # Indicates if the OpenStack CCM should be enabled
      # By default, the CCM is enabled if the OpenStack integrations are enabled
      # See https://github.com/kubernetes/cloud-provider-openstack/blob/master/charts/openstack-cloud-controller-manager/values.yaml
      enabled: true
      chart:
        repo: https://kubernetes.github.io/cloud-provider-openstack
        name: openstack-cloud-controller-manager
        version: 1.3.0
      values: {}
    # Settings for the Cinder CSI plugin
    csiCinder:
      # Indicates if the Cinder CSI should be enabled
      # By default, it is enabled if the OpenStack integrations are enabled
      # See https://github.com/kubernetes/cloud-provider-openstack/blob/master/charts/cinder-csi-plugin/values.yaml
      enabled: true
      chart:
        repo: https://kubernetes.github.io/cloud-provider-openstack
        name: openstack-cinder-csi
        version: 2.2.0
      values: {}
      # Variables affecting the definition of the storage class
      storageClass:
        # Indicates if the storage class should be enabled
        enabled: true
        # The name of the storage class
        name: csi-cinder
        # Indicates if the storage class should be annotated as the default storage class
        isDefault: true
        # The reclaim policy for the storage class
        reclaimPolicy: Delete
        # Indicates if volume expansion is allowed
        allowVolumeExpansion: true
        # The Cinder availability zone to use for volumes provisioned by the storage class
        availabilityZone: nova
        # The Cinder volume type to use for volumes provisioned by the storage class
        # If not given, the default volume type will be used
        volumeType:
        # The allowed topologies for the storage class
        allowedTopologies:
  # Settings for the metrics server
  # https://github.com/kubernetes-sigs/metrics-server#helm-chart
  metricsServer:
    # Indicates if the metrics server should be deployed
    enabled: true
    chart:
      repo: https://kubernetes-sigs.github.io/metrics-server
      name: metrics-server
      version: 3.8.2
    release:
      namespace: kube-system
      values: {}
  # Settings for the Kubernetes dashboard
  # https://github.com/kubernetes/dashboard/tree/master/charts/helm-chart/kubernetes-dashboard
  kubernetesDashboard:
    # Indicates if the Kubernetes dashboard should be enabled
    enabled: false
    chart:
      repo: https://kubernetes.github.io/dashboard
      name: kubernetes-dashboard
      version: 5.10.0
    release:
      namespace: kubernetes-dashboard
      values: {}
  # Settings for ingress controllers
  ingress:
    # Settings for the Nginx ingress controller
    # https://github.com/kubernetes/ingress-nginx/tree/main/charts/ingress-nginx#configuration
    nginx:
      # Indicates if the Nginx ingress controller should be enabled
      enabled: false
      chart:
        repo: https://kubernetes.github.io/ingress-nginx
        name: ingress-nginx
        version: 4.2.5
      release:
        namespace: ingress-nginx
        values: {}
  # Settings for cluster monitoring
  monitoring:
    # Indicates if the cluster monitoring should be enabled
    enabled: false
    # Settings for the kube-prometheus-stack chart
    kubePrometheusStack:
      chart:
        repo: https://prometheus-community.github.io/helm-charts
        name: kube-prometheus-stack
        version: 40.1.0
      release:
        namespace: monitoring-system
        values: {}
    # Settings for the loki-stack chart
    lokiStack:
      # Indicates if the loki-stack chart should be enabled
      enabled: true
      chart:
        repo: https://grafana.github.io/helm-charts
        name: loki-stack
        version: 2.8.2
      release:
        namespace: monitoring-system
        values: {}
  # Settings for node feature discovery
  # https://github.com/kubernetes-sigs/node-feature-discovery/tree/master/deployment/helm/node-feature-discovery
  nodeFeatureDiscovery:
    # Indicates if node feature discovery should be enabled
    enabled: true
    chart:
      repo: https://kubernetes-sigs.github.io/node-feature-discovery/charts
      name: node-feature-discovery
      version: 0.11.2
    release:
      namespace: node-feature-discovery
      values: {}
  # Settings for the NVIDIA GPU operator
  nvidiaGPUOperator:
    # Indicates if the NVIDIA GPU operator should be enabled
    # Note that because it uses node feature discovery to run only on nodes
    # with an NVIDIA GPU available, the overhead of enabling this on clusters
    # that do not need it now but may need it in the future is low
    enabled: true
    chart:
      repo: https://nvidia.github.io/gpu-operator
      name: gpu-operator
      version: v1.11.1
    release:
      namespace: gpu-operator
      values: {}
  # Settings for the Mellanox network operator
  mellanoxNetworkOperator:
    # Indicates if the network operator should be enabled
    # Note that because it uses node feature discovery to run only on nodes
    # with a Mellanox NIC available, the overhead of enabling this on clusters
    # that do not need it now but may need it in the future is low
    enabled: true
    chart:
      repo: https://mellanox.github.io/network-operator
      name: network-operator
      version: 1.3.0
    release:
      namespace: network-operator
      values: {}