From b31cf3385487749fe73cb0fec8a5b38dedf9da7d Mon Sep 17 00:00:00 2001 From: Matt Pryor Date: Mon, 22 Jan 2024 15:40:35 +0000 Subject: [PATCH] Ability to put etcd on a separate block device (#223) * Config to put etcd on a separate block device * Add etcd volume test * Reinstate pull_request_target --- .github/workflows/test.yaml | 69 +++++++++++++++++++ .../control-plane/kubeadm-control-plane.yaml | 28 ++++++++ .../openstack-machine-template.yaml | 10 ++- charts/openstack-cluster/values.yaml | 36 ++++++++-- 4 files changed, 134 insertions(+), 9 deletions(-) diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 556b70b..d2632c8 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -94,11 +94,78 @@ jobs: name-suffix: ${{ github.job }} if: ${{ always() }} + # This job tests the etcd volume support + # It only runs for non-draft PRs + # It uses a pre-existing internal network and the default volume type + etcd-volume: + runs-on: ubuntu-latest + if: ${{ inputs.tests-full }} + steps: + - name: Checkout + uses: actions/checkout@v3 + with: + ref: ${{ inputs.ref }} + + - name: Create kind cluster + uses: helm/kind-action@v1.8.0 + + - name: Set up test environment + uses: ./.github/actions/setup + + - name: Write cloud credential + run: echo "$CLOUD" > ./clouds.yaml + env: + CLOUD: ${{ secrets.CLOUD }} + + - name: Write Helm values + run: echo "$VALUES" > ./values.yaml + env: + VALUES: | + clouds: + openstack: + auth: + project_id: ${{ secrets.PROJECT_ID }} + verify: false + clusterNetworking: + externalNetworkId: ${{ secrets.EXTERNAL_NETWORK_ID }} + internalNetwork: + networkFilter: + tags: capi-helm-chart-ci + etcd: + blockDevice: + size: 10 + controlPlane: + machineFlavor: ${{ secrets.CONTROL_PLANE_FLAVOR }} + machineCount: 1 + nodeGroups: + - name: md-0 + machineFlavor: ${{ secrets.NODE_GROUP_FLAVOR }} + machineCount: 2 + + - name: Deploy Kubernetes 1.29 for etcd volume test + uses: ./.github/actions/upgrade-and-test +
with: + name: ci-${{ github.run_id }}-${{ github.job }} + kubernetes-version: ${{ fromJson(inputs.images).kube-1-29-version }} + image-id: ${{ fromJson(inputs.images).kube-1-29-image }} + + - name: Delete etcd volume test deployment + run: helm delete ci-${{ github.run_id }}-${{ github.job }} --wait + if: ${{ always() }} + + - name: Upload logs + uses: ./.github/actions/upload-logs + with: + name-suffix: ${{ github.job }} + if: ${{ always() }} + # This job tests Kubernetes upgrade # It only runs for non-draft PRs # It uses a pre-existing internal network kube-upgrade: runs-on: ubuntu-latest + # Run after the etcd volume tests + needs: [etcd-volume] if: ${{ inputs.tests-full }} steps: - name: Checkout @@ -176,6 +243,8 @@ jobs: # It installs ALL of the addons so that we test upgrading them chart-upgrade: runs-on: ubuntu-latest + # Run after the kube-upgrade + needs: [kube-upgrade] if: ${{ inputs.tests-full }} steps: - name: Checkout current diff --git a/charts/openstack-cluster/templates/control-plane/kubeadm-control-plane.yaml b/charts/openstack-cluster/templates/control-plane/kubeadm-control-plane.yaml index 0944f75..0ce9397 100644 --- a/charts/openstack-cluster/templates/control-plane/kubeadm-control-plane.yaml +++ b/charts/openstack-cluster/templates/control-plane/kubeadm-control-plane.yaml @@ -25,6 +25,33 @@ preKubeadmCommands: {{- end }} {{- end }} +{{- define "openstack-cluster.controlplane.kubeadmConfigSpec.etcd" -}} +clusterConfiguration: + etcd: + local: + dataDir: {{ .Values.etcd.dataDir }} + extraArgs: {{ toYaml .Values.etcd.extraArgs | nindent 8 }} +{{- if .Values.etcd.blockDevice }} +# Tell kubeadm to ignore the fact that the etcd datadir contains lost+found +initConfiguration: + nodeRegistration: + ignorePreflightErrors: + - diravailable-{{ replace "/" "-" .Values.etcd.dataDir }} +joinConfiguration: + nodeRegistration: + ignorePreflightErrors: + - diravailable-{{ replace "/" "-" .Values.etcd.dataDir }} +diskSetup: + filesystems: + - device:
/dev/disk/openstack/by-tag/etcd + filesystem: ext4 + label: etcd_disk +mounts: + - - LABEL=etcd_disk + - {{ .Values.etcd.dataDir }} +{{- end }} +{{- end }} + --- apiVersion: controlplane.cluster.x-k8s.io/v1beta1 kind: KubeadmControlPlane @@ -56,6 +83,7 @@ spec: ( list (include "openstack-cluster.controlplane.kubeadmConfigSpec.nodeLabels" . | fromYaml) + (include "openstack-cluster.controlplane.kubeadmConfigSpec.etcd" . | fromYaml) (include "openstack-cluster.kubeadmConfigSpec" (list . .Values.controlPlane.kubeadmConfigSpec) | fromYaml) (include "openstack-cluster.osDistroKubeadmConfigSpec" (list . ) | fromYaml) (include "openstack-cluster.patchConfigSpec" (list .) | fromYaml) diff --git a/charts/openstack-cluster/templates/control-plane/openstack-machine-template.yaml b/charts/openstack-cluster/templates/control-plane/openstack-machine-template.yaml index 24e3ae7..e44372c 100644 --- a/charts/openstack-cluster/templates/control-plane/openstack-machine-template.yaml +++ b/charts/openstack-cluster/templates/control-plane/openstack-machine-template.yaml @@ -3,6 +3,12 @@ To do this, we create a new template whenever the checksum of the spec changes. */}} {{- define "openstack-cluster.controlplane.mt.spec" -}} +{{- + $blockDevices := + not (not .Values.etcd.blockDevice) | + ternary (dict "etcd" .Values.etcd.blockDevice) dict | + mergeOverwrite (deepCopy .Values.controlPlane.additionalBlockDevices) +}} template: spec: identityRef: @@ -13,7 +19,7 @@ template: {{- with .Values.machineSSHKeyName }} sshKeyName: {{ .
}} {{- end }} - {{- if or .Values.controlPlane.machineConfigDrive .Values.controlPlane.additionalBlockDevices }} + {{- if or .Values.controlPlane.machineConfigDrive $blockDevices }} configDrive: true {{- end }} {{- if .Values.controlPlane.machineRootVolume.diskSize }} @@ -30,7 +36,7 @@ template: {{- else }} {{- fail "Either controlPlane.machineImageId, controlPlane.machineImage, machineImage or machineImageId is required" }} {{- end }} - {{- with .Values.controlPlane.additionalBlockDevices }} + {{- with $blockDevices }} additionalBlockDevices: {{- range $name, $blockDevice := . }} - name: {{ $name }} diff --git a/charts/openstack-cluster/values.yaml b/charts/openstack-cluster/values.yaml index 214c783..c29e42e 100644 --- a/charts/openstack-cluster/values.yaml +++ b/charts/openstack-cluster/values.yaml @@ -102,6 +102,32 @@ trustedCAs: {} additionalPackages: [] # - nfs-common +# Settings for etcd +etcd: + # The data directory to use for etcd + # When a block device is specified, it is formatted as ext4 and mounted at
this directory + # kubeadm is told to ignore the preflight error caused by lost+found in this directory + dataDir: /var/lib/etcd + # Any extra command line arguments to pass to etcd + extraArgs: + # Tell etcd to listen for metrics on 0.0.0.0 so Prometheus can collect them + listen-metrics-urls: http://0.0.0.0:2381 + # The block device configuration for etcd + # If not specified, the root device is used + blockDevice: + # # The size of the block device + # size: 20 + # # The type of the block device + # # If set to "Volume", which is the default, then a Cinder volume is used to back the block device + # # If set to "Local", local ephemeral storage is used to back the block device + # type: Volume + # # The volume type to use + # # If not specified, the default volume type is used + # volumeType: + # # The volume availability zone to use + # # If not specified, the machine availability zone is used + # availabilityZone: + # Settings for the Kubernetes API server apiServer: # Indicates whether to deploy a load balancer for the API server @@ -179,7 +205,7 @@ controlPlane: additionalBlockDevices: {} # # The key is the name for the block device in the context of the machine # # It is also used to tag the block device, so that the volume can be identified in instance metadata - # etcd: + # scratch: # # The size of the block device # size: 20 # # The type of the block device @@ -219,13 +245,9 @@ controlPlane: kubeletExtraArgs: cloud-provider: external # As well as enabling an external cloud provider, we set the bind addresses for the - # etcd metrics, controller-manager, scheduler and kube-proxy to 0.0.0.0 so that Prometheus - # can reach them to collect metrics + # controller-manager, scheduler and kube-proxy to 0.0.0.0 so that Prometheus can reach + # them to collect metrics clusterConfiguration: - etcd: - local: - extraArgs: - listen-metrics-urls: http://0.0.0.0:2381 apiServer: extraArgs: cloud-provider: external