From be1d47c726cffc519bdff857e7022bfa15cea423 Mon Sep 17 00:00:00 2001
From: Ramesh Kumar Sivanandam
Date: Mon, 1 Apr 2024 07:40:22 -0400
Subject: [PATCH] Backwards-compatible Kubernetes admin.conf ownership and permissions

Kubernetes 1.29 deploys a new 'super-admin.conf' file and a new RBAC group,
kubeadm:cluster-admins. The ownership, permissions, and RBAC group association
of 'admin.conf' have changed, so the K8s control-plane upgrade and abort-upgrade
operations for 1.29 modify the file's ownership and permissions. This workaround
resets the ownership and permissions of admin.conf to retain backwards
compatibility; it may have to be addressed differently in the future.

Without this change, kubectl commands no longer work after a K8s control-plane
upgrade or abort upgrade to 1.29 because of insufficient permissions.

Test Plan:
PASS: Install ISO with K8s 1.29 on AIO-SX.
PASS: Install ISO with K8s 1.29 on AIO-SX and perform the BnR.
PASS: Ansible bootstrap replay on K8s 1.29.
PASS: Install ISO with K8s 1.28 on AIO-SX, upgrade to 1.29.
PASS: Install ISO with K8s 1.28 on AIO-SX, upgrade to 1.29 and perform the BnR.
PASS: Install ISO with K8s 1.28 on AIO-SX, abort upgrade to 1.29.
PASS: Install ISO with K8s 1.29 on AIO-DX.
PASS: Install ISO with K8s 1.29 on AIO-DX and perform the BnR.
PASS: Install ISO with K8s 1.28 on AIO-DX, upgrade to 1.29.
PASS: Verify that "kubectl get pods -n kube-system" works as expected after the
      K8s upgrade and abort-upgrade operations.
PASS: Manually hardcode kube-upgrade-abort to fail and verify that the backups
      of admin.conf and super-admin.conf are restored successfully.
PASS: Lock/unlock the host after modifying the permissions of the Kubernetes
      admin.conf and verify that the permissions are reset correctly.

Story: 2011047
Task: 49793

Change-Id: I1d410cb1e638b94bcc965dd43dd0e8f032401ade
Signed-off-by: Ramesh Kumar Sivanandam
---
 .../modules/platform/manifests/kubernetes.pp | 65 +++++++++++++++++--
 1 file changed, 61 insertions(+), 4 deletions(-)

diff --git a/puppet-manifests/src/modules/platform/manifests/kubernetes.pp b/puppet-manifests/src/modules/platform/manifests/kubernetes.pp
index 8e47c5dfa..81cbfe476 100644
--- a/puppet-manifests/src/modules/platform/manifests/kubernetes.pp
+++ b/puppet-manifests/src/modules/platform/manifests/kubernetes.pp
@@ -55,6 +55,7 @@ class platform::kubernetes::params (
   $etcd_snapshot_file = '/opt/backups/k8s-control-plane/etcd/stx_etcd.snap',
   $static_pod_manifests_initial = '/opt/backups/k8s-control-plane/static-pod-manifests',
   $static_pod_manifests_abort = '/opt/backups/k8s-control-plane/static-pod-manifests-abort',
+  $kube_config_backup_path = '/opt/backups/k8s-control-plane/k8s-config',
   $etcd_name = 'controller',
   $etcd_initial_cluster = 'controller=http://localhost:2380',
   # The file holding the root CA cert/key to update to
@@ -492,6 +493,16 @@ class platform::kubernetes::master::init
     -> file { '/etc/platform/.initial_k8s_config_complete':
       ensure => present,
     }
+  } else {
+    # The K8s control-plane upgrade from 1.28 to 1.29 changes the ownership and
+    # permissions of the kube config file; they are reset after the control-plane
+    # upgrade. If anything fails before that reset, this ensures the correct
+    # ownership and permissions on host reboots after the initial install.
+    file { '/etc/kubernetes/admin.conf':
+      owner => 'root',
+      group => 'sys_protected',
+      mode  => '0640',
+    }
   }
 
   # Run kube-cert-rotation daily
@@ -858,6 +869,14 @@ class platform::kubernetes::upgrade_first_control_plane
     command     => 'kubectl -n kube-system get configmaps -oname --sort-by=.metadata.creationTimestamp | grep -e kubelet-config | head -n -1 | xargs -r -i kubectl -n kube-system delete {}', # lint:ignore:140chars
     logoutput   => true,
   }
+  # The control-plane upgrade from 1.28 to 1.29 changed the ownership and
+  # permissions of the kube config file; set them back to the previous state.
+  # This issue is not present on a fresh install with K8s 1.29.
+  -> file { '/etc/kubernetes/admin.conf':
+    owner => 'root',
+    group => 'sys_protected',
+    mode  => '0640',
+  }
 
   if $::platform::params::system_mode != 'simplex' {
     # For duplex and multi-node system, restrict the coredns pod to control-plane nodes
@@ -1713,13 +1732,42 @@ class platform::kubernetes::unmask_start_services
   }
 }
 
+class platform::kubernetes::refresh_admin_config {
+  # Remove and regenerate the kube config file /etc/kubernetes/admin.conf
+  exec { 'remove the /etc/kubernetes/admin.conf':
+    command => 'rm -f /etc/kubernetes/admin.conf',
+  }
+  -> exec { 'remove the /etc/kubernetes/super-admin.conf':
+    command => 'rm -f /etc/kubernetes/super-admin.conf',
+    onlyif  => 'test -f /etc/kubernetes/super-admin.conf',
+  }
+  # Aborting the K8s upgrade from 1.28 to 1.29 removes some of the permissions and
+  # privileges of the kubernetes-admin user. The following command regenerates
+  # admin.conf and ensures the required permissions and privileges.
+  -> exec { 'regenerate the /etc/kubernetes/admin.conf':
+    command => 'kubeadm init phase kubeconfig admin',
+  }
+  -> file { '/etc/kubernetes/admin.conf':
+    owner => 'root',
+    group => 'sys_protected',
+    mode  => '0640',
+  }
+}
+
 class platform::kubernetes::upgrade_abort
   inherits ::platform::kubernetes::params {
   $software_version = $::platform::params::software_version
 
   include platform::kubernetes::cordon_node
   include platform::kubernetes::mask_stop_kubelet
   include platform::kubernetes::unmask_start_services
+  include platform::kubernetes::refresh_admin_config
+  # Keep a backup of the current Kubernetes config files so that, if the abort
+  # fails, we can restore that state with upgrade_abort_recovery.
+  exec { 'backup the kubernetes admin and super-admin config':
+    command => "mkdir -p ${kube_config_backup_path} && cp -p /etc/kubernetes/*admin.conf ${kube_config_backup_path}/.",
+    onlyif  => ['test -f /etc/kubernetes/admin.conf'],
+  }
   # Take latest static manifest files backup for recovery if upgrade_abort fail
   exec { 'remove the control-plane pods':
     command => "mkdir -p ${static_pod_manifests_abort} && mv -f /etc/kubernetes/manifests/*.yaml ${static_pod_manifests_abort}/.",
@@ -1765,6 +1813,12 @@ class platform::kubernetes::upgrade_abort
     environment => [ 'ETCDCTL_API=3' ],
     onlyif      => "test -f ${etcd_snapshot_file}"
   }
+  -> exec { 'restore static manifest files':
+    command => "/usr/bin/cp -f ${static_pod_manifests_initial}/*.yaml /etc/kubernetes/manifests",
+    onlyif  => "test -d ${static_pod_manifests_initial}",
+  }
+  -> Class['platform::kubernetes::unmask_start_services']
+  -> Class['platform::kubernetes::refresh_admin_config']
   # Remove recover static manifest files backup if snapshot restore succeeded
   -> exec { 'remove recover static manifest files':
     command => "rm -rf ${static_pod_manifests_abort}",
@@ -1775,11 +1829,11 @@ class platform::kubernetes::upgrade_abort
     command => "rm -rf /opt/etcd/${software_version}/controller.etcd.bck",
     onlyif  => "test -d /opt/etcd/${software_version}/controller.etcd.bck",
   }
-  -> exec { 'restore static manifest files':
-    command => "/usr/bin/cp -f ${static_pod_manifests_initial}/*.yaml /etc/kubernetes/manifests",
-    onlyif  => "test -d ${static_pod_manifests_initial}",
+  # Remove kube config files backup after the abort
+  -> exec { 'remove kube config files backup':
+    command => "rm -rf ${kube_config_backup_path}",
+    onlyif  => "test -d ${kube_config_backup_path}",
   }
-  -> Class['platform::kubernetes::unmask_start_services']
 }
 
 class platform::kubernetes::upgrade_abort_recovery
@@ -1794,6 +1848,9 @@ class platform::kubernetes::upgrade_abort_recovery
   -> exec { 'restore recover static manifest files':
     command => "mv -f ${static_pod_manifests_abort}/*.yaml /etc/kubernetes/manifests/",
  }
+  -> exec { 'restore the admin and super-admin config files':
+    command => "mv -f ${kube_config_backup_path}/*admin.conf /etc/kubernetes/",
+  }
   -> Class['platform::kubernetes::unmask_start_services']
   -> exec { 'uncordon the node':
     command => "kubectl --kubeconfig=/etc/kubernetes/admin.conf uncordon ${::platform::params::hostname}",
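
A quick manual check of the state these manifests are meant to preserve, on a
controller where they have already been applied (a sketch only; the expected
owner, group, and mode come from the file resources in this change):

  # Expect 'root:sys_protected 640', per the file resources above
  stat -c '%U:%G %a' /etc/kubernetes/admin.conf

  # From the test plan: kubectl must keep working after upgrade and abort-upgrade
  kubectl --kubeconfig=/etc/kubernetes/admin.conf get pods -n kube-system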
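
During upgrade_abort, refresh_admin_config regenerates admin.conf after the etcd
snapshot is restored; if the abort itself fails, upgrade_abort_recovery puts back
the copies saved under ${kube_config_backup_path}. A rough shell equivalent of the
regeneration chain, sketched only for illustration (the Puppet class above is the
supported path):

  # Mirrors platform::kubernetes::refresh_admin_config
  rm -f /etc/kubernetes/admin.conf /etc/kubernetes/super-admin.conf
  kubeadm init phase kubeconfig admin   # regenerate admin.conf with the required privileges
  chown root:sys_protected /etc/kubernetes/admin.conf
  chmod 0640 /etc/kubernetes/admin.conf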