Backwards-compatible Kubernetes admin.conf ownership and permissions

Kubernetes 1.29 now deploys a new 'super-admin.conf' file and a new
RBAC group, kubeadm:cluster-admins. The ownership, permissions, and
RBAC group association of 'admin.conf' have changed, so the K8s
control-plane upgrade and abort-upgrade operations for 1.29 modify
the file's ownership and permissions.

This workaround resets the ownership and permissions of the
admin.conf file to retain backwards compatibility. This may have to
be addressed differently in the future.

Without this change, kubectl commands no longer work due to
insufficient permissions after a K8s control-plane upgrade to 1.29
or an abort of that upgrade.
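
Concretely, the workaround keeps admin.conf in its pre-1.29 state,
equivalent to the following file resource (the same owner/group/mode
values applied by the manifest changes below):

    file { '/etc/kubernetes/admin.conf':
      owner => 'root',
      group => 'sys_protected',
      mode  => '0640',
    }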

Test Plan:
PASS: Install ISO with K8s 1.29 on AIO-SX.
PASS: Install ISO with K8s 1.29 on AIO-SX and perform the BnR.
PASS: Ansible bootstrap replay on K8s 1.29.
PASS: Install ISO with K8s 1.28 on AIO-SX, upgrade to 1.29.
PASS: Install ISO with K8s 1.28 on AIO-SX, upgrade to 1.29 and
      perform the BnR.
PASS: Install ISO with K8s 1.28 on AIO-SX, abort upgrade to 1.29.
PASS: Install ISO with K8s 1.29 on AIO-DX.
PASS: Install ISO with K8s 1.29 on AIO-DX and perform the BnR.
PASS: Install ISO with K8s 1.28 on AIO-DX, upgrade to 1.29.
PASS: Verify that "kubectl get pods -n kube-system" works as expected
      after the K8s upgrade and abort upgrade operations.
PASS: Manually hardcode a failure in kube-upgrade-abort and verify
      that the backed-up admin.conf and super-admin.conf files are
      restored successfully.
PASS: Lock/unlock the host after modifying the permissions of the
      Kubernetes admin.conf and verify the permissions are reset
      correctly.

Story: 2011047
Task: 49793

Change-Id: I1d410cb1e638b94bcc965dd43dd0e8f032401ade
Signed-off-by: Ramesh Kumar Sivanandam <rameshkumar.sivanandam@windriver.com>

@@ -55,6 +55,7 @@ class platform::kubernetes::params (
$etcd_snapshot_file = '/opt/backups/k8s-control-plane/etcd/stx_etcd.snap',
$static_pod_manifests_initial = '/opt/backups/k8s-control-plane/static-pod-manifests',
$static_pod_manifests_abort = '/opt/backups/k8s-control-plane/static-pod-manifests-abort',
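# Directory used to back up admin.conf/super-admin.conf during a K8s upgrade abort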
$kube_config_backup_path = '/opt/backups/k8s-control-plane/k8s-config',
$etcd_name = 'controller',
$etcd_initial_cluster = 'controller=http://localhost:2380',
# The file holding the root CA cert/key to update to
@@ -492,6 +493,16 @@ class platform::kubernetes::master::init
-> file { '/etc/platform/.initial_k8s_config_complete':
ensure => present,
}
} else {
# The K8s control plane upgrade from 1.28 to 1.29 changes the ownership/permissions
# of the kube config file. We reset them after the control plane upgrade.
# In case of any failure before the reset, this sets the correct ownership/permissions
# on the kube config when the host reboots after the initial install.
file { '/etc/kubernetes/admin.conf':
owner => 'root',
group => 'sys_protected',
mode => '0640',
}
}
# Run kube-cert-rotation daily
@@ -858,6 +869,14 @@ class platform::kubernetes::upgrade_first_control_plane
command => 'kubectl -n kube-system get configmaps -oname --sort-by=.metadata.creationTimestamp | grep -e kubelet-config | head -n -1 | xargs -r -i kubectl -n kube-system delete {}', # lint:ignore:140chars
logoutput => true,
}
# The control plane upgrade from 1.28 to 1.29 changes the ownership and permissions
# of the kube config file. Reset the ownership & permissions to the previous state.
# This issue is not present in a fresh install with K8s 1.29.
-> file { '/etc/kubernetes/admin.conf':
owner => 'root',
group => 'sys_protected',
mode => '0640',
}
if $::platform::params::system_mode != 'simplex' {
# For duplex and multi-node system, restrict the coredns pod to control-plane nodes
@@ -1713,13 +1732,42 @@ class platform::kubernetes::unmask_start_services
}
}
class platform::kubernetes::refresh_admin_config {
# Remove and regenerate the kube config file /etc/kubernetes/admin.conf
exec { 'remove the /etc/kubernetes/admin.conf':
command => 'rm -f /etc/kubernetes/admin.conf',
}
-> exec { 'remove the /etc/kubernetes/super-admin.conf':
command => 'rm -f /etc/kubernetes/super-admin.conf',
onlyif => 'test -f /etc/kubernetes/super-admin.conf',
}
# Aborting the K8s version upgrade from 1.28 to 1.29 removes some of the permissions &
# privileges of the kubernetes-admin user. The following command regenerates the
# admin.conf file and ensures the required permissions & privileges.
-> exec { 'regenerate the /etc/kubernetes/admin.conf':
command => 'kubeadm init phase kubeconfig admin',
}
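# Note: kubeadm writes the regenerated admin.conf with its default root-only
# ownership and permissions, so the chained file resource below restores the
# ownership/permissions expected here.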
-> file { '/etc/kubernetes/admin.conf':
owner => 'root',
group => 'sys_protected',
mode => '0640',
}
}
class platform::kubernetes::upgrade_abort
inherits ::platform::kubernetes::params {
$software_version = $::platform::params::software_version
include platform::kubernetes::cordon_node
include platform::kubernetes::mask_stop_kubelet
include platform::kubernetes::unmask_start_services
include platform::kubernetes::refresh_admin_config
# Keep a backup of the current Kubernetes config files so that, if the abort fails,
# they can be restored with upgrade_abort_recovery.
exec { 'backup the kubernetes admin and super-admin config':
command => "mkdir -p ${kube_config_backup_path} && cp -p /etc/kubernetes/*admin.conf ${kube_config_backup_path}/.",
onlyif => ['test -f /etc/kubernetes/admin.conf'],
}
# Take a backup of the latest static manifest files for recovery if upgrade_abort fails
exec { 'remove the control-plane pods':
command => "mkdir -p ${static_pod_manifests_abort} && mv -f /etc/kubernetes/manifests/*.yaml ${static_pod_manifests_abort}/.",
@@ -1765,6 +1813,12 @@ class platform::kubernetes::upgrade_abort
environment => [ 'ETCDCTL_API=3' ],
onlyif => "test -f ${etcd_snapshot_file}"
}
-> exec { 'restore static manifest files':
command => "/usr/bin/cp -f ${static_pod_manifests_initial}/*.yaml /etc/kubernetes/manifests",
onlyif => "test -d ${static_pod_manifests_initial}",
}
-> Class['platform::kubernetes::unmask_start_services']
-> Class['platform::kubernetes::refresh_admin_config']
# Remove the recovery backup of the static manifest files if the snapshot restore succeeded
-> exec { 'remove recover static manifest files':
command => "rm -rf ${static_pod_manifests_abort}",
@@ -1775,11 +1829,11 @@ class platform::kubernetes::upgrade_abort
command => "rm -rf /opt/etcd/${software_version}/controller.etcd.bck",
onlyif => "test -d /opt/etcd/${software_version}/controller.etcd.bck",
}
-> exec { 'restore static manifest files':
command => "/usr/bin/cp -f ${static_pod_manifests_initial}/*.yaml /etc/kubernetes/manifests",
onlyif => "test -d ${static_pod_manifests_initial}",
# Remove kube config files backup after the abort
-> exec { 'remove kube config files backup':
command => "rm -rf ${kube_config_backup_path}",
onlyif => "test -d ${kube_config_backup_path}",
}
-> Class['platform::kubernetes::unmask_start_services']
}
class platform::kubernetes::upgrade_abort_recovery
@@ -1794,6 +1848,9 @@ class platform::kubernetes::upgrade_abort_recovery
-> exec { 'restore recover static manifest files':
command => "mv -f ${static_pod_manifests_abort}/*.yaml /etc/kubernetes/manifests/",
}
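# Put back the kube config files that were backed up at the start of the upgrade abort.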
-> exec { 'restore the admin and super-admin config files':
command => "mv -f ${kube_config_backup_path}/*admin.conf /etc/kubernetes/",
}
-> Class['platform::kubernetes::unmask_start_services']
-> exec { 'uncordon the node':
command => "kubectl --kubeconfig=/etc/kubernetes/admin.conf uncordon ${::platform::params::hostname}",