From 9b5148e3b57ea48540b795be95abb035fa9a983d Mon Sep 17 00:00:00 2001 From: Ovidiu Poncea Date: Mon, 27 Jul 2020 15:04:38 +0300 Subject: [PATCH] Harden kickstarts as udev behavior can lead to random failures Whenever a dev node that is not in use is opened with open(O_RDWR) udev triggers a flush in devtmpfs that briefly removes & recreates all the nodes for partitions on that device. This causes commands accessing dev nodes during the flush to fail. In our case blkid and lsblk failed. These failures are hard to reproduce, have a devastating effect on the partitioning operations and are not solved by using 'udevadm settle' as some of the kernel events are asynchronous. So, mainly, this commit stops udev from messing with /dev nodes by initializing file descriptors for all storage devices then taking locks on them with flock. Setting locks stops udev from triggering a kernel partition rescan. Locks are set at the start of the partitioning operation and released at the end. For more details and similar cases see: o https://github.com/systemd/systemd/commit/02ba8fb3357daf57f6120ac512fb464a4c623419 o http://tracker.ceph.com/issues/14080 o http://tracker.ceph.com/issues/15176 This commit: o stops udev messing with /dev nodes; o aborts install on critical failures; o adds retry for critical operations such as LVM cleanup or partition removal and creation. 
Closes-Bug: 1888938 Change-Id: Iaaaaaae973ee36f2c4bfd42c327e8c6278d59303 Signed-off-by: Ovidiu Poncea --- bsp-files/centos-ks-gen.pl | 15 ++ bsp-files/kickstarts/functions.sh | 60 ++++++- bsp-files/kickstarts/post_common.cfg | 7 +- bsp-files/kickstarts/pre_disk_aio.cfg | 50 ++++-- bsp-files/kickstarts/pre_disk_controller.cfg | 47 ++++-- .../kickstarts/pre_disk_setup_common.cfg | 146 ++++++++++++------ bsp-files/kickstarts/pre_disk_setup_tail.cfg | 21 +++ bsp-files/kickstarts/pre_disk_storage.cfg | 14 +- bsp-files/kickstarts/pre_disk_worker.cfg | 14 +- 9 files changed, 284 insertions(+), 90 deletions(-) create mode 100644 bsp-files/kickstarts/pre_disk_setup_tail.cfg diff --git a/bsp-files/centos-ks-gen.pl b/bsp-files/centos-ks-gen.pl index 32c4a2e4..469837b3 100755 --- a/bsp-files/centos-ks-gen.pl +++ b/bsp-files/centos-ks-gen.pl @@ -42,6 +42,7 @@ write_config_file("controller", "pre_pkglist.cfg", "pre_disk_setup_common.cfg", "pre_disk_controller.cfg", + "pre_disk_setup_tail.cfg", "post_platform_conf_controller.cfg", "post_common.cfg", "post_kernel_controller.cfg", @@ -54,6 +55,7 @@ write_config_file("controller-worker", "pre_pkglist.cfg", "pre_disk_setup_common.cfg", "pre_disk_aio.cfg", + "pre_disk_setup_tail.cfg", "post_platform_conf_aio.cfg", "post_common.cfg", "post_kernel_aio_and_worker.cfg", @@ -67,6 +69,7 @@ write_config_file("controller-worker-lowlatency", "pre_pkglist_lowlatency.cfg", "pre_disk_setup_common.cfg", "pre_disk_aio.cfg", + "pre_disk_setup_tail.cfg", "post_platform_conf_aio_lowlatency.cfg", "post_common.cfg", "post_kernel_aio_and_worker.cfg", @@ -84,6 +87,7 @@ write_config_file("controller", "pre_pkglist.cfg", "pre_disk_setup_common.cfg", "pre_disk_controller.cfg", + "pre_disk_setup_tail.cfg", "post_platform_conf_controller.cfg", "post_common.cfg", "post_kernel_controller.cfg", @@ -95,6 +99,7 @@ write_config_file("controller-worker", "pre_pkglist.cfg", "pre_disk_setup_common.cfg", "pre_disk_aio.cfg", + "pre_disk_setup_tail.cfg", 
"post_platform_conf_aio.cfg", "post_common.cfg", "post_kernel_aio_and_worker.cfg", @@ -107,6 +112,7 @@ write_config_file("controller-worker-lowlatency", "pre_pkglist_lowlatency.cfg", "pre_disk_setup_common.cfg", "pre_disk_aio.cfg", + "pre_disk_setup_tail.cfg", "post_platform_conf_aio_lowlatency.cfg", "post_common.cfg", "post_kernel_aio_and_worker.cfg", @@ -123,6 +129,7 @@ write_config_file("controller", "pre_pkglist.cfg", "pre_disk_setup_common.cfg", "pre_disk_controller.cfg", + "pre_disk_setup_tail.cfg", "post_platform_conf_controller.cfg", "post_common.cfg", "post_kernel_controller.cfg", @@ -136,6 +143,7 @@ write_config_file("controller-worker", "pre_pkglist.cfg", "pre_disk_setup_common.cfg", "pre_disk_aio.cfg", + "pre_disk_setup_tail.cfg", "post_platform_conf_aio.cfg", "post_common.cfg", "post_kernel_aio_and_worker.cfg", @@ -150,6 +158,7 @@ write_config_file("controller-worker-lowlatency", "pre_pkglist_lowlatency.cfg", "pre_disk_setup_common.cfg", "pre_disk_aio.cfg", + "pre_disk_setup_tail.cfg", "post_platform_conf_aio_lowlatency.cfg", "post_common.cfg", "post_kernel_aio_and_worker.cfg", @@ -164,6 +173,7 @@ write_config_file("worker", "pre_pkglist.cfg", "pre_disk_setup_common.cfg", "pre_disk_worker.cfg", + "pre_disk_setup_tail.cfg", "post_platform_conf_worker.cfg", "post_common.cfg", "post_kernel_aio_and_worker.cfg", @@ -176,6 +186,7 @@ write_config_file("worker-lowlatency", "pre_pkglist_lowlatency.cfg", "pre_disk_setup_common.cfg", "pre_disk_worker.cfg", + "pre_disk_setup_tail.cfg", "post_platform_conf_worker_lowlatency.cfg", "post_common.cfg", "post_kernel_aio_and_worker.cfg", @@ -188,6 +199,7 @@ write_config_file("storage", "pre_pkglist.cfg", "pre_disk_setup_common.cfg", "pre_disk_storage.cfg", + "pre_disk_setup_tail.cfg", "post_platform_conf_storage.cfg", "post_common.cfg", "post_kernel_storage.cfg", @@ -208,6 +220,7 @@ foreach $server (keys %boot_servers) "pre_pkglist.cfg", "pre_disk_setup_common.cfg", "pre_disk_controller.cfg", + "pre_disk_setup_tail.cfg", 
"post_platform_conf_controller.cfg", "post_common.cfg", "post_kernel_controller.cfg", @@ -219,6 +232,7 @@ foreach $server (keys %boot_servers) "pre_pkglist.cfg", "pre_disk_setup_common.cfg", "pre_disk_aio.cfg", + "pre_disk_setup_tail.cfg", "post_platform_conf_aio.cfg", "post_common.cfg", "post_kernel_aio_and_worker.cfg", @@ -231,6 +245,7 @@ foreach $server (keys %boot_servers) "pre_pkglist_lowlatency.cfg", "pre_disk_setup_common.cfg", "pre_disk_aio.cfg", + "pre_disk_setup_tail.cfg", "post_platform_conf_aio_lowlatency.cfg", "post_common.cfg", "post_kernel_aio_and_worker.cfg", diff --git a/bsp-files/kickstarts/functions.sh b/bsp-files/kickstarts/functions.sh index ebbbe29f..2e3a8711 100644 --- a/bsp-files/kickstarts/functions.sh +++ b/bsp-files/kickstarts/functions.sh @@ -9,6 +9,17 @@ cat </tmp/ks-functions.sh # SPDX-License-Identifier: Apache-2.0 # +# Get the FD used by subshells to log output +if [ -z "\$stdout" ]; then + exec {stdout}>&1 +fi + +function wlog() +{ + local dt="\$(date "+%Y-%m-%d %H:%M:%S.%3N")" + echo "\$dt - \$1" >&\${stdout} +} + function get_by_path() { local disk=\$(cd /dev ; readlink -f \$1) @@ -73,7 +84,7 @@ function get_http_port() echo \$(cat /proc/cmdline |xargs -n1 echo |grep '^inst.repo=' | sed -r 's#^[^/]*://[^/]*:([0-9]*)/.*#\1#') } -get_disk_dev() +function get_disk_dev() { local disk # Detect HDD @@ -97,5 +108,52 @@ get_disk_dev() done } +function exec_no_fds() +{ + # Close open FDs when executing commands that complain about leaked FDs. + local fds=\$1 + local cmd=\$2 + local retries=\$3 + local interval=\$4 + local ret_code=0 + local ret_stdout="" + for fd in \$fds + do + local cmd="\$cmd \$fd>&-" + done + if [ -z "\$retries" ]; then + #wlog "Running command: '\$cmd'." + eval "\$cmd" + else + ret_stdout=\$(exec_retry "\$retries" "\$interval" "\$cmd") + ret_code=\$? 
+ echo "\${ret_stdout}" + return \${ret_code} + fi +} + +function exec_retry() +{ + local retries=\$1 + local interval=\$2 + local cmd=\$3 + let -i retry_count=1 + local ret_code=0 + local ret_stdout="" + cmd="\$cmd" # 2>&\$stdout" + while [ \$retry_count -le \$retries ]; do + #wlog "Running command: '\$cmd'." + ret_stdout=\$(eval \$cmd) + ret_code=\$? + [ \$ret_code -eq 0 ] && break + wlog "Error running command '\${cmd}'. Try \${retry_count} of \${retries} at \${interval}s." + wlog "ret_code: \${ret_code}, stdout: '\${ret_stdout}'." + sleep \$interval + let retry_count++ + done + echo "\${ret_stdout}" + return \${ret_code} +} + END_FUNCTIONS diff --git a/bsp-files/kickstarts/post_common.cfg b/bsp-files/kickstarts/post_common.cfg index b84da62f..82140084 100644 --- a/bsp-files/kickstarts/post_common.cfg +++ b/bsp-files/kickstarts/post_common.cfg @@ -1,9 +1,12 @@ -%post --nochroot +%post --nochroot --erroronfail + +# Source common functions +. /tmp/ks-functions.sh # Change GUID of backup partition change_guid=/tmp/backup-guid-change.sh if [ -f "$change_guid" ]; then - sh $change_guid + sh $change_guid || report_post_failure_with_logfile "ERROR: Failed to update platform backup GUID" fi %end diff --git a/bsp-files/kickstarts/pre_disk_aio.cfg b/bsp-files/kickstarts/pre_disk_aio.cfg index 5832ac50..ded2b735 100755 --- a/bsp-files/kickstarts/pre_disk_aio.cfg +++ b/bsp-files/kickstarts/pre_disk_aio.cfg @@ -85,7 +85,7 @@ ## cgts-vg PV (142G), cgts-vg PV (336G) ## -sz=$(blockdev --getsize64 $(get_disk $ROOTFS_DISK)) +sz=$(blockdev --getsize64 $rootfs_device) if [ $sz -le $((240*$gb)) ] ; then # Round CGCS_PV_SIZE to the closest upper value that can be divided by 1024. # 190480/1024=186.02. CGCS_PV_SIZE=187*1024=191488. 
Using a disk with a @@ -119,12 +119,16 @@ if [ -d /sys/firmware/efi ] ; then END_POINT=$(($START_POINT + $PLATFORM_BACKUP_SIZE)) BACKUP_END_POINT=$END_POINT if [ $BACKUP_CREATED -eq 0 ] ; then - parted -s $ROOTFS_DISK mkpart primary ext4 ${START_POINT}MiB ${END_POINT}MiB + wlog "Creating platform backup partition of ${PLATFORM_BACKUP_SIZE}MiB from ${START_POINT}MiB to ${END_POINT}MiB." + exec_retry 5 0.5 "parted -s $rootfs_device mkpart primary ext4 ${START_POINT}MiB ${END_POINT}MiB" + [ $? -ne 0 ] && report_pre_failure_with_msg "ERROR: Partition creation failed!" fi START_POINT=$END_POINT END_POINT=$(($START_POINT + $EFI_SIZE)) - parted -s $ROOTFS_DISK mkpart primary fat32 ${START_POINT}MiB ${END_POINT}MiB + wlog "Creating EFI partition of ${EFI_SIZE}MiB from ${START_POINT}MiB to ${END_POINT}MiB." + exec_retry 5 0.5 "parted -s $rootfs_device mkpart primary fat32 ${START_POINT}MiB ${END_POINT}MiB" + [ $? -ne 0 ] && report_pre_failure_with_msg "ERROR: Partition creation failed!" cat<>/tmp/part-include part /boot/efi --fstype=efi --onpart=${ROOTFS_PART_PREFIX}2 @@ -132,13 +136,17 @@ EOF else BACKUP_PART=${ROOTFS_PART_PREFIX}2 BACKUP_PART_NO=2 - parted -s $ROOTFS_DISK mkpart primary 1MiB 2MiB + wlog "Creating 1MB BIOS GRUB partition from 1MiB to 2MiB." + exec_retry 5 0.5 "parted -s $rootfs_device mkpart primary 1MiB 2MiB" + [ $? -ne 0 ] && report_pre_failure_with_msg "ERROR: Partition creation failed!" START_POINT=2 END_POINT=$(($START_POINT + $PLATFORM_BACKUP_SIZE)) BACKUP_END_POINT=$END_POINT if [ $BACKUP_CREATED -eq 0 ] ; then - parted -s $ROOTFS_DISK mkpart primary ext4 ${START_POINT}MiB ${END_POINT}MiB + wlog "Creating platform backup partition of ${PLATFORM_BACKUP_SIZE}MiB from ${START_POINT}MiB to ${END_POINT}MiB." + exec_retry 5 0.5 "parted -s $rootfs_device mkpart primary ext4 ${START_POINT}MiB ${END_POINT}MiB" + [ $? -ne 0 ] && report_pre_failure_with_msg "ERROR: Partition creation failed!" 
fi cat<>/tmp/part-include part biosboot --asprimary --fstype=biosboot --onpart=${ROOTFS_PART_PREFIX}1 @@ -147,31 +155,47 @@ fi START_POINT=$END_POINT END_POINT=$(($START_POINT + $BOOT_SIZE)) -parted -s $ROOTFS_DISK mkpart primary ext4 ${START_POINT}MiB ${END_POINT}MiB +wlog "Creating boot partition of ${BOOT_SIZE}MiB from ${START_POINT}MiB to ${END_POINT}MiB." +exec_retry 5 0.5 "parted -s $rootfs_device mkpart primary ext4 ${START_POINT}MiB ${END_POINT}MiB" +[ $? -ne 0 ] && report_pre_failure_with_msg "ERROR: Partition creation failed!" START_POINT=$END_POINT END_POINT=$(($START_POINT + $ROOTFS_SIZE)) -parted -s $ROOTFS_DISK mkpart primary ext4 ${START_POINT}MiB ${END_POINT}MiB +wlog "Creating rootfs partition of ${ROOTFS_SIZE}MiB from ${START_POINT}MiB to ${END_POINT}MiB." +exec_retry 5 0.5 "parted -s $rootfs_device mkpart primary ext4 ${START_POINT}MiB ${END_POINT}MiB" +[ $? -ne 0 ] && report_pre_failure_with_msg "ERROR: Partition creation failed!" START_POINT=$END_POINT END_POINT=$(($START_POINT + $CGCS_PV_SIZE)) -parted -s $ROOTFS_DISK mkpart extended ${START_POINT}MiB ${END_POINT}MiB +wlog "Creating cgcs-vg partition of ${CGCS_PV_SIZE}MiB from ${START_POINT}MiB to ${END_POINT}MiB." +exec_retry 5 0.5 "parted -s $rootfs_device mkpart extended ${START_POINT}MiB ${END_POINT}MiB" +[ $? -ne 0 ] && report_pre_failure_with_msg "ERROR: Partition creation failed!" if [ $BACKUP_CREATED -ne 0 ] ; then BACKUP_CURRENT_SIZE=$(parted -s $BACKUP_PART unit MiB print | grep $BACKUP_PART | awk '{print $3}' | sed 's/[^C0-9]*//g') if [ $BACKUP_CURRENT_SIZE -lt $PLATFORM_BACKUP_SIZE ] ; then + wlog "Backup partition size is ${BACKUP_CURRENT_SIZE}MiB, resizing to ${PLATFORM_BACKUP_SIZE}MiB." 
# parted will throw an error about overlapping with the next partition if we don't do this BACKUP_END_POINT=$(($BACKUP_END_POINT - 1)).9 - parted -s $ROOTFS_DISK resizepart $BACKUP_PART_NO ${BACKUP_END_POINT}MiB - e2fsck -p -f $BACKUP_PART - resize2fs $BACKUP_PART + exec_retry 5 0.5 "parted -s $rootfs_device resizepart $BACKUP_PART_NO ${BACKUP_END_POINT}MiB" + [ $? -ne 0 ] && report_pre_failure_with_msg "ERROR: resize of platform backup partition failed!" + exec_retry 2 0.1 "e2fsck -p -f $BACKUP_PART" + [ $? -ne 0 ] && report_pre_failure_with_msg "ERROR: e2fsck failed on platform backup partition!" + exec_retry 2 1 "resize2fs $BACKUP_PART" + [ $? -ne 0 ] && report_pre_failure_with_msg "ERROR: Filed to resize ext4 fs of platform backup partition!" + elif [ $BACKUP_CURRENT_SIZE -gt $PLATFORM_BACKUP_SIZE ] ; then + report_pre_failure_with_msg "ERROR: Backup partition is ${BACKUP_CURRENT_SIZE}MiB expected size is less or equal to ${PLATFORM_BACKUP_SIZE}MiB." + else + wlog "Backup partition size is correct: ${PLATFORM_BACKUP_SIZE}MiB." fi + cat<>/tmp/part-include part /opt/platform-backup --fstype=ext4 --asprimary --noformat --onpart=$BACKUP_PART --fsoptions="$ROOTFS_OPTIONS" EOF else cat</tmp/backup-guid-change.sh -flock $ROOTFS_DISK sgdisk --change-name=${BACKUP_PART_NO}:"${BACKUP_PART_LABEL}" --typecode=${BACKUP_PART_NO}:"${BACKUP_PART_GUID}" $ROOTFS_DISK +echo "\$(date '+%Y-%m-%d %H:%M:%S.%3N') - Updating backup partition GUID." 
+flock $rootfs_device sgdisk --change-name=${BACKUP_PART_NO}:"${BACKUP_PART_LABEL}" --typecode=${BACKUP_PART_NO}:"${BACKUP_PART_GUID}" $rootfs_device || exit 1 EOF cat<>/tmp/part-include @@ -188,5 +212,3 @@ logvol /scratch --fstype=ext4 --vgname=cgts-vg --size=$SCRATCH_VOL_SIZE --name=s part / --fstype=ext4 --asprimary --onpart=${ROOTFS_PART_PREFIX}4 --fsoptions="$ROOTFS_OPTIONS" EOF -%end - diff --git a/bsp-files/kickstarts/pre_disk_controller.cfg b/bsp-files/kickstarts/pre_disk_controller.cfg index 7ab8e7ed..1d246027 100755 --- a/bsp-files/kickstarts/pre_disk_controller.cfg +++ b/bsp-files/kickstarts/pre_disk_controller.cfg @@ -23,12 +23,16 @@ if [ -d /sys/firmware/efi ] ; then END_POINT=$(($START_POINT + $PLATFORM_BACKUP_SIZE)) BACKUP_END_POINT=$END_POINT if [ $BACKUP_CREATED -eq 0 ] ; then - parted -s $ROOTFS_DISK mkpart primary ext4 ${START_POINT}MiB ${END_POINT}MiB + wlog "Creating platform backup partition of ${PLATFORM_BACKUP_SIZE}MiB from ${START_POINT}MiB to ${END_POINT}MiB." + exec_retry 5 0.5 "parted -s $rootfs_device mkpart primary ext4 ${START_POINT}MiB ${END_POINT}MiB" + [ $? -ne 0 ] && report_pre_failure_with_msg "ERROR: Partition creation failed!" fi START_POINT=$END_POINT END_POINT=$(($START_POINT + $EFI_SIZE)) - parted -s $ROOTFS_DISK mkpart primary fat32 ${START_POINT}MiB ${END_POINT}MiB + wlog "Creating EFI partition of ${EFI_SIZE}MiB from ${START_POINT}MiB to ${END_POINT}MiB." + exec_retry 5 0.5 "parted -s $rootfs_device mkpart primary fat32 ${START_POINT}MiB ${END_POINT}MiB" + [ $? -ne 0 ] && report_pre_failure_with_msg "ERROR: Partition creation failed!" cat<>/tmp/part-include part /boot/efi --fstype=efi --onpart=${ROOTFS_PART_PREFIX}2 @@ -36,13 +40,17 @@ EOF else BACKUP_PART=${ROOTFS_PART_PREFIX}2 BACKUP_PART_NO=2 - parted -s $ROOTFS_DISK mkpart primary 1MiB 2MiB + wlog "Creating 1MB BIOS GRUB partition from 1MiB to 2MiB." + exec_retry 5 0.5 "parted -s $rootfs_device mkpart primary 1MiB 2MiB" + [ $? 
-ne 0 ] && report_pre_failure_with_msg "ERROR: Partition creation failed!" START_POINT=2 END_POINT=$(($START_POINT + $PLATFORM_BACKUP_SIZE)) BACKUP_END_POINT=$END_POINT if [ $BACKUP_CREATED -eq 0 ] ; then - parted -s $ROOTFS_DISK mkpart primary ext4 ${START_POINT}MiB ${END_POINT}MiB + wlog "Creating platform backup partition of ${PLATFORM_BACKUP_SIZE}MiB from ${START_POINT}MiB to ${END_POINT}MiB." + exec_retry 5 0.5 "parted -s $rootfs_device mkpart primary ext4 ${START_POINT}MiB ${END_POINT}MiB" + [ $? -ne 0 ] && report_pre_failure_with_msg "ERROR: Partition creation failed!" fi cat<>/tmp/part-include part biosboot --asprimary --fstype=biosboot --onpart=${ROOTFS_PART_PREFIX}1 @@ -51,30 +59,45 @@ fi START_POINT=$END_POINT END_POINT=$(($START_POINT + $BOOT_SIZE)) -parted -s $ROOTFS_DISK mkpart primary ext4 ${START_POINT}MiB ${END_POINT}MiB +wlog "Creating boot partition of ${BOOT_SIZE}MiB from ${START_POINT}MiB to ${END_POINT}MiB." +exec_retry 5 0.5 "parted -s $rootfs_device mkpart primary ext4 ${START_POINT}MiB ${END_POINT}MiB" +[ $? -ne 0 ] && report_pre_failure_with_msg "ERROR: Partition creation failed!" START_POINT=$END_POINT END_POINT=$(($START_POINT + $ROOTFS_SIZE)) -parted -s $ROOTFS_DISK mkpart primary ext4 ${START_POINT}MiB ${END_POINT}MiB +wlog "Creating rootfs partition of ${ROOTFS_SIZE}MiB from ${START_POINT}MiB to ${END_POINT}MiB." +exec_retry 5 0.5 "parted -s $rootfs_device mkpart primary ext4 ${START_POINT}MiB ${END_POINT}MiB" +[ $? -ne 0 ] && report_pre_failure_with_msg "ERROR: Partition creation failed!" START_POINT=$END_POINT -parted -s $ROOTFS_DISK mkpart extended ${START_POINT}MiB 100% +wlog "Creating cgcs-vg partition of ${CGCS_PV_SIZE}MiB from ${START_POINT}MiB to 100%." +exec_retry 5 0.5 "parted -s $rootfs_device mkpart extended ${START_POINT}MiB 100%" +[ $? -ne 0 ] && report_pre_failure_with_msg "ERROR: Partition creation failed!" 
if [ $BACKUP_CREATED -ne 0 ] ; then BACKUP_CURRENT_SIZE=$(parted -s $BACKUP_PART unit MiB print | grep $BACKUP_PART | awk '{print $3}' | sed 's/[^C0-9]*//g') if [ $BACKUP_CURRENT_SIZE -lt $PLATFORM_BACKUP_SIZE ] ; then + wlog "Backup partition size is ${BACKUP_CURRENT_SIZE}MiB, resizing to ${PLATFORM_BACKUP_SIZE}MiB." # parted will throw an error about overlapping with the next partition if we don't do this BACKUP_END_POINT=$(($BACKUP_END_POINT - 1)).9 - parted -s $ROOTFS_DISK resizepart $BACKUP_PART_NO ${BACKUP_END_POINT}MiB - e2fsck -p -f $BACKUP_PART - resize2fs $BACKUP_PART + exec_retry 5 0.5 "parted -s $rootfs_device resizepart $BACKUP_PART_NO ${BACKUP_END_POINT}MiB" + [ $? -ne 0 ] && report_pre_failure_with_msg "ERROR: resize of platform backup partition failed!" + exec_retry 2 0.1 "e2fsck -p -f $BACKUP_PART" + [ $? -ne 0 ] && report_pre_failure_with_msg "ERROR: e2fsck failed on platform backup partition!" + exec_retry 2 1 "resize2fs $BACKUP_PART" + [ $? -ne 0 ] && report_pre_failure_with_msg "ERROR: Filed to resize ext4 fs of platform backup partition!" + elif [ $BACKUP_CURRENT_SIZE -gt $PLATFORM_BACKUP_SIZE ] ; then + report_pre_failure_with_msg "ERROR: Backup partition is ${BACKUP_CURRENT_SIZE}MiB expected size is less or equal to ${PLATFORM_BACKUP_SIZE}MiB." + else + wlog "Backup partition size is correct: ${PLATFORM_BACKUP_SIZE}MiB." fi cat<>/tmp/part-include part /opt/platform-backup --fstype=ext4 --asprimary --noformat --onpart=$BACKUP_PART --fsoptions="$ROOTFS_OPTIONS" EOF else cat</tmp/backup-guid-change.sh -flock $ROOTFS_DISK sgdisk --change-name=${BACKUP_PART_NO}:"${BACKUP_PART_LABEL}" --typecode=${BACKUP_PART_NO}:"${BACKUP_PART_GUID}" $ROOTFS_DISK +echo "\$(date '+%Y-%m-%d %H:%M:%S.%3N') - Updating backup partition GUID." 
+flock $rootfs_device sgdisk --change-name=${BACKUP_PART_NO}:"${BACKUP_PART_LABEL}" --typecode=${BACKUP_PART_NO}:"${BACKUP_PART_GUID}" $rootfs_device || exit 1 EOF cat<>/tmp/part-include @@ -91,5 +114,3 @@ logvol /scratch --fstype=ext4 --vgname=cgts-vg --size=$SCRATCH_VOL_SIZE --name=s part / --fstype=ext4 --asprimary --onpart=${ROOTFS_PART_PREFIX}4 --fsoptions="$ROOTFS_OPTIONS" EOF -%end - diff --git a/bsp-files/kickstarts/pre_disk_setup_common.cfg b/bsp-files/kickstarts/pre_disk_setup_common.cfg index 44f66606..84f5394d 100644 --- a/bsp-files/kickstarts/pre_disk_setup_common.cfg +++ b/bsp-files/kickstarts/pre_disk_setup_common.cfg @@ -3,6 +3,9 @@ # Source common functions . /tmp/ks-functions.sh +wlog "ISO_DEV='$ISO_DEV'." +wlog "USB_DEV='$USB_DEV'." + # This is a really fancy way of finding the first usable disk for the # install and not stomping on the USB device if it comes up first @@ -17,13 +20,18 @@ if [ -z "$rootfs_device" ]; then rootfs_device=$(get_disk_dev) fi -# Convert to by-path +# Get root and boot devices orig_rootfs_device=$rootfs_device -rootfs_device=$(get_by_path $rootfs_device) +by_path_rootfs_device=$(get_by_path $rootfs_device) +rootfs_device=$(get_disk $by_path_rootfs_device) +wlog "Found rootfs $orig_rootfs_device on: $by_path_rootfs_device->$rootfs_device." orig_boot_device=$boot_device -boot_device=$(get_by_path $boot_device) +by_path_boot_device=$(get_by_path $boot_device) +boot_device=$(get_disk $by_path_boot_device) +wlog "Found boot $orig_boot_device on: $by_path_boot_device->$boot_device." +# Check if boot and rootfs devices are valid if [ ! -e "$rootfs_device" -o ! -e "$boot_device" ] ; then # Touch this file to prevent Anaconda from dying an ungraceful death touch /tmp/part-include @@ -31,9 +39,50 @@ if [ ! -e "$rootfs_device" -o ! -e "$boot_device" ] ; then report_pre_failure_with_msg "ERROR: Specified installation ($orig_rootfs_device) or boot ($orig_boot_device) device is invalid." 
fi +# Get all block devices of type disk in the system. This includes solid +# state devices. +# Note: /dev/* are managed by kernel tmpdevfs while links in /dev/disk/by-path/ +# are managed by udev which updates them asynchronously so we should avoid using +# them while performing partition operations. +STOR_DEVS="" +wlog "Detected storage devices:" +for f in /dev/disk/by-path/*; do + dev=$(readlink -f $f) + exec_retry 2 0.5 "lsblk --nodeps --pairs $dev" | grep -q 'TYPE="disk"' + if [ $? -eq 0 ] + then + STOR_DEVS="$STOR_DEVS $dev" + wlog " ${f}->${dev}" + fi +done + +if [ -z "$STOR_DEVS" ] +then + report_pre_failure_with_msg "ERROR: No storage devices available." +fi + +# Lock all devices so that udev doesn't trigger a kernel partition table +# rescan that removes and recreates all /dev nodes for partitions on those +# devices. Since udev events are asynchronous this could lead to a case +# where /dev/ links for existing partitions are briefly missing. +# Missing /dev links leads to command execution failures. +STOR_DEV_FDS="$stdout" +for dev in $STOR_DEVS; do + exec {fd}>$dev || report_pre_failure_with_msg "ERROR: Error creating file descriptor for $dev." + flock -n "$fd" || report_pre_failure_with_msg "ERROR: Can't get a lock on fd $fd of device $dev." + STOR_DEV_FDS="$STOR_DEV_FDS $fd" +done + +# Log info about system state at beginning of partitioning operation +for dev in $STOR_DEVS; do + wlog "Initial partition table for $dev is:" + parted -s $dev unit mib print +done + # Ensure specified device is not a USB drive udevadm info --query=property --name=$rootfs_device |grep -q '^ID_BUS=usb' || \ udevadm info --query=property --name=$boot_device |grep -q '^ID_BUS=usb' + if [ $? -eq 0 ]; then # Touch this file to prevent Anaconda from dying an ungraceful death touch /tmp/part-include @@ -42,30 +91,48 @@ if [ $? 
-eq 0 ]; then fi # Deactivate existing volume groups to avoid Anaconda issues with pre-existing groups -vgs --noheadings -o vg_name | xargs --no-run-if-empty -n 1 vgchange -an +vgs=$(exec_no_fds "$STOR_DEV_FDS" "vgs --noheadings -o vg_name") +for vg in $vgs; do + wlog "Disabling $vg." + exec_no_fds "$STOR_DEV_FDS" "vgchange -an $vg" 5 0.5 + [ $? -ne 0 ] && report_pre_failure_with_msg "ERROR: Failed to disable $vg." +done # Remove the volume groups that have physical volumes on the root disk -for vg in $(vgs --noheadings -o vg_name); do - pvs --select "vg_name=$vg" --noheadings -o pv_name | grep -q "$(get_disk $rootfs_device)" +for vg in $(exec_no_fds "$STOR_DEV_FDS" "vgs --noheadings -o vg_name"); do + exec_no_fds "$STOR_DEV_FDS" "pvs --select \"vg_name=$vg\" --noheadings -o pv_name" | grep -q "$rootfs_device" if [ $? -ne 0 ]; then + wlog "Found $vg with no PV on rootfs, ignoring." continue fi - echo "Removing vg $vg" - lvremove --force $vg - pvs --select "vg_name=$vg" --noheadings -o pv_name | xargs --no-run-if-empty pvremove --force --force --yes - vgs --select "vg_name=$vg" --noheadings -o vg_name | xargs --no-run-if-empty vgremove --force + wlog "Removing LVs on $vg." + exec_no_fds "$STOR_DEV_FDS" "lvremove --force $vg" 5 0.5 || wlog "WARNING: Failed to remove lvs on $vg." + pvs=$(exec_no_fds "$STOR_DEV_FDS" "pvs --select \"vg_name=$vg\" --noheadings -o pv_name") + wlog "VG $vg has PVs: $(echo $pvs), removing them." + for pv in $pvs; do + wlog "Removing PV $pv." + exec_no_fds "$STOR_DEV_FDS" "pvremove --force --force --yes $pv" 5 0.5 + [ $? -ne 0 ] && report_pre_failure_with_msg "ERROR: Failed to remove PV." + done + # VG should no longer be present + vg_check=$(exec_no_fds "$STOR_DEV_FDS" "vgs --select \"vg_name=$vg\" --noheadings -o vg_name") + if [ -n "$vg_check" ]; then + wlog "WARNING: VG $vg is still present after removing PVs! Removing it by force." + exec_no_fds "$STOR_DEV_FDS" "vgremove --force $vg" 5 0.5 + [ $? 
-ne 0 ] && report_pre_failure_with_msg "ERROR: Failed to remove VG." + fi done ONLYUSE_HDD="" part_type_guid_str="Partition GUID code" if [ "$(curl -sf http://pxecontroller:6385/v1/upgrade/$(hostname)/in_upgrade 2>/dev/null)" = "true" ]; then # In an upgrade, only wipe the disk with the rootfs and boot partition - echo "In upgrade, wiping only $rootfs_device" - WIPE_HDD="$(get_disk $rootfs_device)" - ONLYUSE_HDD="$(basename $(get_disk $rootfs_device))" - if [ "$(get_disk $rootfs_device)" != "$(get_disk $boot_device)" ]; then - WIPE_HDD="$WIPE_HDD,$(get_disk $boot_device)" - ONLYUSE_HDD="$ONLYUSE_HDD,$(basename $(get_disk $boot_device))" + wlog "In upgrade, wiping only $rootfs_device" + WIPE_HDD=$rootfs_device + ONLYUSE_HDD="$(basename $rootfs_device)" + if [ "$rootfs_device" != "$boot_device" ]; then + WIPE_HDD="$WIPE_HDD,$boot_device" + ONLYUSE_HDD="$ONLYUSE_HDD,$(basename $boot_device)" fi else # Make a list of all the hard drives that are to be wiped @@ -75,22 +142,15 @@ else # Check if we wipe OSDs if [ "$(curl -sf http://pxecontroller:6385/v1/ihosts/wipe_osds 2>/dev/null)" = "true" ]; then - echo "Wipe OSD data." + wlog "Wipe OSD data." WIPE_CEPH_OSDS="true" else - echo "Skip Ceph OSD data wipe." + wlog "Skip Ceph OSD data wipe." WIPE_CEPH_OSDS="false" fi - for f in /dev/disk/by-path/* + for dev in $STOR_DEVS do - dev=$(readlink -f $f) - lsblk --nodeps --pairs $dev | grep -q 'TYPE="disk"' - if [ $? 
-ne 0 ] - then - continue - fi - # Avoid wiping USB drives udevadm info --query=property --name=$dev |grep -q '^ID_BUS=usb' && continue @@ -101,10 +161,10 @@ else # Scanning the partitions looking for CEPH OSDs and # skipping any disk found with such partitions for part_number in "${part_numbers[@]}"; do - sgdisk_part_info=$(flock $dev sgdisk -i $part_number $dev) + sgdisk_part_info=$(sgdisk -i $part_number $dev) part_type_guid=$(echo "$sgdisk_part_info" | grep "$part_type_guid_str" | awk '{print $4;}') if [ "$part_type_guid" == $CEPH_OSD_GUID ]; then - echo "OSD found on $dev, skipping wipe" + wlog "OSD found on $dev, skipping wipe" wipe_dev="false" break fi @@ -124,14 +184,13 @@ else fi fi done - echo "Not in upgrade, wiping disks: $WIPE_HDD" + wlog "Not in upgrade, wiping disks: $WIPE_HDD" fi -ROOTFS_DISK=$(get_disk $rootfs_device) -ROOTFS_PART_PREFIX=$ROOTFS_DISK +ROOTFS_PART_PREFIX=$rootfs_device #check if disk is nvme -case $ROOTFS_DISK in +case $rootfs_device in *"nvme"*) ROOTFS_PART_PREFIX=${ROOTFS_PART_PREFIX}p ;; @@ -159,28 +218,27 @@ do part=${dev}p${part_number} ;; esac - if [ "$dev" == "$ROOTFS_DISK" ]; then - sgdisk_part_info=$(flock $dev sgdisk -i $part_number $dev) + if [ "$dev" == "$rootfs_device" ]; then + sgdisk_part_info=$(sgdisk -i $part_number $dev) part_type_guid=$(echo "$sgdisk_part_info" | grep "$part_type_guid_str" | awk '{print $4;}') - part_fstype=$(blkid -s TYPE -o value $part) + part_fstype=$(exec_retry 5 0.5 "blkid -s TYPE -o value $part") if [ "$part_type_guid" == $BACKUP_PART_GUID -a "${part_fstype}" == "ext4" ]; then - echo "Skipping wipe backup partition $part" + wlog "Skipping wipe backup partition $part" BACKUP_CREATED=1 continue fi fi - echo "Wiping partition $part" + wlog "Wiping partition $part" dd if=/dev/zero of=$part bs=512 count=34 dd if=/dev/zero of=$part bs=512 count=34 seek=$((`blockdev --getsz $part` - 34)) - parted -s $dev rm $part_number + exec_retry 5 0.5 "parted -s $dev rm $part_number" # LP 1876374: On some nvme 
systems udev doesn't correctly remove the # links to the deleted partitions from /dev/nvme* causing them to be # seen as non block devices. - sleep 0.3 # Wait for udev to settle - rm -f $part # Delete remaining /dev node leftover + exec_retry 5 0.3 "rm -f $part" # Delete remaining /dev node leftover done - if [ $BACKUP_CREATED -eq 0 -o "$dev" != "$ROOTFS_DISK" ]; then - echo "Creating disk label for $dev" + if [ $BACKUP_CREATED -eq 0 -o "$dev" != "$rootfs_device" ]; then + wlog "Creating disk label for $dev" parted -s $dev mktable gpt fi @@ -190,10 +248,10 @@ done # in an upgrade where we're not wiping all disks. # If we ever create other volume groups from kickstart in the future, # include them in this search as well. -partitions=$(pvs --select 'vg_name=cgts-vg' -o pv_name --noheading | grep -v '\[unknown\]') +partitions=$(exec_no_fds "$STOR_DEV_FDS" "pvs --select 'vg_name=cgts-vg' -o pv_name --noheading" | grep -v '\[unknown\]') for p in $partitions do - echo "Pre-wiping $p from kickstart (cgts-vg present)" + wlog "Pre-wiping $p from kickstart (cgts-vg present)" dd if=/dev/zero of=$p bs=512 count=34 dd if=/dev/zero of=$p bs=512 count=34 seek=$((`blockdev --getsz $p` - 34)) done diff --git a/bsp-files/kickstarts/pre_disk_setup_tail.cfg b/bsp-files/kickstarts/pre_disk_setup_tail.cfg new file mode 100644 index 00000000..f11cdd37 --- /dev/null +++ b/bsp-files/kickstarts/pre_disk_setup_tail.cfg @@ -0,0 +1,21 @@ + +# Log info about system state at end of partitioning operation. +for dev in $STOR_DEVS; do + wlog "Partition table at end of script for $dev is:" + parted -s $dev unit mib print +done + +# Close all FDs and wait for udev to reshuffle all partitions. +wlog "Releasing storage device locks and FDs." +for fd in $STOR_DEV_FDS +do + flock -u "$fd" + exec {fd}>&- +done +sleep 2 +udevadm settle || report_pre_failure_with_msg "ERROR: udevadm settle failed!" + +# Rescan LVM cache to avoid warnings for VGs that were recreated. 
+pvscan --cache + +%end diff --git a/bsp-files/kickstarts/pre_disk_storage.cfg b/bsp-files/kickstarts/pre_disk_storage.cfg index d3bf93d2..6e89db1c 100755 --- a/bsp-files/kickstarts/pre_disk_storage.cfg +++ b/bsp-files/kickstarts/pre_disk_storage.cfg @@ -1,5 +1,5 @@ -sz=$(blockdev --getsize64 $(get_disk $rootfs_device)) +sz=$(blockdev --getsize64 $rootfs_device) if [ $sz -le $((90*$gb)) ] ; then LOG_VOL_SIZE=4000 SCRATCH_VOL_SIZE=4000 @@ -23,23 +23,21 @@ EOF if [ -d /sys/firmware/efi ] ; then cat<>/tmp/part-include -part /boot/efi --fstype=efi --size=300 --ondrive=$(get_disk $boot_device) +part /boot/efi --fstype=efi --size=300 --ondrive=$boot_device EOF else cat<>/tmp/part-include -part biosboot --asprimary --fstype=biosboot --size=1 --ondrive=$(get_disk $boot_device) +part biosboot --asprimary --fstype=biosboot --size=1 --ondrive=$boot_device EOF fi cat<>/tmp/part-include -part /boot --fstype=ext4 --asprimary --size=500 --ondrive=$(get_disk $rootfs_device) --fsoptions="$ROOTFS_OPTIONS" -part pv.253004 --grow --asprimary --size=500 --ondrive=$(get_disk $rootfs_device) +part /boot --fstype=ext4 --asprimary --size=500 --ondrive=$rootfs_device --fsoptions="$ROOTFS_OPTIONS" +part pv.253004 --grow --asprimary --size=500 --ondrive=$rootfs_device volgroup cgts-vg --pesize=32768 pv.253004 logvol /var/log --fstype=ext4 --vgname=cgts-vg --size=$LOG_VOL_SIZE --name=log-lv logvol /scratch --fstype=ext4 --vgname=cgts-vg --size=$SCRATCH_VOL_SIZE --name=scratch-lv -part / --fstype=ext4 --asprimary --size=$ROOTFS_SIZE --ondrive=$(get_disk $rootfs_device) --fsoptions="$ROOTFS_OPTIONS" +part / --fstype=ext4 --asprimary --size=$ROOTFS_SIZE --ondrive=$rootfs_device --fsoptions="$ROOTFS_OPTIONS" EOF -%end - diff --git a/bsp-files/kickstarts/pre_disk_worker.cfg b/bsp-files/kickstarts/pre_disk_worker.cfg index 5a408f5a..d051b530 100755 --- a/bsp-files/kickstarts/pre_disk_worker.cfg +++ b/bsp-files/kickstarts/pre_disk_worker.cfg @@ -15,7 +15,7 @@ BOOT_VOL_SIZE=500 ## 69648/1024=68.01. 
CGTS_PV_SIZE=69*1024=70656. CGTS_PV_SIZE=70656 -sz=$(blockdev --getsize64 $(get_disk $rootfs_device)) +sz=$(blockdev --getsize64 $rootfs_device) if [ $sz -le $((80*$gb)) ] ; then ## Less than 80GB use a 10GB root partition ROOTFS_SIZE=10000 @@ -37,23 +37,21 @@ EOF if [ -d /sys/firmware/efi ] ; then cat<>/tmp/part-include -part /boot/efi --fstype=efi --size=300 --ondrive=$(get_disk $boot_device) +part /boot/efi --fstype=efi --size=300 --ondrive=$boot_device EOF else cat<>/tmp/part-include -part biosboot --asprimary --fstype=biosboot --size=1 --ondrive=$(get_disk $boot_device) +part biosboot --asprimary --fstype=biosboot --size=1 --ondrive=$boot_device EOF fi cat<>/tmp/part-include -part /boot --fstype=ext4 --asprimary --size=$BOOT_VOL_SIZE --ondrive=$(get_disk $rootfs_device) --fsoptions="$ROOTFS_OPTIONS" -part pv.253004 --asprimary --size=$CGTS_PV_SIZE --ondrive=$(get_disk $rootfs_device) +part /boot --fstype=ext4 --asprimary --size=$BOOT_VOL_SIZE --ondrive=$rootfs_device --fsoptions="$ROOTFS_OPTIONS" +part pv.253004 --asprimary --size=$CGTS_PV_SIZE --ondrive=$rootfs_device volgroup cgts-vg --pesize=32768 pv.253004 logvol /var/log --fstype=ext4 --vgname=cgts-vg --size=$LOG_VOL_SIZE --name=log-lv logvol /scratch --fstype=ext4 --vgname=cgts-vg --size=$SCRATCH_VOL_SIZE --name=scratch-lv -part / --fstype=ext4 --asprimary --size=$ROOTFS_SIZE --ondrive=$(get_disk $rootfs_device) --fsoptions="$ROOTFS_OPTIONS" +part / --fstype=ext4 --asprimary --size=$ROOTFS_SIZE --ondrive=$rootfs_device --fsoptions="$ROOTFS_OPTIONS" EOF -%end -