Merge "Harden kickstarts as udev behavior can lead to random failures"

This commit is contained in:
Zuul 2020-07-28 21:20:59 +00:00 committed by Gerrit Code Review
commit bb739f8311
9 changed files with 284 additions and 90 deletions

View File

@ -42,6 +42,7 @@ write_config_file("controller",
"pre_pkglist.cfg",
"pre_disk_setup_common.cfg",
"pre_disk_controller.cfg",
"pre_disk_setup_tail.cfg",
"post_platform_conf_controller.cfg",
"post_common.cfg",
"post_kernel_controller.cfg",
@ -54,6 +55,7 @@ write_config_file("controller-worker",
"pre_pkglist.cfg",
"pre_disk_setup_common.cfg",
"pre_disk_aio.cfg",
"pre_disk_setup_tail.cfg",
"post_platform_conf_aio.cfg",
"post_common.cfg",
"post_kernel_aio_and_worker.cfg",
@ -67,6 +69,7 @@ write_config_file("controller-worker-lowlatency",
"pre_pkglist_lowlatency.cfg",
"pre_disk_setup_common.cfg",
"pre_disk_aio.cfg",
"pre_disk_setup_tail.cfg",
"post_platform_conf_aio_lowlatency.cfg",
"post_common.cfg",
"post_kernel_aio_and_worker.cfg",
@ -84,6 +87,7 @@ write_config_file("controller",
"pre_pkglist.cfg",
"pre_disk_setup_common.cfg",
"pre_disk_controller.cfg",
"pre_disk_setup_tail.cfg",
"post_platform_conf_controller.cfg",
"post_common.cfg",
"post_kernel_controller.cfg",
@ -95,6 +99,7 @@ write_config_file("controller-worker",
"pre_pkglist.cfg",
"pre_disk_setup_common.cfg",
"pre_disk_aio.cfg",
"pre_disk_setup_tail.cfg",
"post_platform_conf_aio.cfg",
"post_common.cfg",
"post_kernel_aio_and_worker.cfg",
@ -107,6 +112,7 @@ write_config_file("controller-worker-lowlatency",
"pre_pkglist_lowlatency.cfg",
"pre_disk_setup_common.cfg",
"pre_disk_aio.cfg",
"pre_disk_setup_tail.cfg",
"post_platform_conf_aio_lowlatency.cfg",
"post_common.cfg",
"post_kernel_aio_and_worker.cfg",
@ -123,6 +129,7 @@ write_config_file("controller",
"pre_pkglist.cfg",
"pre_disk_setup_common.cfg",
"pre_disk_controller.cfg",
"pre_disk_setup_tail.cfg",
"post_platform_conf_controller.cfg",
"post_common.cfg",
"post_kernel_controller.cfg",
@ -136,6 +143,7 @@ write_config_file("controller-worker",
"pre_pkglist.cfg",
"pre_disk_setup_common.cfg",
"pre_disk_aio.cfg",
"pre_disk_setup_tail.cfg",
"post_platform_conf_aio.cfg",
"post_common.cfg",
"post_kernel_aio_and_worker.cfg",
@ -150,6 +158,7 @@ write_config_file("controller-worker-lowlatency",
"pre_pkglist_lowlatency.cfg",
"pre_disk_setup_common.cfg",
"pre_disk_aio.cfg",
"pre_disk_setup_tail.cfg",
"post_platform_conf_aio_lowlatency.cfg",
"post_common.cfg",
"post_kernel_aio_and_worker.cfg",
@ -164,6 +173,7 @@ write_config_file("worker",
"pre_pkglist.cfg",
"pre_disk_setup_common.cfg",
"pre_disk_worker.cfg",
"pre_disk_setup_tail.cfg",
"post_platform_conf_worker.cfg",
"post_common.cfg",
"post_kernel_aio_and_worker.cfg",
@ -176,6 +186,7 @@ write_config_file("worker-lowlatency",
"pre_pkglist_lowlatency.cfg",
"pre_disk_setup_common.cfg",
"pre_disk_worker.cfg",
"pre_disk_setup_tail.cfg",
"post_platform_conf_worker_lowlatency.cfg",
"post_common.cfg",
"post_kernel_aio_and_worker.cfg",
@ -188,6 +199,7 @@ write_config_file("storage",
"pre_pkglist.cfg",
"pre_disk_setup_common.cfg",
"pre_disk_storage.cfg",
"pre_disk_setup_tail.cfg",
"post_platform_conf_storage.cfg",
"post_common.cfg",
"post_kernel_storage.cfg",
@ -208,6 +220,7 @@ foreach $server (keys %boot_servers)
"pre_pkglist.cfg",
"pre_disk_setup_common.cfg",
"pre_disk_controller.cfg",
"pre_disk_setup_tail.cfg",
"post_platform_conf_controller.cfg",
"post_common.cfg",
"post_kernel_controller.cfg",
@ -219,6 +232,7 @@ foreach $server (keys %boot_servers)
"pre_pkglist.cfg",
"pre_disk_setup_common.cfg",
"pre_disk_aio.cfg",
"pre_disk_setup_tail.cfg",
"post_platform_conf_aio.cfg",
"post_common.cfg",
"post_kernel_aio_and_worker.cfg",
@ -231,6 +245,7 @@ foreach $server (keys %boot_servers)
"pre_pkglist_lowlatency.cfg",
"pre_disk_setup_common.cfg",
"pre_disk_aio.cfg",
"pre_disk_setup_tail.cfg",
"post_platform_conf_aio_lowlatency.cfg",
"post_common.cfg",
"post_kernel_aio_and_worker.cfg",

View File

@ -9,6 +9,17 @@ cat <<END_FUNCTIONS >/tmp/ks-functions.sh
# SPDX-License-Identifier: Apache-2.0
#
# Get the FD used by subshells to log output
if [ -z "\$stdout" ]; then
exec {stdout}>&1
fi
# Write a timestamped log line.
# Arguments: 1 - message text
# Output goes to the descriptor whose number is held in the global 'stdout'
# (allocated above with exec {stdout}>&1), so messages remain visible even
# when the caller's standard output is being captured.
# This text is emitted through an unquoted here-document, hence the escaped
# dollar signs.
function wlog()
{
    local stamp
    stamp="\$(date "+%Y-%m-%d %H:%M:%S.%3N")"
    echo "\${stamp} - \$1" >&\${stdout}
}
function get_by_path()
{
local disk=\$(cd /dev ; readlink -f \$1)
@ -73,7 +84,7 @@ function get_http_port()
echo \$(cat /proc/cmdline |xargs -n1 echo |grep '^inst.repo=' | sed -r 's#^[^/]*://[^/]*:([0-9]*)/.*#\1#')
}
get_disk_dev()
function get_disk_dev()
{
local disk
# Detect HDD
@ -97,5 +108,52 @@ get_disk_dev()
done
}
# Run a command with a set of file descriptors closed for that command only.
# Used for tools that complain about leaked FDs.
# Arguments:
#   1 - whitespace separated list of descriptor numbers to close
#   2 - command line to run (evaluated with eval)
#   3 - optional number of tries; when given the command goes through
#       exec_retry, otherwise it is run exactly once
#   4 - optional delay in seconds between tries (only used with argument 3)
# Outputs: the stdout of the command
# Returns: the exit status of the command (of the last try when retrying)
function exec_no_fds()
{
    local fds=\$1
    local cmd=\$2
    local retries=\$3
    local interval=\$4
    local ret_code=0
    local ret_stdout=""
    # Keep the loop variable local: callers use a global named fd together
    # with exec {fd}> and it must not be overwritten from here.
    local fd

    # Append an "N>&-" redirection for every descriptor so that it is closed
    # in the executed command only, not in the calling shell.
    for fd in \$fds
    do
        cmd="\$cmd \$fd>&-"
    done

    if [ -z "\$retries" ]; then
        #wlog "Running command: '\$cmd'."
        eval "\$cmd"
        return \$?
    fi

    ret_stdout=\$(exec_retry "\$retries" "\$interval" "\$cmd")
    ret_code=\$?
    echo "\${ret_stdout}"
    return \${ret_code}
}
# Run a command until it succeeds or the number of tries is exhausted.
# Arguments:
#   1 - maximum number of tries; empty, non-numeric or zero values are
#       treated as 1 so the command is always attempted at least once
#   2 - delay in seconds between tries (passed to sleep as is)
#   3 - command line to run (evaluated with eval)
# Outputs: the stdout of the last try; failures are reported through wlog
# Returns: the exit status of the last try
function exec_retry()
{
    local retries=\$1
    local interval=\$2
    local cmd=\$3
    local -i retry_count=1
    local ret_code=0
    local ret_stdout=""

    # Without this guard an empty or zero try count skips the loop entirely
    # and reports success although nothing was executed.
    case "\$retries" in
        ''|*[!0-9]*) retries=1 ;;
    esac
    if [ "\$retries" -lt 1 ]; then
        retries=1
    fi

    while [ "\$retry_count" -le "\$retries" ]; do
        #wlog "Running command: '\$cmd'."
        # Quote the command so it reaches eval unmodified (no word splitting
        # or pathname expansion before eval parses it).
        ret_stdout=\$(eval "\$cmd")
        ret_code=\$?
        [ \$ret_code -eq 0 ] && break
        wlog "Error running command '\${cmd}'. Try \${retry_count} of \${retries} at \${interval}s."
        wlog "ret_code: \${ret_code}, stdout: '\${ret_stdout}'."
        # Waiting only makes sense when another try follows.
        if [ "\$retry_count" -lt "\$retries" ]; then
            sleep "\$interval"
        fi
        retry_count=\$((retry_count + 1))
    done

    echo "\${ret_stdout}"
    return \${ret_code}
}
END_FUNCTIONS

View File

@ -1,9 +1,12 @@
%post --nochroot
%post --nochroot --erroronfail
# Source common functions
. /tmp/ks-functions.sh
# Change GUID of backup partition
change_guid=/tmp/backup-guid-change.sh
if [ -f "$change_guid" ]; then
sh $change_guid
sh $change_guid || report_post_failure_with_logfile "ERROR: Failed to update platform backup GUID"
fi
%end

View File

@ -85,7 +85,7 @@
## cgts-vg PV (142G), cgts-vg PV (336G)
##
sz=$(blockdev --getsize64 $(get_disk $ROOTFS_DISK))
sz=$(blockdev --getsize64 $rootfs_device)
if [ $sz -le $((240*$gb)) ] ; then
# Round CGCS_PV_SIZE to the closest upper value that can be divided by 1024.
# 190480/1024=186.02. CGCS_PV_SIZE=187*1024=191488. Using a disk with a
@ -119,12 +119,16 @@ if [ -d /sys/firmware/efi ] ; then
END_POINT=$(($START_POINT + $PLATFORM_BACKUP_SIZE))
BACKUP_END_POINT=$END_POINT
if [ $BACKUP_CREATED -eq 0 ] ; then
parted -s $ROOTFS_DISK mkpart primary ext4 ${START_POINT}MiB ${END_POINT}MiB
wlog "Creating platform backup partition of ${PLATFORM_BACKUP_SIZE}MiB from ${START_POINT}MiB to ${END_POINT}MiB."
exec_retry 5 0.5 "parted -s $rootfs_device mkpart primary ext4 ${START_POINT}MiB ${END_POINT}MiB"
[ $? -ne 0 ] && report_pre_failure_with_msg "ERROR: Partition creation failed!"
fi
START_POINT=$END_POINT
END_POINT=$(($START_POINT + $EFI_SIZE))
parted -s $ROOTFS_DISK mkpart primary fat32 ${START_POINT}MiB ${END_POINT}MiB
wlog "Creating EFI partition of ${EFI_SIZE}MiB from ${START_POINT}MiB to ${END_POINT}MiB."
exec_retry 5 0.5 "parted -s $rootfs_device mkpart primary fat32 ${START_POINT}MiB ${END_POINT}MiB"
[ $? -ne 0 ] && report_pre_failure_with_msg "ERROR: Partition creation failed!"
cat<<EOF>>/tmp/part-include
part /boot/efi --fstype=efi --onpart=${ROOTFS_PART_PREFIX}2
@ -132,13 +136,17 @@ EOF
else
BACKUP_PART=${ROOTFS_PART_PREFIX}2
BACKUP_PART_NO=2
parted -s $ROOTFS_DISK mkpart primary 1MiB 2MiB
wlog "Creating 1MB BIOS GRUB partition from 1MiB to 2MiB."
exec_retry 5 0.5 "parted -s $rootfs_device mkpart primary 1MiB 2MiB"
[ $? -ne 0 ] && report_pre_failure_with_msg "ERROR: Partition creation failed!"
START_POINT=2
END_POINT=$(($START_POINT + $PLATFORM_BACKUP_SIZE))
BACKUP_END_POINT=$END_POINT
if [ $BACKUP_CREATED -eq 0 ] ; then
parted -s $ROOTFS_DISK mkpart primary ext4 ${START_POINT}MiB ${END_POINT}MiB
wlog "Creating platform backup partition of ${PLATFORM_BACKUP_SIZE}MiB from ${START_POINT}MiB to ${END_POINT}MiB."
exec_retry 5 0.5 "parted -s $rootfs_device mkpart primary ext4 ${START_POINT}MiB ${END_POINT}MiB"
[ $? -ne 0 ] && report_pre_failure_with_msg "ERROR: Partition creation failed!"
fi
cat<<EOF>>/tmp/part-include
part biosboot --asprimary --fstype=biosboot --onpart=${ROOTFS_PART_PREFIX}1
@ -147,31 +155,47 @@ fi
START_POINT=$END_POINT
END_POINT=$(($START_POINT + $BOOT_SIZE))
parted -s $ROOTFS_DISK mkpart primary ext4 ${START_POINT}MiB ${END_POINT}MiB
wlog "Creating boot partition of ${BOOT_SIZE}MiB from ${START_POINT}MiB to ${END_POINT}MiB."
exec_retry 5 0.5 "parted -s $rootfs_device mkpart primary ext4 ${START_POINT}MiB ${END_POINT}MiB"
[ $? -ne 0 ] && report_pre_failure_with_msg "ERROR: Partition creation failed!"
START_POINT=$END_POINT
END_POINT=$(($START_POINT + $ROOTFS_SIZE))
parted -s $ROOTFS_DISK mkpart primary ext4 ${START_POINT}MiB ${END_POINT}MiB
wlog "Creating rootfs partition of ${ROOTFS_SIZE}MiB from ${START_POINT}MiB to ${END_POINT}MiB."
exec_retry 5 0.5 "parted -s $rootfs_device mkpart primary ext4 ${START_POINT}MiB ${END_POINT}MiB"
[ $? -ne 0 ] && report_pre_failure_with_msg "ERROR: Partition creation failed!"
START_POINT=$END_POINT
END_POINT=$(($START_POINT + $CGCS_PV_SIZE))
parted -s $ROOTFS_DISK mkpart extended ${START_POINT}MiB ${END_POINT}MiB
wlog "Creating cgcs-vg partition of ${CGCS_PV_SIZE}MiB from ${START_POINT}MiB to ${END_POINT}MiB."
exec_retry 5 0.5 "parted -s $rootfs_device mkpart extended ${START_POINT}MiB ${END_POINT}MiB"
[ $? -ne 0 ] && report_pre_failure_with_msg "ERROR: Partition creation failed!"
if [ $BACKUP_CREATED -ne 0 ] ; then
BACKUP_CURRENT_SIZE=$(parted -s $BACKUP_PART unit MiB print | grep $BACKUP_PART | awk '{print $3}' | sed 's/[^C0-9]*//g')
if [ $BACKUP_CURRENT_SIZE -lt $PLATFORM_BACKUP_SIZE ] ; then
wlog "Backup partition size is ${BACKUP_CURRENT_SIZE}MiB, resizing to ${PLATFORM_BACKUP_SIZE}MiB."
# parted will throw an error about overlapping with the next partition if we don't do this
BACKUP_END_POINT=$(($BACKUP_END_POINT - 1)).9
parted -s $ROOTFS_DISK resizepart $BACKUP_PART_NO ${BACKUP_END_POINT}MiB
e2fsck -p -f $BACKUP_PART
resize2fs $BACKUP_PART
exec_retry 5 0.5 "parted -s $rootfs_device resizepart $BACKUP_PART_NO ${BACKUP_END_POINT}MiB"
[ $? -ne 0 ] && report_pre_failure_with_msg "ERROR: resize of platform backup partition failed!"
exec_retry 2 0.1 "e2fsck -p -f $BACKUP_PART"
[ $? -ne 0 ] && report_pre_failure_with_msg "ERROR: e2fsck failed on platform backup partition!"
exec_retry 2 1 "resize2fs $BACKUP_PART"
[ $? -ne 0 ] && report_pre_failure_with_msg "ERROR: Filed to resize ext4 fs of platform backup partition!"
elif [ $BACKUP_CURRENT_SIZE -gt $PLATFORM_BACKUP_SIZE ] ; then
report_pre_failure_with_msg "ERROR: Backup partition is ${BACKUP_CURRENT_SIZE}MiB expected size is less or equal to ${PLATFORM_BACKUP_SIZE}MiB."
else
wlog "Backup partition size is correct: ${PLATFORM_BACKUP_SIZE}MiB."
fi
cat<<EOF>>/tmp/part-include
part /opt/platform-backup --fstype=ext4 --asprimary --noformat --onpart=$BACKUP_PART --fsoptions="$ROOTFS_OPTIONS"
EOF
else
cat<<EOF>/tmp/backup-guid-change.sh
flock $ROOTFS_DISK sgdisk --change-name=${BACKUP_PART_NO}:"${BACKUP_PART_LABEL}" --typecode=${BACKUP_PART_NO}:"${BACKUP_PART_GUID}" $ROOTFS_DISK
echo "\$(date '+%Y-%m-%d %H:%M:%S.%3N') - Updating backup partition GUID."
flock $rootfs_device sgdisk --change-name=${BACKUP_PART_NO}:"${BACKUP_PART_LABEL}" --typecode=${BACKUP_PART_NO}:"${BACKUP_PART_GUID}" $rootfs_device || exit 1
EOF
cat<<EOF>>/tmp/part-include
@ -188,5 +212,3 @@ logvol /scratch --fstype=ext4 --vgname=cgts-vg --size=$SCRATCH_VOL_SIZE --name=s
part / --fstype=ext4 --asprimary --onpart=${ROOTFS_PART_PREFIX}4 --fsoptions="$ROOTFS_OPTIONS"
EOF
%end

View File

@ -23,12 +23,16 @@ if [ -d /sys/firmware/efi ] ; then
END_POINT=$(($START_POINT + $PLATFORM_BACKUP_SIZE))
BACKUP_END_POINT=$END_POINT
if [ $BACKUP_CREATED -eq 0 ] ; then
parted -s $ROOTFS_DISK mkpart primary ext4 ${START_POINT}MiB ${END_POINT}MiB
wlog "Creating platform backup partition of ${PLATFORM_BACKUP_SIZE}MiB from ${START_POINT}MiB to ${END_POINT}MiB."
exec_retry 5 0.5 "parted -s $rootfs_device mkpart primary ext4 ${START_POINT}MiB ${END_POINT}MiB"
[ $? -ne 0 ] && report_pre_failure_with_msg "ERROR: Partition creation failed!"
fi
START_POINT=$END_POINT
END_POINT=$(($START_POINT + $EFI_SIZE))
parted -s $ROOTFS_DISK mkpart primary fat32 ${START_POINT}MiB ${END_POINT}MiB
wlog "Creating EFI partition of ${EFI_SIZE}MiB from ${START_POINT}MiB to ${END_POINT}MiB."
exec_retry 5 0.5 "parted -s $rootfs_device mkpart primary fat32 ${START_POINT}MiB ${END_POINT}MiB"
[ $? -ne 0 ] && report_pre_failure_with_msg "ERROR: Partition creation failed!"
cat<<EOF>>/tmp/part-include
part /boot/efi --fstype=efi --onpart=${ROOTFS_PART_PREFIX}2
@ -36,13 +40,17 @@ EOF
else
BACKUP_PART=${ROOTFS_PART_PREFIX}2
BACKUP_PART_NO=2
parted -s $ROOTFS_DISK mkpart primary 1MiB 2MiB
wlog "Creating 1MB BIOS GRUB partition from 1MiB to 2MiB."
exec_retry 5 0.5 "parted -s $rootfs_device mkpart primary 1MiB 2MiB"
[ $? -ne 0 ] && report_pre_failure_with_msg "ERROR: Partition creation failed!"
START_POINT=2
END_POINT=$(($START_POINT + $PLATFORM_BACKUP_SIZE))
BACKUP_END_POINT=$END_POINT
if [ $BACKUP_CREATED -eq 0 ] ; then
parted -s $ROOTFS_DISK mkpart primary ext4 ${START_POINT}MiB ${END_POINT}MiB
wlog "Creating platform backup partition of ${PLATFORM_BACKUP_SIZE}MiB from ${START_POINT}MiB to ${END_POINT}MiB."
exec_retry 5 0.5 "parted -s $rootfs_device mkpart primary ext4 ${START_POINT}MiB ${END_POINT}MiB"
[ $? -ne 0 ] && report_pre_failure_with_msg "ERROR: Partition creation failed!"
fi
cat<<EOF>>/tmp/part-include
part biosboot --asprimary --fstype=biosboot --onpart=${ROOTFS_PART_PREFIX}1
@ -51,30 +59,45 @@ fi
START_POINT=$END_POINT
END_POINT=$(($START_POINT + $BOOT_SIZE))
parted -s $ROOTFS_DISK mkpart primary ext4 ${START_POINT}MiB ${END_POINT}MiB
wlog "Creating boot partition of ${BOOT_SIZE}MiB from ${START_POINT}MiB to ${END_POINT}MiB."
exec_retry 5 0.5 "parted -s $rootfs_device mkpart primary ext4 ${START_POINT}MiB ${END_POINT}MiB"
[ $? -ne 0 ] && report_pre_failure_with_msg "ERROR: Partition creation failed!"
START_POINT=$END_POINT
END_POINT=$(($START_POINT + $ROOTFS_SIZE))
parted -s $ROOTFS_DISK mkpart primary ext4 ${START_POINT}MiB ${END_POINT}MiB
wlog "Creating rootfs partition of ${ROOTFS_SIZE}MiB from ${START_POINT}MiB to ${END_POINT}MiB."
exec_retry 5 0.5 "parted -s $rootfs_device mkpart primary ext4 ${START_POINT}MiB ${END_POINT}MiB"
[ $? -ne 0 ] && report_pre_failure_with_msg "ERROR: Partition creation failed!"
START_POINT=$END_POINT
parted -s $ROOTFS_DISK mkpart extended ${START_POINT}MiB 100%
wlog "Creating cgcs-vg partition of ${CGCS_PV_SIZE}MiB from ${START_POINT}MiB to 100%."
exec_retry 5 0.5 "parted -s $rootfs_device mkpart extended ${START_POINT}MiB 100%"
[ $? -ne 0 ] && report_pre_failure_with_msg "ERROR: Partition creation failed!"
if [ $BACKUP_CREATED -ne 0 ] ; then
BACKUP_CURRENT_SIZE=$(parted -s $BACKUP_PART unit MiB print | grep $BACKUP_PART | awk '{print $3}' | sed 's/[^C0-9]*//g')
if [ $BACKUP_CURRENT_SIZE -lt $PLATFORM_BACKUP_SIZE ] ; then
wlog "Backup partition size is ${BACKUP_CURRENT_SIZE}MiB, resizing to ${PLATFORM_BACKUP_SIZE}MiB."
# parted will throw an error about overlapping with the next partition if we don't do this
BACKUP_END_POINT=$(($BACKUP_END_POINT - 1)).9
parted -s $ROOTFS_DISK resizepart $BACKUP_PART_NO ${BACKUP_END_POINT}MiB
e2fsck -p -f $BACKUP_PART
resize2fs $BACKUP_PART
exec_retry 5 0.5 "parted -s $rootfs_device resizepart $BACKUP_PART_NO ${BACKUP_END_POINT}MiB"
[ $? -ne 0 ] && report_pre_failure_with_msg "ERROR: resize of platform backup partition failed!"
exec_retry 2 0.1 "e2fsck -p -f $BACKUP_PART"
[ $? -ne 0 ] && report_pre_failure_with_msg "ERROR: e2fsck failed on platform backup partition!"
exec_retry 2 1 "resize2fs $BACKUP_PART"
[ $? -ne 0 ] && report_pre_failure_with_msg "ERROR: Filed to resize ext4 fs of platform backup partition!"
elif [ $BACKUP_CURRENT_SIZE -gt $PLATFORM_BACKUP_SIZE ] ; then
report_pre_failure_with_msg "ERROR: Backup partition is ${BACKUP_CURRENT_SIZE}MiB expected size is less or equal to ${PLATFORM_BACKUP_SIZE}MiB."
else
wlog "Backup partition size is correct: ${PLATFORM_BACKUP_SIZE}MiB."
fi
cat<<EOF>>/tmp/part-include
part /opt/platform-backup --fstype=ext4 --asprimary --noformat --onpart=$BACKUP_PART --fsoptions="$ROOTFS_OPTIONS"
EOF
else
cat<<EOF>/tmp/backup-guid-change.sh
flock $ROOTFS_DISK sgdisk --change-name=${BACKUP_PART_NO}:"${BACKUP_PART_LABEL}" --typecode=${BACKUP_PART_NO}:"${BACKUP_PART_GUID}" $ROOTFS_DISK
echo "\$(date '+%Y-%m-%d %H:%M:%S.%3N') - Updating backup partition GUID."
flock $rootfs_device sgdisk --change-name=${BACKUP_PART_NO}:"${BACKUP_PART_LABEL}" --typecode=${BACKUP_PART_NO}:"${BACKUP_PART_GUID}" $rootfs_device || exit 1
EOF
cat<<EOF>>/tmp/part-include
@ -91,5 +114,3 @@ logvol /scratch --fstype=ext4 --vgname=cgts-vg --size=$SCRATCH_VOL_SIZE --name=s
part / --fstype=ext4 --asprimary --onpart=${ROOTFS_PART_PREFIX}4 --fsoptions="$ROOTFS_OPTIONS"
EOF
%end

View File

@ -3,6 +3,9 @@
# Source common functions
. /tmp/ks-functions.sh
wlog "ISO_DEV='$ISO_DEV'."
wlog "USB_DEV='$USB_DEV'."
# This is a really fancy way of finding the first usable disk for the
# install and not stomping on the USB device if it comes up first
@ -17,13 +20,18 @@ if [ -z "$rootfs_device" ]; then
rootfs_device=$(get_disk_dev)
fi
# Convert to by-path
# Get root and boot devices
orig_rootfs_device=$rootfs_device
rootfs_device=$(get_by_path $rootfs_device)
by_path_rootfs_device=$(get_by_path $rootfs_device)
rootfs_device=$(get_disk $by_path_rootfs_device)
wlog "Found rootfs $orig_rootfs_device on: $by_path_rootfs_device->$rootfs_device."
orig_boot_device=$boot_device
boot_device=$(get_by_path $boot_device)
by_path_boot_device=$(get_by_path $boot_device)
boot_device=$(get_disk $by_path_boot_device)
wlog "Found boot $orig_boot_device on: $by_path_boot_device->$boot_device."
# Check if boot and rootfs devices are valid
if [ ! -e "$rootfs_device" -o ! -e "$boot_device" ] ; then
# Touch this file to prevent Anaconda from dying an ungraceful death
touch /tmp/part-include
@ -31,9 +39,50 @@ if [ ! -e "$rootfs_device" -o ! -e "$boot_device" ] ; then
report_pre_failure_with_msg "ERROR: Specified installation ($orig_rootfs_device) or boot ($orig_boot_device) device is invalid."
fi
# Get all block devices of type disk in the system. This includes solid
# state devices.
# Note: /dev/* nodes are managed by the kernel's devtmpfs while links in /dev/disk/by-path/
# are managed by udev which updates them asynchronously so we should avoid using
# them while performing partition operations.
STOR_DEVS=""
wlog "Detected storage devices:"
for f in /dev/disk/by-path/*; do
dev=$(readlink -f $f)
exec_retry 2 0.5 "lsblk --nodeps --pairs $dev" | grep -q 'TYPE="disk"'
if [ $? -eq 0 ]
then
STOR_DEVS="$STOR_DEVS $dev"
wlog " ${f}->${dev}"
fi
done
if [ -z "$STOR_DEVS" ]
then
report_pre_failure_with_msg "ERROR: No storage devices available."
fi
# Lock all devices so that udev doesn't trigger a kernel partition table
# rescan that removes and recreates all /dev nodes for partitions on those
# devices. Since udev events are asynchronous this could lead to a case
# where /dev/ links for existing partitions are briefly missing.
# Missing /dev links leads to command execution failures.
STOR_DEV_FDS="$stdout"
for dev in $STOR_DEVS; do
exec {fd}>$dev || report_pre_failure_with_msg "ERROR: Error creating file descriptor for $dev."
flock -n "$fd" || report_pre_failure_with_msg "ERROR: Can't get a lock on fd $fd of device $dev."
STOR_DEV_FDS="$STOR_DEV_FDS $fd"
done
# Log info about system state at beginning of partitioning operation
for dev in $STOR_DEVS; do
wlog "Initial partition table for $dev is:"
parted -s $dev unit mib print
done
# Ensure specified device is not a USB drive
udevadm info --query=property --name=$rootfs_device |grep -q '^ID_BUS=usb' || \
udevadm info --query=property --name=$boot_device |grep -q '^ID_BUS=usb'
if [ $? -eq 0 ]; then
# Touch this file to prevent Anaconda from dying an ungraceful death
touch /tmp/part-include
@ -42,30 +91,48 @@ if [ $? -eq 0 ]; then
fi
# Deactivate existing volume groups to avoid Anaconda issues with pre-existing groups
vgs --noheadings -o vg_name | xargs --no-run-if-empty -n 1 vgchange -an
vgs=$(exec_no_fds "$STOR_DEV_FDS" "vgs --noheadings -o vg_name")
for vg in $vgs; do
wlog "Disabling $vg."
exec_no_fds "$STOR_DEV_FDS" "vgchange -an $vg" 5 0.5
[ $? -ne 0 ] && report_pre_failure_with_msg "ERROR: Failed to disable $vg."
done
# Remove the volume groups that have physical volumes on the root disk
for vg in $(vgs --noheadings -o vg_name); do
pvs --select "vg_name=$vg" --noheadings -o pv_name | grep -q "$(get_disk $rootfs_device)"
for vg in $(exec_no_fds "$STOR_DEV_FDS" "vgs --noheadings -o vg_name"); do
exec_no_fds "$STOR_DEV_FDS" "pvs --select \"vg_name=$vg\" --noheadings -o pv_name" | grep -q "$rootfs_device"
if [ $? -ne 0 ]; then
wlog "Found $vg with no PV on rootfs, ignoring."
continue
fi
echo "Removing vg $vg"
lvremove --force $vg
pvs --select "vg_name=$vg" --noheadings -o pv_name | xargs --no-run-if-empty pvremove --force --force --yes
vgs --select "vg_name=$vg" --noheadings -o vg_name | xargs --no-run-if-empty vgremove --force
wlog "Removing LVs on $vg."
exec_no_fds "$STOR_DEV_FDS" "lvremove --force $vg" 5 0.5 || wlog "WARNING: Failed to remove lvs on $vg."
pvs=$(exec_no_fds "$STOR_DEV_FDS" "pvs --select \"vg_name=$vg\" --noheadings -o pv_name")
wlog "VG $vg has PVs: $(echo $pvs), removing them."
for pv in $pvs; do
wlog "Removing PV $pv."
exec_no_fds "$STOR_DEV_FDS" "pvremove --force --force --yes $pv" 5 0.5
[ $? -ne 0 ] && report_pre_failure_with_msg "ERROR: Failed to remove PV."
done
# VG should no longer be present
vg_check=$(exec_no_fds "$STOR_DEV_FDS" "vgs --select \"vg_name=$vg\" --noheadings -o vg_name")
if [ -n "$vg_check" ]; then
wlog "WARNING: VG $vg is still present after removing PVs! Removing it by force."
exec_no_fds "$STOR_DEV_FDS" "vgremove --force $vg" 5 0.5
[ $? -ne 0 ] && report_pre_failure_with_msg "ERROR: Failed to remove VG."
fi
done
ONLYUSE_HDD=""
part_type_guid_str="Partition GUID code"
if [ "$(curl -sf http://pxecontroller:6385/v1/upgrade/$(hostname)/in_upgrade 2>/dev/null)" = "true" ]; then
# In an upgrade, only wipe the disk with the rootfs and boot partition
echo "In upgrade, wiping only $rootfs_device"
WIPE_HDD="$(get_disk $rootfs_device)"
ONLYUSE_HDD="$(basename $(get_disk $rootfs_device))"
if [ "$(get_disk $rootfs_device)" != "$(get_disk $boot_device)" ]; then
WIPE_HDD="$WIPE_HDD,$(get_disk $boot_device)"
ONLYUSE_HDD="$ONLYUSE_HDD,$(basename $(get_disk $boot_device))"
wlog "In upgrade, wiping only $rootfs_device"
WIPE_HDD=$rootfs_device
ONLYUSE_HDD="$(basename $rootfs_device)"
if [ "$rootfs_device" != "$boot_device" ]; then
WIPE_HDD="$WIPE_HDD,$boot_device"
ONLYUSE_HDD="$ONLYUSE_HDD,$(basename $boot_device)"
fi
else
# Make a list of all the hard drives that are to be wiped
@ -75,22 +142,15 @@ else
# Check if we wipe OSDs
if [ "$(curl -sf http://pxecontroller:6385/v1/ihosts/wipe_osds 2>/dev/null)" = "true" ]; then
echo "Wipe OSD data."
wlog "Wipe OSD data."
WIPE_CEPH_OSDS="true"
else
echo "Skip Ceph OSD data wipe."
wlog "Skip Ceph OSD data wipe."
WIPE_CEPH_OSDS="false"
fi
for f in /dev/disk/by-path/*
for dev in $STOR_DEVS
do
dev=$(readlink -f $f)
lsblk --nodeps --pairs $dev | grep -q 'TYPE="disk"'
if [ $? -ne 0 ]
then
continue
fi
# Avoid wiping USB drives
udevadm info --query=property --name=$dev |grep -q '^ID_BUS=usb' && continue
@ -101,10 +161,10 @@ else
# Scanning the partitions looking for CEPH OSDs and
# skipping any disk found with such partitions
for part_number in "${part_numbers[@]}"; do
sgdisk_part_info=$(flock $dev sgdisk -i $part_number $dev)
sgdisk_part_info=$(sgdisk -i $part_number $dev)
part_type_guid=$(echo "$sgdisk_part_info" | grep "$part_type_guid_str" | awk '{print $4;}')
if [ "$part_type_guid" == $CEPH_OSD_GUID ]; then
echo "OSD found on $dev, skipping wipe"
wlog "OSD found on $dev, skipping wipe"
wipe_dev="false"
break
fi
@ -124,14 +184,13 @@ else
fi
fi
done
echo "Not in upgrade, wiping disks: $WIPE_HDD"
wlog "Not in upgrade, wiping disks: $WIPE_HDD"
fi
ROOTFS_DISK=$(get_disk $rootfs_device)
ROOTFS_PART_PREFIX=$ROOTFS_DISK
ROOTFS_PART_PREFIX=$rootfs_device
#check if disk is nvme
case $ROOTFS_DISK in
case $rootfs_device in
*"nvme"*)
ROOTFS_PART_PREFIX=${ROOTFS_PART_PREFIX}p
;;
@ -159,28 +218,27 @@ do
part=${dev}p${part_number}
;;
esac
if [ "$dev" == "$ROOTFS_DISK" ]; then
sgdisk_part_info=$(flock $dev sgdisk -i $part_number $dev)
if [ "$dev" == "$rootfs_device" ]; then
sgdisk_part_info=$(sgdisk -i $part_number $dev)
part_type_guid=$(echo "$sgdisk_part_info" | grep "$part_type_guid_str" | awk '{print $4;}')
part_fstype=$(blkid -s TYPE -o value $part)
part_fstype=$(exec_retry 5 0.5 "blkid -s TYPE -o value $part")
if [ "$part_type_guid" == $BACKUP_PART_GUID -a "${part_fstype}" == "ext4" ]; then
echo "Skipping wipe backup partition $part"
wlog "Skipping wipe backup partition $part"
BACKUP_CREATED=1
continue
fi
fi
echo "Wiping partition $part"
wlog "Wiping partition $part"
dd if=/dev/zero of=$part bs=512 count=34
dd if=/dev/zero of=$part bs=512 count=34 seek=$((`blockdev --getsz $part` - 34))
parted -s $dev rm $part_number
exec_retry 5 0.5 "parted -s $dev rm $part_number"
# LP 1876374: On some nvme systems udev doesn't correctly remove the
# links to the deleted partitions from /dev/nvme* causing them to be
# seen as non block devices.
sleep 0.3 # Wait for udev to settle
rm -f $part # Delete remaining /dev node leftover
exec_retry 5 0.3 "rm -f $part" # Delete remaining /dev node leftover
done
if [ $BACKUP_CREATED -eq 0 -o "$dev" != "$ROOTFS_DISK" ]; then
echo "Creating disk label for $dev"
if [ $BACKUP_CREATED -eq 0 -o "$dev" != "$rootfs_device" ]; then
wlog "Creating disk label for $dev"
parted -s $dev mktable gpt
fi
@ -190,10 +248,10 @@ done
# in an upgrade where we're not wiping all disks.
# If we ever create other volume groups from kickstart in the future,
# include them in this search as well.
partitions=$(pvs --select 'vg_name=cgts-vg' -o pv_name --noheading | grep -v '\[unknown\]')
partitions=$(exec_no_fds "$STOR_DEV_FDS" "pvs --select 'vg_name=cgts-vg' -o pv_name --noheading" | grep -v '\[unknown\]')
for p in $partitions
do
echo "Pre-wiping $p from kickstart (cgts-vg present)"
wlog "Pre-wiping $p from kickstart (cgts-vg present)"
dd if=/dev/zero of=$p bs=512 count=34
dd if=/dev/zero of=$p bs=512 count=34 seek=$((`blockdev --getsz $p` - 34))
done

View File

@ -0,0 +1,21 @@
# Closing fragment of the disk partitioning script (presumably the
# pre_disk_setup_tail.cfg piece appended after the per-personality disk
# layout - confirm against the generator). It logs the resulting partition
# tables, releases the device locks and lets udev and LVM catch up.
#
# Log info about system state at end of partitioning operation.
for dev in $STOR_DEVS; do
wlog "Partition table at end of script for $dev is:"
# Print the partition table of this device with sizes in MiB.
parted -s $dev unit mib print
done
# Close all FDs and wait for udev to reshuffle all partitions.
# NOTE(review): STOR_DEV_FDS is seeded with the descriptor wlog writes to, so
# wlog presumably cannot be used once the loop below has closed it - confirm
# that nothing after this point (e.g. the failure reporter) relies on it.
wlog "Releasing storage device locks and FDs."
for fd in $STOR_DEV_FDS
do
# Release the lock held through this descriptor first ...
flock -u "$fd"
# ... then close the descriptor whose number is stored in fd.
exec {fd}>&-
done
# Fixed pause before asking udev whether its event queue is empty; presumably
# gives udev time to notice the released devices - TODO confirm.
sleep 2
udevadm settle || report_pre_failure_with_msg "ERROR: udevadm settle failed!"
# Rescan LVM cache to avoid warnings for VGs that were recreated.
pvscan --cache
%end

View File

@ -1,5 +1,5 @@
sz=$(blockdev --getsize64 $(get_disk $rootfs_device))
sz=$(blockdev --getsize64 $rootfs_device)
if [ $sz -le $((90*$gb)) ] ; then
LOG_VOL_SIZE=4000
SCRATCH_VOL_SIZE=4000
@ -23,23 +23,21 @@ EOF
if [ -d /sys/firmware/efi ] ; then
cat<<EOF>>/tmp/part-include
part /boot/efi --fstype=efi --size=300 --ondrive=$(get_disk $boot_device)
part /boot/efi --fstype=efi --size=300 --ondrive=$boot_device
EOF
else
cat<<EOF>>/tmp/part-include
part biosboot --asprimary --fstype=biosboot --size=1 --ondrive=$(get_disk $boot_device)
part biosboot --asprimary --fstype=biosboot --size=1 --ondrive=$boot_device
EOF
fi
cat<<EOF>>/tmp/part-include
part /boot --fstype=ext4 --asprimary --size=500 --ondrive=$(get_disk $rootfs_device) --fsoptions="$ROOTFS_OPTIONS"
part pv.253004 --grow --asprimary --size=500 --ondrive=$(get_disk $rootfs_device)
part /boot --fstype=ext4 --asprimary --size=500 --ondrive=$rootfs_device --fsoptions="$ROOTFS_OPTIONS"
part pv.253004 --grow --asprimary --size=500 --ondrive=$rootfs_device
volgroup cgts-vg --pesize=32768 pv.253004
logvol /var/log --fstype=ext4 --vgname=cgts-vg --size=$LOG_VOL_SIZE --name=log-lv
logvol /scratch --fstype=ext4 --vgname=cgts-vg --size=$SCRATCH_VOL_SIZE --name=scratch-lv
part / --fstype=ext4 --asprimary --size=$ROOTFS_SIZE --ondrive=$(get_disk $rootfs_device) --fsoptions="$ROOTFS_OPTIONS"
part / --fstype=ext4 --asprimary --size=$ROOTFS_SIZE --ondrive=$rootfs_device --fsoptions="$ROOTFS_OPTIONS"
EOF
%end

View File

@ -15,7 +15,7 @@ BOOT_VOL_SIZE=500
## 69648/1024=68.01. CGTS_PV_SIZE=69*1024=70656.
CGTS_PV_SIZE=70656
sz=$(blockdev --getsize64 $(get_disk $rootfs_device))
sz=$(blockdev --getsize64 $rootfs_device)
if [ $sz -le $((80*$gb)) ] ; then
## Less than 80GB use a 10GB root partition
ROOTFS_SIZE=10000
@ -37,23 +37,21 @@ EOF
if [ -d /sys/firmware/efi ] ; then
cat<<EOF>>/tmp/part-include
part /boot/efi --fstype=efi --size=300 --ondrive=$(get_disk $boot_device)
part /boot/efi --fstype=efi --size=300 --ondrive=$boot_device
EOF
else
cat<<EOF>>/tmp/part-include
part biosboot --asprimary --fstype=biosboot --size=1 --ondrive=$(get_disk $boot_device)
part biosboot --asprimary --fstype=biosboot --size=1 --ondrive=$boot_device
EOF
fi
cat<<EOF>>/tmp/part-include
part /boot --fstype=ext4 --asprimary --size=$BOOT_VOL_SIZE --ondrive=$(get_disk $rootfs_device) --fsoptions="$ROOTFS_OPTIONS"
part pv.253004 --asprimary --size=$CGTS_PV_SIZE --ondrive=$(get_disk $rootfs_device)
part /boot --fstype=ext4 --asprimary --size=$BOOT_VOL_SIZE --ondrive=$rootfs_device --fsoptions="$ROOTFS_OPTIONS"
part pv.253004 --asprimary --size=$CGTS_PV_SIZE --ondrive=$rootfs_device
volgroup cgts-vg --pesize=32768 pv.253004
logvol /var/log --fstype=ext4 --vgname=cgts-vg --size=$LOG_VOL_SIZE --name=log-lv
logvol /scratch --fstype=ext4 --vgname=cgts-vg --size=$SCRATCH_VOL_SIZE --name=scratch-lv
part / --fstype=ext4 --asprimary --size=$ROOTFS_SIZE --ondrive=$(get_disk $rootfs_device) --fsoptions="$ROOTFS_OPTIONS"
part / --fstype=ext4 --asprimary --size=$ROOTFS_SIZE --ondrive=$rootfs_device --fsoptions="$ROOTFS_OPTIONS"
EOF
%end