[ceph-osd] Remove udev interactions from osd-init

There are bugs with containerizing certain udev operations in some
udev versions. The osd-init container can hang in these
circumstances, so the osd-init scripts are modified not to use
these problematic operations.

Change-Id: I6b39321b849f5fbf1b6f2097c6c57ffaebe68121
This commit is contained in:
Stephen Taylor 2022-04-29 10:50:13 -06:00
parent 0dc859cbcb
commit e02dc3da44
3 changed files with 14 additions and 13 deletions

View File

@ -15,6 +15,6 @@ apiVersion: v1
appVersion: v1.0.0
description: OpenStack-Helm Ceph OSD
name: ceph-osd
version: 0.1.39
version: 0.1.40
home: https://github.com/ceph/ceph
...

View File

@ -292,21 +292,15 @@ function zap_extra_partitions {
# Delete any discovered journal, block.db, and block.wal partitions
if [ ! -z "${journal_disk}" ]; then
sgdisk -d ${journal_part} ${journal_disk}
/sbin/udevadm settle --timeout=600
/usr/bin/flock -s ${journal_disk} /sbin/partprobe ${journal_disk}
/sbin/udevadm settle --timeout=600
fi
if [ ! -z "${block_db_disk}" ]; then
sgdisk -d ${block_db_part} ${block_db_disk}
/sbin/udevadm settle --timeout=600
/usr/bin/flock -s ${block_db_disk} /sbin/partprobe ${block_db_disk}
/sbin/udevadm settle --timeout=600
fi
if [ ! -z "${block_wal_disk}" ]; then
sgdisk -d ${block_wal_part} ${block_wal_disk}
/sbin/udevadm settle --timeout=600
/usr/bin/flock -s ${block_wal_disk} /sbin/partprobe ${block_wal_disk}
/sbin/udevadm settle --timeout=600
fi
}
@ -345,9 +339,19 @@ function lvm_scan {
lvscan
}
# Poll once per second until the given path exists as a block device.
#
# Arguments:
#   $1 - path of the block device to wait for
#   $2 - optional maximum number of one-second polls (default: 600)
# Outputs:
#   a progress message on stdout; a timeout diagnostic on stderr
# Returns:
#   0 as soon as the path tests as a block device
# Exits:
#   terminates the calling shell with status 1 when the path is still not a
#   block device after the last poll
function wait_for_device {
  local device="$1"
  local timeout="${2:-600}"
  local elapsed

  echo "Waiting for block device ${device} to appear"
  for ((elapsed = 0; elapsed < timeout; elapsed++)); do
    if [[ -b "${device}" ]]; then
      return 0
    fi
    sleep 1
  done

  # One last look after the final sleep so a device that showed up during
  # that sleep is not reported as missing.
  if [[ ! -b "${device}" ]]; then
    echo "Timed out after ${timeout}s waiting for block device ${device}" >&2
    exit 1
  fi
}
function udev_settle {
osd_devices="${OSD_DEVICE}"
udevadm settle --timeout=600
partprobe "${OSD_DEVICE}"
lvm_scan
if [ "${OSD_BLUESTORE:-0}" -eq 1 ]; then
@ -378,11 +382,10 @@ function udev_settle {
local JDEV=$(echo ${OSD_JOURNAL} | sed 's/[0-9]//g')
osd_devices="${osd_devices}\|${JDEV}"
partprobe "${JDEV}"
wait_for_device "${JDEV}"
fi
fi
fi
# watch the udev event queue, and exit if all current events are handled
udevadm settle --timeout=600
# On occasion udev may not make the correct device symlinks for Ceph, so just in case we make them manually
mkdir -p /dev/disk/by-partuuid
@ -394,9 +397,6 @@ function udev_settle {
ln -s "../../${dev}" "${symlink}"
fi
done
# Give udev another chance now that all symlinks exist for devices we care about
udevadm settle --timeout=600
}
# Helper function to get a logical volume from a physical volume

View File

@ -40,4 +40,5 @@ ceph-osd:
- 0.1.37 Add a disruptive OSD restart to the post-apply job
- 0.1.38 Skip pod wait in post-apply job when disruptive
- 0.1.39 Allow for unconditional OSD restart
- 0.1.40 Remove udev interactions from osd-init
...