Disable libvirt cgroup functionality for cgroup v2

Change-Id: I5a9f1828d7c2f36e14de89323868c4a1dbebba64
Author: Sadegh Hayeri
Date:   2023-07-05 21:07:49 +03:30
parent 6b6ca9e26c
commit 4fc46f1808
3 changed files with 61 additions and 41 deletions
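
The whole change hinges on one runtime check: on a host that uses the unified cgroup hierarchy, /sys/fs/cgroup is mounted as cgroup2fs, so the filesystem type alone distinguishes cgroup v2 from v1. A quick way to confirm this on a node before deploying the chart (illustration only, not part of this commit):

    # cgroup2fs -> unified hierarchy (cgroup v2): the chart skips cgcreate/cgexec below
    # tmpfs     -> legacy split hierarchy (cgroup v1): per-controller directories exist
    stat -fc %T /sys/fs/cgroup/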


@@ -15,7 +15,7 @@ apiVersion: v1
 appVersion: v1.0.0
 description: OpenStack-Helm libvirt
 name: libvirt
-version: 0.1.20
+version: 0.1.21
 home: https://libvirt.org
 sources:
   - https://libvirt.org/git/?p=libvirt.git;a=summary


@@ -16,6 +16,13 @@ limitations under the License.
 set -ex

+# TODO: We disable cgroup functionality for cgroup v2, we should fix this in the future
+if $(stat -fc %T /sys/fs/cgroup/ | grep -q cgroup2fs); then
+  CGROUP_VERSION=v2
+else
+  CGROUP_VERSION=v1
+fi
+
 if [ -n "$(cat /proc/*/comm 2>/dev/null | grep -w libvirtd)" ]; then
   set +x
   for proc in $(ls /proc/*/comm 2>/dev/null); do
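
The new check relies on grep -q for its exit status only: since -q prints nothing, the command substitution expands to an empty string and the if takes the status of the substitution itself. The $( ) wrapper is therefore unnecessary, and a plain pipeline (an equivalent form, not what the commit uses) reads more conventionally:

    # Same test without the command substitution: the if consumes the pipeline's exit status.
    if stat -fc %T /sys/fs/cgroup/ | grep -q cgroup2fs; then
      CGROUP_VERSION=v2
    else
      CGROUP_VERSION=v1
    fi
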
@@ -38,14 +45,16 @@ if [[ -c /dev/kvm ]]; then
     chown root:kvm /dev/kvm
 fi

-#Setup Cgroups to use when breaking out of Kubernetes defined groups
-CGROUPS=""
-for CGROUP in cpu rdma hugetlb; do
-  if [ -d /sys/fs/cgroup/${CGROUP} ]; then
-    CGROUPS+="${CGROUP},"
-  fi
-done
-cgcreate -g ${CGROUPS%,}:/osh-libvirt
+if [ $CGROUP_VERSION != "v2" ]; then
+  #Setup Cgroups to use when breaking out of Kubernetes defined groups
+  CGROUPS=""
+  for CGROUP in cpu rdma hugetlb; do
+    if [ -d /sys/fs/cgroup/${CGROUP} ]; then
+      CGROUPS+="${CGROUP},"
+    fi
+  done
+  cgcreate -g ${CGROUPS%,}:/osh-libvirt
+fi

 # We assume that if hugepage count > 0, then hugepages should be exposed to libvirt/qemu
 hp_count="$(cat /proc/meminfo | grep HugePages_Total | tr -cd '[:digit:]')"
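
Background on why cgcreate is now skipped (not part of the diff): on a cgroup v2 host there are no per-controller mount points such as /sys/fs/cgroup/cpu or /sys/fs/cgroup/hugetlb, so every -d test in the loop fails, CGROUPS stays empty, and cgcreate has nothing valid to create. On the unified hierarchy the enabled controllers can be listed from a single file instead:

    # On cgroup v2 the enabled controllers live in one file at the hierarchy root,
    # e.g. "cpuset cpu io memory hugetlb pids rdma".
    cat /sys/fs/cgroup/cgroup.controllers
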
@@ -68,43 +77,49 @@ if [ 0"$hp_count" -gt 0 ]; then
     exit 1
   fi

-  # Kubernetes 1.10.x introduced cgroup changes that caused the container's
-  # hugepage byte limit quota to zero out. This workaround sets that pod limit
-  # back to the total number of hugepage bytes available to the baremetal host.
-  if [ -d /sys/fs/cgroup/hugetlb ]; then
-    limits="$(ls /sys/fs/cgroup/hugetlb/{{ .Values.conf.kubernetes.cgroup }}/hugetlb.*.limit_in_bytes)" || \
-      (echo "ERROR: Failed to locate any hugetable limits. Did you set the correct cgroup in your values used for this chart?"
-       exit 1)
-    for limit in $limits; do
-      target="/sys/fs/cgroup/hugetlb/$(dirname $(awk -F: '($2~/hugetlb/){print $3}' /proc/self/cgroup))/$(basename $limit)"
-      # Ensure the write target for the hugepage limit for the pod exists
-      if [ ! -f "$target" ]; then
-        echo "ERROR: Could not find write target for hugepage limit: $target"
-      fi
+  if [ $CGROUP_VERSION != "v2" ]; then
+    # Kubernetes 1.10.x introduced cgroup changes that caused the container's
+    # hugepage byte limit quota to zero out. This workaround sets that pod limit
+    # back to the total number of hugepage bytes available to the baremetal host.
+    if [ -d /sys/fs/cgroup/hugetlb ]; then
+      limits="$(ls /sys/fs/cgroup/hugetlb/{{ .Values.conf.kubernetes.cgroup }}/hugetlb.*.limit_in_bytes)" || \
+        (echo "ERROR: Failed to locate any hugetable limits. Did you set the correct cgroup in your values used for this chart?"
+         exit 1)
+      for limit in $limits; do
+        target="/sys/fs/cgroup/hugetlb/$(dirname $(awk -F: '($2~/hugetlb/){print $3}' /proc/self/cgroup))/$(basename $limit)"
+        # Ensure the write target for the hugepage limit for the pod exists
+        if [ ! -f "$target" ]; then
+          echo "ERROR: Could not find write target for hugepage limit: $target"
+        fi

-      # Write hugetable limit for pod
-      echo "$(cat $limit)" > "$target"
-    done
-  fi
+        # Write hugetable limit for pod
+        echo "$(cat $limit)" > "$target"
+      done
+    fi

-  # Determine OS default hugepage size to use for the hugepage write test
-  default_hp_kb="$(cat /proc/meminfo | grep Hugepagesize | tr -cd '[:digit:]')"
+    # Determine OS default hugepage size to use for the hugepage write test
+    default_hp_kb="$(cat /proc/meminfo | grep Hugepagesize | tr -cd '[:digit:]')"

-  # Attempt to write to the hugepage mount to ensure it is operational, but only
-  # if we have at least 1 free page.
-  num_free_pages="$(cat /sys/kernel/mm/hugepages/hugepages-${default_hp_kb}kB/free_hugepages | tr -cd '[:digit:]')"
-  echo "INFO: '$num_free_pages' free hugepages of size ${default_hp_kb}kB"
-  if [ 0"$num_free_pages" -gt 0 ]; then
-    (fallocate -o0 -l "$default_hp_kb" /dev/hugepages/foo && rm /dev/hugepages/foo) || \
-      (echo "ERROR: fallocate failed test at /dev/hugepages with size ${default_hp_kb}kB"
-       rm /dev/hugepages/foo
-       exit 1)
+    # Attempt to write to the hugepage mount to ensure it is operational, but only
+    # if we have at least 1 free page.
+    num_free_pages="$(cat /sys/kernel/mm/hugepages/hugepages-${default_hp_kb}kB/free_hugepages | tr -cd '[:digit:]')"
+    echo "INFO: '$num_free_pages' free hugepages of size ${default_hp_kb}kB"
+    if [ 0"$num_free_pages" -gt 0 ]; then
+      (fallocate -o0 -l "$default_hp_kb" /dev/hugepages/foo && rm /dev/hugepages/foo) || \
+        (echo "ERROR: fallocate failed test at /dev/hugepages with size ${default_hp_kb}kB"
+         rm /dev/hugepages/foo
+         exit 1)
+    fi
   fi
 fi

 if [ -n "${LIBVIRT_CEPH_CINDER_SECRET_UUID}" ] || [ -n "${LIBVIRT_EXTERNAL_CEPH_CINDER_SECRET_UUID}" ] ; then
-  #NOTE(portdirect): run libvirtd as a transient unit on the host with the osh-libvirt cgroups applied.
-  cgexec -g ${CGROUPS%,}:/osh-libvirt systemd-run --scope --slice=system libvirtd --listen &
+  if [ $CGROUP_VERSION != "v2" ]; then
+    #NOTE(portdirect): run libvirtd as a transient unit on the host with the osh-libvirt cgroups applied.
+    cgexec -g ${CGROUPS%,}:/osh-libvirt systemd-run --scope --slice=system libvirtd --listen &
+  else
+    systemd-run --scope --slice=system libvirtd --listen &
+  fi

   tmpsecret=$(mktemp --suffix .xml)
   if [ -n "${LIBVIRT_EXTERNAL_CEPH_CINDER_SECRET_UUID}" ] ; then
@@ -180,5 +195,9 @@ EOF
 fi

-#NOTE(portdirect): run libvirtd as a transient unit on the host with the osh-libvirt cgroups applied.
-cgexec -g ${CGROUPS%,}:/osh-libvirt systemd-run --scope --slice=system libvirtd --listen
+if [ $CGROUP_VERSION != "v2" ]; then
+  #NOTE(portdirect): run libvirtd as a transient unit on the host with the osh-libvirt cgroups applied.
+  cgexec -g ${CGROUPS%,}:/osh-libvirt systemd-run --scope --slice=system libvirtd --listen
+else
+  systemd-run --scope --slice=system libvirtd --listen
+fi
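
With cgroup v1 the daemon is still launched through cgexec, so the transient systemd scope inherits the osh-libvirt cgroup; with cgroup v2 the cgexec wrapper is dropped and libvirtd simply runs in whatever scope systemd-run creates under system.slice. One way to confirm where the running daemon actually landed (a verification sketch, not part of the chart):

    # Show the cgroup membership of the oldest libvirtd process.
    # On v1 hosts the controller lines should point at .../osh-libvirt;
    # on v2 hosts a single line shows the systemd-run scope under system.slice.
    cat /proc/$(pgrep -o libvirtd)/cgroup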


@@ -21,4 +21,5 @@ libvirt:
   - 0.1.18 Replace node-role.kubernetes.io/master with control-plane
   - 0.1.19 Set kubernetes cgroup value equal kubepods.slice to fit systemd cgroup driver
   - 0.1.20 Update Ceph to 17.2.6
+  - 0.1.21 Disable libvirt cgroup functionality for cgroup-v2
 ...