From c1ebe18e2a4e144d9cf03568b43da91c9cbaa6f3 Mon Sep 17 00:00:00 2001 From: Tom Weininger Date: Fri, 8 Jul 2022 17:57:59 +0200 Subject: [PATCH] Add cpu-pinning element The new "cpu-pinning" element optimizes the amphora image for better vertical scaling. When an amphora flavor with multiple vCPUs is configured it will configure the kernel to isolate (isolcpus) all vCPUs except the first one. Furthermore, it uninstalls irqbalance and sets the IRQ affinity to the first CPU. That way the other CPUs are free to be used by HAProxy exclusively. A new customized TuneD profile applies some more tweaks for improving network latency. This new feature is disabled by default in diskimage-create.sh. Story: 2010236 Task: 46042 Change-Id: I1a0591de79be867483a044705e866b2368b2a567 --- diskimage-create/README.rst | 2 + diskimage-create/diskimage-create.sh | 14 +++- elements/cpu-pinning/README.rst | 6 ++ elements/cpu-pinning/element-deps | 2 + .../environment.d/80-kernel-cpu-affinity | 36 ++++++++++ elements/cpu-pinning/package-installs.yaml | 3 + .../post-install.d/20-enable-tuned | 11 +++ .../post-install.d/30-set-tuned-profile | 3 + .../static/etc/tuned/active_profile | 1 + .../static/etc/tuned/amphora/script.sh | 23 +++++++ .../static/etc/tuned/amphora/tuned.conf | 67 +++++++++++++++++++ elements/cpu-pinning/svc-map | 4 ++ ...-cpu-pinning-element-86617303b720d5a9.yaml | 19 ++++++ 13 files changed, 190 insertions(+), 1 deletion(-) create mode 100644 elements/cpu-pinning/README.rst create mode 100644 elements/cpu-pinning/element-deps create mode 100644 elements/cpu-pinning/environment.d/80-kernel-cpu-affinity create mode 100644 elements/cpu-pinning/package-installs.yaml create mode 100644 elements/cpu-pinning/post-install.d/20-enable-tuned create mode 100644 elements/cpu-pinning/post-install.d/30-set-tuned-profile create mode 100644 elements/cpu-pinning/static/etc/tuned/active_profile create mode 100644 elements/cpu-pinning/static/etc/tuned/amphora/script.sh create mode 100644 
elements/cpu-pinning/static/etc/tuned/amphora/tuned.conf create mode 100644 elements/cpu-pinning/svc-map create mode 100644 releasenotes/notes/add-cpu-pinning-element-86617303b720d5a9.yaml diff --git a/diskimage-create/README.rst b/diskimage-create/README.rst index 48390aa3af..fde101a97a 100644 --- a/diskimage-create/README.rst +++ b/diskimage-create/README.rst @@ -110,6 +110,7 @@ Command syntax: [-i **ubuntu-minimal** | fedora | centos-minimal | rhel ] [-k ] [-l ] + [-m] [-n] [-o **amphora-x64-haproxy** | ] [-p] @@ -132,6 +133,7 @@ Command syntax: '-i' is the base OS (default: ubuntu-minimal) '-k' is the kernel meta package name, currently only for ubuntu-minimal base OS (default: linux-image-virtual) '-l' is output logfile (default: none) + '-m' enable vCPU pinning optimizations (default: disabled) '-n' disable sshd (default: enabled) '-o' is the output image file name '-p' install amphora-agent from distribution packages (default: disabled)" diff --git a/diskimage-create/diskimage-create.sh b/diskimage-create/diskimage-create.sh index dbb165abce..44049fd1a3 100755 --- a/diskimage-create/diskimage-create.sh +++ b/diskimage-create/diskimage-create.sh @@ -31,6 +31,7 @@ usage() { echo " [-i **ubuntu-minimal** | fedora | centos-minimal | rhel ]" echo " [-k ]" echo " [-l ]" + echo " [-m]" echo " [-n]" echo " [-o **amphora-x64-haproxy.qcow2** | ]" echo " [-p]" @@ -52,6 +53,7 @@ usage() { echo " '-i' is the base OS (default: ubuntu-minimal)" echo " '-k' is the kernel meta package name, currently only for ubuntu-minimal base OS (default: linux-image-virtual)" echo " '-l' is output logfile (default: none)" + echo " '-m' enable vCPU pinning optimizations (default: disabled)" echo " '-n' disable sshd (default: enabled)" echo " '-o' is the output image file name" echo " '-p' install amphora-agent from distribution packages (default: disabled)" @@ -92,7 +94,7 @@ dib_enable_tracing= AMP_LOGFILE="" -while getopts "a:b:c:d:efg:hi:k:l:no:pt:r:s:vw:xy" opt; do +while getopts 
"a:b:c:d:efg:hi:k:l:mno:pt:r:s:vw:xy" opt; do case $opt in a) AMP_ARCH=$OPTARG @@ -166,6 +168,9 @@ while getopts "a:b:c:d:efg:hi:k:l:no:pt:r:s:vw:xy" opt; do l) AMP_LOGFILE="--logfile=$OPTARG" ;; + m) + AMP_ENABLE_CPUPINNING=1 + ;; n) AMP_DISABLE_SSHD=1 ;; @@ -253,6 +258,8 @@ if [ "$AMP_BASEOS" = "ubuntu-minimal" ]; then export DIB_UBUNTU_KERNEL=${AMP_KERNEL:-"linux-image-virtual"} fi +AMP_ENABLE_CPUPINNING=${AMP_ENABLE_CPUPINNING:-0} + AMP_DISABLE_SSHD=${AMP_DISABLE_SSHD:-0} AMP_PACKAGE_INSTALL=${AMP_PACKAGE_INSTALL:-0} @@ -476,6 +483,11 @@ AMP_element_sequence="$AMP_element_sequence pip-cache" # Add certificate ramfs element AMP_element_sequence="$AMP_element_sequence certs-ramfs" +# Add cpu-pinning element +if [ "$AMP_ENABLE_CPUPINNING" -eq 1 ]; then + AMP_element_sequence="$AMP_element_sequence cpu-pinning" +fi + # Disable SSHD if requested if [ "$AMP_DISABLE_SSHD" -eq 1 ]; then AMP_element_sequence="$AMP_element_sequence remove-sshd" diff --git a/elements/cpu-pinning/README.rst b/elements/cpu-pinning/README.rst new file mode 100644 index 0000000000..a59432c5f3 --- /dev/null +++ b/elements/cpu-pinning/README.rst @@ -0,0 +1,6 @@ +Element to enable optimizations for vertical scaling + +This element configures the Linux kernel to isolate all but the first +vCPU of the system, so that they are used by HAProxy threads exclusively. +It also installs and activates a customized TuneD profile that should further +tweak vertical scaling performance. 
diff --git a/elements/cpu-pinning/element-deps b/elements/cpu-pinning/element-deps new file mode 100644 index 0000000000..483dfd9a67 --- /dev/null +++ b/elements/cpu-pinning/element-deps @@ -0,0 +1,2 @@ +install-static +package-installs diff --git a/elements/cpu-pinning/environment.d/80-kernel-cpu-affinity b/elements/cpu-pinning/environment.d/80-kernel-cpu-affinity new file mode 100644 index 0000000000..198aef0bca --- /dev/null +++ b/elements/cpu-pinning/environment.d/80-kernel-cpu-affinity @@ -0,0 +1,36 @@ +#!/bin/bash + +# +# Copyright Red Hat +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+#
+
+if [ "${DIB_DEBUG_TRACE:-0}" -gt 0 ]; then
+    set -x
+fi
+
+set -euo pipefail
+
+case "$DISTRO_NAME" in
+    ubuntu | debian | fedora | centos* | rhel* )
+        DIB_BOOTLOADER_DEFAULT_CMDLINE+=" irqaffinity=0"
+        # This will be ignored on single vCPU systems
+        DIB_BOOTLOADER_DEFAULT_CMDLINE+=" isolcpus=1-N"
+        export DIB_BOOTLOADER_DEFAULT_CMDLINE
+        ;;
+    *)
+        echo "ERROR: Unsupported distribution $DISTRO_NAME" >&2
+        exit 1
+        ;;
+esac
diff --git a/elements/cpu-pinning/package-installs.yaml b/elements/cpu-pinning/package-installs.yaml
new file mode 100644
index 0000000000..a537247400
--- /dev/null
+++ b/elements/cpu-pinning/package-installs.yaml
@@ -0,0 +1,3 @@
+irqbalance:
+  uninstall: True
+tuned:
diff --git a/elements/cpu-pinning/post-install.d/20-enable-tuned b/elements/cpu-pinning/post-install.d/20-enable-tuned
new file mode 100755
index 0000000000..b369b2ca81
--- /dev/null
+++ b/elements/cpu-pinning/post-install.d/20-enable-tuned
@@ -0,0 +1,15 @@
+#!/bin/bash
+#
+# Enable the TuneD service on systemd based images. The unit name is
+# looked up with svc-map.
+
+if [ "${DIB_DEBUG_TRACE:-0}" -gt 0 ]; then
+    set -x
+fi
+
+set -euo pipefail
+
+if [ "$DIB_INIT_SYSTEM" == "systemd" ]; then
+    # Quoted so the resolved service name is passed as a single argument.
+    systemctl enable "$(svc-map tuned)"
+fi
diff --git a/elements/cpu-pinning/post-install.d/30-set-tuned-profile b/elements/cpu-pinning/post-install.d/30-set-tuned-profile
new file mode 100755
index 0000000000..347dcbe3b7
--- /dev/null
+++ b/elements/cpu-pinning/post-install.d/30-set-tuned-profile
@@ -0,0 +1,9 @@
+#!/bin/sh
+#
+# Make the TuneD profile script of this element executable.
+# The script is shipped as static/etc/tuned/amphora/script.sh, so with
+# install-static it is expected at /etc/tuned/amphora/script.sh.
+
+set -eu
+
+chmod +x /etc/tuned/amphora/script.sh
diff --git a/elements/cpu-pinning/static/etc/tuned/active_profile b/elements/cpu-pinning/static/etc/tuned/active_profile
new file mode 100644
index 0000000000..b6b31fe78a
--- /dev/null
+++ b/elements/cpu-pinning/static/etc/tuned/active_profile
@@ -0,0 +1 @@
+virtual-guest optimize-serial-console amphora
diff --git a/elements/cpu-pinning/static/etc/tuned/amphora/script.sh b/elements/cpu-pinning/static/etc/tuned/amphora/script.sh
new file mode 100644
index 0000000000..8088595ce0
--- /dev/null
+++ b/elements/cpu-pinning/static/etc/tuned/amphora/script.sh
@@ -0,0 +1,31 @@
+#!/bin/sh
+#
+# Script referenced by the [script] section of the "amphora" TuneD
+# profile. It relies on the helper functions sourced from
+# /usr/lib/tuned/functions.
+
+# Comment the line in ...tuned/functions that fails on the amp:
+# DISKS_SYS="$(command ls -d1 /sys/block/{sd,cciss,dm-,vd,dasd,xvd}* 2>/dev/null)"
+sed -i 's/^DISKS_SYS=/#&/' /usr/lib/tuned/functions
+. /usr/lib/tuned/functions
+
+# Presumably run by process() for the "start" action (TODO confirm).
+start() {
+    setup_kvm_mod_low_latency
+    disable_ksm
+
+    return "$?"
+}
+
+# Reverts the changes made by start(), but only when the first argument
+# is "full_rollback"; otherwise nothing is undone.
+stop() {
+    if [ "$1" = "full_rollback" ]; then
+        teardown_kvm_mod_low_latency
+        enable_ksm
+    fi
+    return "$?"
+}
+
+# Quote "$@" so the caller's arguments reach process() without re-splitting.
+process "$@"
diff --git a/elements/cpu-pinning/static/etc/tuned/amphora/tuned.conf b/elements/cpu-pinning/static/etc/tuned/amphora/tuned.conf
new file mode 100644
index 0000000000..b53536a612
--- /dev/null
+++ b/elements/cpu-pinning/static/etc/tuned/amphora/tuned.conf
@@ -0,0 +1,67 @@
+#
+# tuned configuration
+#
+# Copyright Red Hat
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+# +[main] +summary=Customized profile for use on Octavia amphorae +include=network-latency + +[variables] +isolated_cores=${f:cpulist_invert:0} +no_balance_cores=${isolated_cores} + +# Fail if isolated_cores are not set +assert1=${f:assertion_non_equal:isolated_cores are set:${isolated_cores}:${isolated_cores_assert_check}} + +# tmpdir +tmpdir=${f:strip:${f:exec:mktemp:-d}} + +isolated_cores_expanded=${f:cpulist_unpack:${isolated_cores}} +isolated_cpumask=${f:cpulist2hex:${isolated_cores_expanded}} +not_isolated_cores_expanded=${f:cpulist_invert:${isolated_cores_expanded}} +isolated_cores_online_expanded=${f:cpulist_online:${isolated_cores}} +not_isolated_cores_online_expanded=${f:cpulist_online:${not_isolated_cores_expanded}} +not_isolated_cpumask=${f:cpulist2hex:${not_isolated_cores_expanded}} +# Make sure no_balance_cores is defined before +# no_balance_cores_expanded is defined, so that child profiles can set +# no_balance_cores directly in the profile (tuned.conf) +no_balance_cores_expanded=${f:cpulist_unpack:${no_balance_cores}} + +# Fail if isolated_cores contains CPUs which are not online +assert2=${f:assertion:isolated_cores contains online CPU(s):${isolated_cores_expanded}:${isolated_cores_online_expanded}} + +[sysctl] +kernel.numa_balancing=0 +kernel.hung_task_timeout_secs = 600 +vm.stat_interval = 10 +# See https://bugzilla.redhat.com/show_bug.cgi?id=1797629 +kernel.timer_migration = 0 + +[sysfs] +/sys/bus/workqueue/devices/writeback/cpumask = ${not_isolated_cpumask} +/sys/devices/virtual/workqueue/cpumask = ${not_isolated_cpumask} +/sys/devices/virtual/workqueue/*/cpumask = ${not_isolated_cpumask} +/sys/devices/system/machinecheck/machinecheck*/ignore_ce = 1 + +[systemd] +cpu_affinity=${not_isolated_cores_expanded} + +[script] +script=${i:PROFILE_DIR}/script.sh + +[scheduler] +isolated_cores=${isolated_cores} +ps_blacklist=.*pmd.*;.*PMD.*;^DPDK;.*qemu-kvm.*;^contrail-vroute$;^lcore-slave-.*;^rte_mp_handle$;^rte_mp_async$;^eal-intr-thread$ diff --git 
a/elements/cpu-pinning/svc-map b/elements/cpu-pinning/svc-map new file mode 100644 index 0000000000..937a5ff9d0 --- /dev/null +++ b/elements/cpu-pinning/svc-map @@ -0,0 +1,4 @@ +tuned: + default: tuned +irqbalance: + default: irqbalance diff --git a/releasenotes/notes/add-cpu-pinning-element-86617303b720d5a9.yaml b/releasenotes/notes/add-cpu-pinning-element-86617303b720d5a9.yaml new file mode 100644 index 0000000000..044ab5e5d3 --- /dev/null +++ b/releasenotes/notes/add-cpu-pinning-element-86617303b720d5a9.yaml @@ -0,0 +1,19 @@ +--- +features: + - | + The new "cpu-pinning" element optimizes the amphora image for better + vertical scaling. When an amphora flavor with multiple vCPUs is configured + it will configure the kernel to isolate (isolcpus) + all vCPUs except the first one. + Furthermore, it uninstalls irqbalance and sets the IRQ affinity to the + first CPU. That way the other CPUs are free to be used by HAProxy + exclusively. A new customized TuneD profile applies some more tweaks + for improving network latency. + This new feature is disabled by default, but can be enabled by running + ``diskimage-create.sh`` with the ``-m`` option or setting the + ``AMP_ENABLE_CPUPINNING`` environment variable to 1 before running the script. +upgrade: + - | + Amphora vertical scaling optimizations require a new amphora image + build with the optional CPU pinning feature enabled in order + to become effective.