cni health: track all cgroup memory usage
The CNI daemon should always be run in its own cgroup. That typically can take two forms: - Running inside a container - Running as a systemd service This patch changes the way the memory usage is tracked so that both of the cgroup memberships listed above are supported. Thanks to using cgroups for tracking the memory usage, we will finally take into account the CNI daemon children memory usage. Change-Id: I0ef48742653d5c17ea0cc787ae3a997d5d315c5a Closes-Bug: 1752939 Signed-off-by: Antoni Segura Puimedon <antonisp@celebdor.com>
This commit is contained in:
parent
7f77dc2380
commit
8f453a2dda
|
@ -90,6 +90,9 @@ function configure_kuryr {
|
|||
# configs.
|
||||
iniset "$KURYR_CONFIG" cni_daemon docker_mode True
|
||||
iniset "$KURYR_CONFIG" cni_daemon netns_proc_dir "/host_proc"
|
||||
else
|
||||
iniset "$KURYR_CONFIG" cni_health_server cg_path \
|
||||
"/system.slice/system-devstack.slice/devstack@kuryr-daemon.service"
|
||||
fi
|
||||
fi
|
||||
|
||||
|
|
|
@ -205,3 +205,30 @@ Run kuryr-daemon::
|
|||
Alternatively you may run it in screen::
|
||||
|
||||
$ screen -dm kuryr-daemon --config-file /etc/kuryr/kuryr.conf -d
|
||||
|
||||
Kuryr CNI Daemon health checks
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
The CNI daemon health checks allow the deployer or the orchestration layer
|
||||
(for example, Kubernetes or OpenShift) to probe the CNI daemon for liveness
|
||||
and readiness.
|
||||
|
||||
If you want to make use of all of its facilities, you should run the
|
||||
kuryr-daemon in its own cgroup. It will get its own cgroup if you:
|
||||
* Run it as a systemd service,
|
||||
* run it containerized,
|
||||
* create a memory cgroup for it.
|
||||
|
||||
In order to make the daemon run in its own cgroup, you can do the following::
|
||||
|
||||
systemd-run --unit=kuryr-daemon --scope --slice=kuryr-cni \
|
||||
kuryr-daemon --config-file /etc/kuryr/kuryr.conf -d
|
||||
|
||||
After this, with the CNI daemon running inside its own cgroup, we can enable
|
||||
the CNI daemon memory health check. This health check allows us to limit the
|
||||
memory consumption of the CNI Daemon. The health checks will fail if CNI starts
|
||||
taking more memory than the configured limit, and the orchestration layer should then restart it.
|
||||
The setting is::
|
||||
|
||||
[cni_health_server]
|
||||
# Set the memory limit to 4GiB (the value is expressed in MiB)
max_memory_usage = 4096
|
||||
|
|
|
@ -10,9 +10,7 @@
|
|||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import gc
|
||||
import os
|
||||
import psutil
|
||||
import requests
|
||||
from six.moves import http_client as httplib
|
||||
|
||||
|
@ -20,6 +18,7 @@ from flask import Flask
|
|||
from pyroute2 import IPDB
|
||||
|
||||
from kuryr.lib._i18n import _
|
||||
from kuryr_kubernetes.cni import utils
|
||||
from oslo_config import cfg
|
||||
from oslo_log import log as logging
|
||||
|
||||
|
@ -35,10 +34,18 @@ cni_health_server_opts = [
|
|||
'process. If this value is exceeded kuryr-daemon '
|
||||
'will be marked as unhealthy.'),
|
||||
default=-1),
|
||||
cfg.StrOpt(
    'cg_path',
    # NOTE: adjacent string literals are concatenated verbatim, so every
    # fragment except the last one has to end with a space; otherwise the
    # rendered help text glues two words together.
    help=_('sysfs path to the CNI cgroup. This is used for resource '
           'tracking and as such should point to the cgroup hierarchy '
           'leaf. It only applies when non containerized'),
    default='/sys/fs/cgroup/memory/system.slice/kuryr-cni.service')
|
||||
]
|
||||
|
||||
CONF.register_opts(cni_health_server_opts, "cni_health_server")
|
||||
|
||||
TOP_CGROUP_MEMORY_PATH = '/sys/fs/cgroup/memory'
|
||||
MEMSW_FILENAME = 'memory.memsw.usage_in_bytes'
|
||||
BYTES_AMOUNT = 1048576
|
||||
CAP_NET_ADMIN = 12 # Taken from linux/capabilities.h
|
||||
EFFECTIVE_CAPS = 'CapEff:\t'
|
||||
|
@ -61,6 +68,25 @@ def _has_cap(capability, entry, proc_status_path='/proc/self/status'):
|
|||
return (caps & (1 << capability)) != 0
|
||||
|
||||
|
||||
def _get_cni_cgroup_path():
    """Returns the memory cgroup directory used to track the CNI daemon.

    When a known container runtime is detected the top of the memory cgroup
    hierarchy is returned, otherwise the ``cg_path`` option of the
    ``cni_health_server`` group is returned as configured.
    """
    if not utils.running_under_container_runtime():
        return CONF.cni_health_server.cg_path

    # Inside a container the root cgroup is the one to track, as it will be
    # the CNI parent process' cgroup.
    return TOP_CGROUP_MEMORY_PATH
|
||||
|
||||
|
||||
def _get_memsw_usage(cgroup_mem_path):
    """Returns the group's memory plus swap usage divided by BYTES_AMOUNT.

    :param cgroup_mem_path: directory that holds the MEMSW_FILENAME file.
    :returns: the integer stored in that file divided by BYTES_AMOUNT.
    """
    usage_file_path = os.path.join(cgroup_mem_path, MEMSW_FILENAME)
    with open(usage_file_path) as usage_file:
        raw_usage = usage_file.read()
    # int() tolerates the surrounding whitespace/newline of the file content.
    return int(raw_usage) / BYTES_AMOUNT
|
||||
|
||||
|
||||
class CNIHealthServer(object):
|
||||
"""Server used by readiness and liveness probe to manage CNI health checks.
|
||||
|
||||
|
@ -108,11 +134,8 @@ class CNIHealthServer(object):
|
|||
return error_message, httplib.INTERNAL_SERVER_ERROR, self.headers
|
||||
|
||||
if CONF.cni_health_server.max_memory_usage != no_limit:
|
||||
# Force gc to release unreferenced memory before actually checking
|
||||
# the memory.
|
||||
gc.collect()
|
||||
process = psutil.Process(os.getpid())
|
||||
mem_usage = process.memory_info().rss / BYTES_AMOUNT
|
||||
mem_usage = _get_memsw_usage(_get_cni_cgroup_path())
|
||||
|
||||
if mem_usage > CONF.cni_health_server.max_memory_usage:
|
||||
err_message = 'CNI daemon exceeded maximum memory usage.'
|
||||
LOG.debug(err_message)
|
||||
|
|
|
@ -12,6 +12,19 @@
|
|||
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
# License for the specific language governing permissions and limitations
|
||||
# under the License.
|
||||
PROC_ONE_CGROUP_PATH = '/proc/1/cgroup'
|
||||
CONTAINER_RUNTIME_CGROUP_IDS = (
|
||||
'docker', # This is set by docker/moby
|
||||
'libpod', # This is set by podman
|
||||
)
|
||||
|
||||
|
||||
def running_under_container_runtime(proc_one_cg_path=PROC_ONE_CGROUP_PATH):
    """Tells whether the given cgroup file names a known container runtime.

    :param proc_one_cg_path: path of the cgroup information file to inspect.
    :returns: True if any entry of CONTAINER_RUNTIME_CGROUP_IDS occurs in
              the file contents, False otherwise.
    """
    with open(proc_one_cg_path, 'r') as cg_file:
        cg_contents = cg_file.read()

    # Plain substring search: one match is enough to decide.
    for runtime_id in CONTAINER_RUNTIME_CGROUP_IDS:
        if runtime_id in cg_contents:
            return True
    return False
|
||||
|
||||
|
||||
class CNIConfig(dict):
|
||||
|
|
|
@ -15,15 +15,12 @@ from kuryr_kubernetes.cni import health
|
|||
from kuryr_kubernetes.tests import base
|
||||
import mock
|
||||
import multiprocessing
|
||||
import os
|
||||
import tempfile
|
||||
|
||||
from oslo_config import cfg
|
||||
|
||||
|
||||
class TestResourceUsage(object):
|
||||
pass
|
||||
|
||||
|
||||
class TestCNIHealthServer(base.TestCase):
|
||||
|
||||
def setUp(self):
|
||||
|
@ -79,13 +76,12 @@ class TestCNIHealthServer(base.TestCase):
|
|||
resp = self.test_client.get('/alive')
|
||||
self.assertEqual(500, resp.status_code)
|
||||
|
||||
@mock.patch('psutil.Process.memory_info')
|
||||
def test_liveness_status_mem_usage_error(self, m_resource):
|
||||
@mock.patch('kuryr_kubernetes.cni.health._get_memsw_usage')
|
||||
def test_liveness_status_mem_usage_error(self, get_memsw_usage):
|
||||
get_memsw_usage.return_value = 5368709120 / health.BYTES_AMOUNT
|
||||
cfg.CONF.set_override('max_memory_usage', 4096,
|
||||
group='cni_health_server')
|
||||
cls = TestResourceUsage()
|
||||
cls.rss = 5368709120
|
||||
m_resource.return_value = cls
|
||||
|
||||
resp = self.test_client.get('/alive')
|
||||
self.assertEqual(500, resp.status_code)
|
||||
|
||||
|
@ -102,3 +98,30 @@ class TestCNIHealthUtils(base.TestCase):
|
|||
health._has_cap(health.CAP_NET_ADMIN,
|
||||
'CapBnd:\t',
|
||||
fake_status.name))
|
||||
|
||||
def test__get_mem_usage(self):
    """The usage file content is reported divided by BYTES_AMOUNT."""
    expected_usage = 500  # Arbitrary mem usage amount
    cg_dir = tempfile.mkdtemp(suffix='kuryr')
    memsw_file_path = os.path.join(cg_dir, health.MEMSW_FILENAME)
    try:
        file_content = '{}\n'.format(expected_usage * health.BYTES_AMOUNT)
        with open(memsw_file_path, 'w') as fake_memsw_file:
            fake_memsw_file.write(file_content)
        self.assertEqual(health._get_memsw_usage(cg_dir), expected_usage)
    finally:
        # Remove the file first so that the directory is empty for rmdir.
        os.unlink(memsw_file_path)
        os.rmdir(cg_dir)
|
||||
|
||||
@mock.patch('kuryr_kubernetes.cni.utils.running_under_container_runtime')
def test__get_cni_cgroup_path_system(self, running_containerized):
    """Outside a container runtime the configured cg_path is returned."""
    configured_path = '/kuryr/rules'
    running_containerized.return_value = False
    cfg.CONF.set_override('cg_path', configured_path,
                          group='cni_health_server')

    self.assertEqual(health._get_cni_cgroup_path(), configured_path)
|
||||
|
||||
@mock.patch('kuryr_kubernetes.cni.utils.running_under_container_runtime')
def test__get_cni_cgroup_path_container(self, running_containerized):
    """Under a container runtime the top memory cgroup path is returned."""
    running_containerized.return_value = True

    tracked_path = health._get_cni_cgroup_path()

    self.assertEqual(tracked_path, health.TOP_CGROUP_MEMORY_PATH)
|
||||
|
|
|
@ -0,0 +1,34 @@
|
|||
# Copyright Red Hat, Inc. 2018
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
import tempfile
|
||||
|
||||
import ddt
|
||||
from kuryr_kubernetes.cni import utils
|
||||
from kuryr_kubernetes.tests import base
|
||||
|
||||
|
||||
@ddt.ddt
class TestCNIUtils(base.TestCase):
    """Exercises container runtime detection against fake cgroup files."""

    @ddt.data(*utils.CONTAINER_RUNTIME_CGROUP_IDS)
    def test_running_under_container_runtime(self, container_runtime_id):
        # A file that mentions a known runtime id must be detected.
        cgroup_line = container_runtime_id.encode() + b'\n'
        with tempfile.NamedTemporaryFile() as proc_one_cgroup:
            proc_one_cgroup.write(cgroup_line)
            # Flush so the content is visible when the file is reopened
            # by name.
            proc_one_cgroup.flush()
            detected = utils.running_under_container_runtime(
                proc_one_cgroup.name)
            self.assertTrue(detected)

    def test_not_running_under_container_runtime(self):
        # An empty file names no runtime at all.
        with tempfile.NamedTemporaryFile() as empty_cgroup:
            detected = utils.running_under_container_runtime(
                empty_cgroup.name)
            self.assertFalse(detected)
|
|
@ -16,7 +16,6 @@ oslo.serialization!=2.19.1,>=2.18.0 # Apache-2.0
|
|||
oslo.service!=1.28.1,>=1.24.0 # Apache-2.0
|
||||
oslo.utils>=3.33.0 # Apache-2.0
|
||||
os-vif!=1.8.0,>=1.7.0 # Apache-2.0
|
||||
psutil>=3.2.2 # BSD
|
||||
pyroute2>=0.4.21;sys_platform!='win32' # Apache-2.0 (+ dual licensed GPL2)
|
||||
retrying!=1.3.0,>=1.2.3 # Apache-2.0
|
||||
six>=1.10.0 # MIT
|
||||
|
|
Loading…
Reference in New Issue