Add vm.watermark_scale_factor tuning

Depends: https://github.com/juju/charm-helpers/pull/609
Implements: spec memory-fragmentation-tuning
Change-Id: I0195d5f65f36442abf1355dd5150d48a37184e97
Brett Milford 2022-09-02 08:53:06 +10:00
parent d39f452353
commit c6b0d798b7
4 changed files with 144 additions and 3 deletions

charmhelpers/contrib/sysctl/watermark_scale_factor.py
@@ -0,0 +1,104 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright 2014-2015 Canonical Limited.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import re

from charmhelpers.core.hookenv import (
    log,
    DEBUG,
    ERROR,
)
from charmhelpers.core.host import get_total_ram

WMARK_MAX = 1000
WMARK_DEFAULT = 10
MEMTOTAL_MIN_BYTES = 17179803648  # ~16G
MAX_PAGES = 2500000000


def calculate_watermark_scale_factor():
    """Calculate an optimal vm.watermark_scale_factor value.

    :returns: watermark_scale_factor
    :rtype: int
    """
    memtotal = get_total_ram()
    normal_managed_pages = get_normal_managed_pages()
    try:
        # Take the most conservative (smallest) value across the
        # Normal zones of all nodes.
        wmark = min([watermark_scale_factor(memtotal, managed_pages)
                     for managed_pages in normal_managed_pages])
    except ValueError as e:
        log("Failed to calculate watermark_scale_factor from normal "
            "managed pages: {}".format(normal_managed_pages), ERROR)
        raise e
    log("vm.watermark_scale_factor: {}".format(wmark), DEBUG)
    return wmark


def get_normal_managed_pages():
    """Parse /proc/zoneinfo for the managed page count of the
    Normal zone on each node.

    :returns: normal_managed_pages
    :rtype: [int]
    """
    try:
        normal_managed_pages = []
        with open('/proc/zoneinfo', 'r') as f:
            in_zone_normal = False
            # Matches lines such as "Node 0, zone   Normal",
            # capturing the zone name into group 1.
            normal_zone_matcher = re.compile(r"^Node\s\d+,\s+zone\s+(\S+)$")
            # Matches the page count at the end of a line such as
            # "        managed  3840", capturing the number into group 1.
            managed_matcher = re.compile(r"\s+managed\s+(\d+)$")
            for line in f.readlines():
                match = normal_zone_matcher.search(line)
                if match:
                    in_zone_normal = match.group(1) == 'Normal'
                if in_zone_normal:
                    managed_match = managed_matcher.search(line)
                    if managed_match:
                        normal_managed_pages.append(
                            int(managed_match.group(1)))
                        in_zone_normal = False
    except OSError as e:
        log("Failed to read /proc/zoneinfo while calculating "
            "watermark_scale_factor: {}".format(e), ERROR)
        raise e
    return normal_managed_pages


def watermark_scale_factor(memtotal, managed_pages):
    """Calculate a vm.watermark_scale_factor value for one zone.

    :param memtotal: Total system memory in bytes
    :type memtotal: int
    :param managed_pages: Number of managed pages in the zone
    :type managed_pages: int
    :returns: watermark_scale_factor
    :rtype: int
    """
    if memtotal <= MEMTOTAL_MIN_BYTES:
        return WMARK_DEFAULT
    else:
        wmark = int(MAX_PAGES / managed_pages)
        if wmark > WMARK_MAX:
            return WMARK_MAX
        else:
            return wmark
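The MAX_PAGES constant above encodes the 1GB gap target. As a rough sanity check of the arithmetic (an illustrative sketch, not part of the change, assuming the kernel's usual 4 KiB page size and that the kernel sizes the low-to-high watermark gap as managed_pages * watermark_scale_factor / 10000 pages):

PAGE_SIZE = 4096
MAX_PAGES = 2500000000
WMARK_MAX = 1000

def approx_gap_bytes(managed_pages):
    # Same per-zone formula as watermark_scale_factor() above.
    wsf = min(WMARK_MAX, int(MAX_PAGES / managed_pages))
    return managed_pages * PAGE_SIZE * wsf // 10000

# A Normal zone managing 32 GiB (8388608 pages) yields wsf = 298 and a
# gap of 1023920203 bytes, i.e. roughly 1GB.
print(approx_gap_bytes(8388608))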

config.yaml

@@ -261,3 +261,19 @@ options:
    default: 6012
    type: int
    description: Listening port of the swift-account-replicator server.
  tune-watermark-scale-factor:
    type: boolean
    default: False
    description: |
      Whether to tune vm.watermark_scale_factor.
      .
      In some high memory pressure scenarios, a memory shortage may force
      synchronous (direct) reclaim when allocating higher-order pages.
      Additionally, in some cases the default vm.watermark_scale_factor
      sysctl value yields a gap between the min, low and high watermarks
      that is too small to wake kswapd (asynchronous reclaim) before
      synchronous reclaim becomes necessary.
      Enabling this option fine-tunes vm.watermark_scale_factor at runtime
      so that the watermark gap is roughly 1GB, waking kswapd earlier and
      alleviating issues arising from this situation.
      .
      NOTE: Only affects baremetal hosts and kernels 4.15 and later.
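For reference, an operator would enable the tuning with something like the following (assuming the charm is deployed under the application name swift-storage):

juju config swift-storage tune-watermark-scale-factor=true

As the hooks change below shows, a vm.watermark_scale_factor value set explicitly through the existing sysctl charm option always takes precedence over the computed one.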

hooks/swift_storage_hooks.py

@@ -23,6 +23,7 @@ import sys
import socket
import subprocess
import tempfile
import yaml
from subprocess import CalledProcessError
@@ -75,6 +76,7 @@ from charmhelpers.core.hookenv import (
    ingress_address,
    DEBUG,
    WARNING,
    ERROR,
)
from charmhelpers.fetch import (
@@ -92,6 +94,9 @@ from charmhelpers.core.host import (
)
from charmhelpers.core.sysctl import create as create_sysctl
from charmhelpers.contrib.sysctl.watermark_scale_factor import (
    calculate_watermark_scale_factor,
)
from charmhelpers.payload.execd import execd_preinstall
@@ -249,9 +254,25 @@ def config_changed():
    if relations_of_type('nrpe-external-master'):
        update_nrpe_config()

    sysctl_dict = config('sysctl')
    if sysctl_dict:
        create_sysctl(sysctl_dict, '/etc/sysctl.d/50-swift-storage-charm.conf')
    if config('sysctl'):
        sysctl_dict_parsed = {}
        sysctl_settings = config('sysctl')
        try:
            sysctl_dict_parsed = yaml.safe_load(sysctl_settings)
        except yaml.YAMLError:
            log("Error parsing YAML sysctl_dict: {}".format(sysctl_settings),
                level=ERROR)
        # Only compute a value when the operator has not already set
        # vm.watermark_scale_factor explicitly via the sysctl option.
        if (config('tune-watermark-scale-factor') is True and
                "vm.watermark_scale_factor" not in sysctl_dict_parsed):
            try:
                wmark = calculate_watermark_scale_factor()
                sysctl_dict_parsed["vm.watermark_scale_factor"] = wmark
            except Exception:
                # Best effort: fall back to the kernel default rather
                # than failing the hook.
                pass
        if sysctl_dict_parsed:
            create_sysctl(sysctl_dict_parsed,
                          '/etc/sysctl.d/50-swift-storage-charm.conf')

    add_to_updatedb_prunepath(STORAGE_MOUNT_PATH)
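A minimal sketch of that precedence rule (illustrative only, not part of the commit; the values below are stand-ins): because the computed value is only inserted when the key is absent from the parsed sysctl option, an operator-supplied value wins.

import yaml

# Hypothetical operator-set 'sysctl' charm option.
sysctl_settings = "{ vm.watermark_scale_factor: 200 }"
parsed = yaml.safe_load(sysctl_settings)
if "vm.watermark_scale_factor" not in parsed:
    # Stand-in for calculate_watermark_scale_factor().
    parsed["vm.watermark_scale_factor"] = 150
print(parsed)  # {'vm.watermark_scale_factor': 200}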