OVS: fix memory pool allocation for virtual environment

Cherry-pick of commit bcc89e5 to the r/2018.10 branch

This commit increases the per-socket vswitch hugepage memory for
virtual environments from 512 MB to 1024 MB, matching the amount
used for non-virtual environments.
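
To make the sizing concrete, here is a minimal arithmetic sketch
(the 2M page size and the division mirror the constants introduced
in the diff below):

    # Page counts implied by the memory budgets, assuming 2M hugepages.
    SIZE_2M_MB = 2                       # 2M hugepage size, in MB
    old_budget_mb = 512                  # previous virtual-environment budget
    new_budget_mb = 1024                 # unified budget after this change
    print(old_budget_mb // SIZE_2M_MB)   # 256 pages
    print(new_budget_mb // SIZE_2M_MB)   # 512 pages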

An issue was seen after commit da1110a, which enabled LLDP over
OVS: puppet would fail to add ports to OVS. Previously the issue
would have manifested not as a puppet error, but as a failure to
communicate over the data ports of some virtual compute nodes.

The root cause is a failure of DPDK to find a contiguous mempool
of sufficient size in any of the hugepages, which can happen in a
virtual environment restricted to a 2M hugepage size.
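
As a rough illustration (the numbers are hypothetical; DPDK's real
allocator is more involved), total free hugepage memory can exceed
the pool size while no single physically contiguous run is large
enough to hold it:

    # Hypothetical fragmented layout of free 2M-page runs, in MB.
    pool_size_mb = 700                        # assumed mbuf pool size
    contiguous_runs_mb = [256, 256, 128, 64]  # assumed contiguous runs
    print(sum(contiguous_runs_mb) >= pool_size_mb)  # True: enough in total
    print(max(contiguous_runs_mb) >= pool_size_mb)  # False: allocation fails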

Since 1G and 2M pages can be used for both vswitch and vm
purposes, the concept of a hugepage role is removed.
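
In effect, the vswitch page count is now derived from whichever
page size is present, as in this simplified sketch of the logic in
the diff below (the helper name is illustrative, not part of the
change):

    VSWITCH_MEMORY_MB = 1024  # per-socket budget, as in the diff

    def vswitch_hugepages_nr(page_size_mb):
        # 1024 MB -> 1 page of 1G, or 512 pages of 2M.
        return VSWITCH_MEMORY_MB // page_size_mb

    print(vswitch_hugepages_nr(1024))  # 1G pages: 1
    print(vswitch_hugepages_nr(2))     # 2M pages: 512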

Finally, the code has been cleaned up to separate out constants
and make variable names more Pythonic. Unit identifiers have been
made consistent for readability and to prevent confusion.

Change-Id: I14550526deddfaf13284d9273397a00b80eb8527
Closes-Bug: #1796380
Signed-off-by: Steven Webster <steven.webster@windriver.com>
Steven Webster 2018-10-15 13:58:36 -04:00
parent 71b4cc8a52
commit a7cdb4ce95
1 changed file with 166 additions and 167 deletions


@@ -17,16 +17,38 @@ from os.path import isfile, join
import re
import subprocess
from sysinv.common import utils
from sysinv.openstack.common import log as logging
import tsconfig.tsconfig as tsc
LOG = logging.getLogger(__name__)
# Defines per-socket vswitch memory requirements (in MB) for both real and
# virtual deployments
VSWITCH_REAL_MEMORY_MB = 1024
VSWITCH_VIRTUAL_MEMORY_MB = 512
# Defines per-socket vswitch memory requirements (in MB)
VSWITCH_MEMORY_MB = 1024
# Defines the size of one kilobyte
SIZE_KB = 1024
# Defines the size of 2 megabytes in kilobyte units
SIZE_2M_KB = 2048
# Defines the size of 1 gigabyte in kilobyte units
SIZE_1G_KB = 1048576
# Defines the size of 2 megabytes in megabyte units
SIZE_2M_MB = int(SIZE_2M_KB / SIZE_KB)
# Defines the size of 1 gigabyte in megabyte units
SIZE_1G_MB = int(SIZE_1G_KB / SIZE_KB)
# Defines the minimum size of memory for a controller node in megabyte units
CONTROLLER_MIN_MB = 6000
# Defines the minimum size of memory for a compute node in megabyte units
COMPUTE_MIN_MB = 1600
# Defines the minimum size of memory for a secondary compute node in megabyte
# units
COMPUTE_MIN_NON_0_MB = 500
class CPU:
@@ -73,17 +95,17 @@ class NodeOperator(object):
self.num_cpus = 0
self.num_nodes = 0
self.float_cpuset = 0
self.total_memory_MiB = 0
self.free_memory_MiB = 0
self.total_memory_nodes_MiB = []
self.free_memory_nodes_MiB = []
self.total_memory_mb = 0
self.free_memory_mb = 0
self.total_memory_nodes_mb = []
self.free_memory_nodes_mb = []
self.topology = {}
# self._get_cpu_topology()
# self._get_total_memory_MiB()
# self._get_total_memory_nodes_MiB()
# self._get_free_memory_MiB()
# self._get_free_memory_nodes_MiB()
# self._get_total_memory_mb()
# self._get_total_memory_nodes_mb()
# self._get_free_memory_mb()
# self._get_free_memory_nodes_mb()
def _is_strict(self):
with open(os.devnull, "w") as fnull:
@@ -139,7 +161,7 @@ class NodeOperator(object):
self.num_nodes = 0
self.topology = {}
Thread_cnt = {}
thread_cnt = {}
cpu = socket_id = core_id = thread_id = -1
re_processor = re.compile(r'^[Pp]rocessor\s+:\s+(\d+)')
re_socket = re.compile(r'^physical id\s+:\s+(\d+)')
@@ -184,9 +206,9 @@ class NodeOperator(object):
if socket_id not in sockets:
sockets.append(socket_id)
attrs = {
'numa_node': socket_id,
'capabilities': {},
}
'numa_node': socket_id,
'capabilities': {},
}
inumas.append(attrs)
continue
@@ -194,13 +216,13 @@ class NodeOperator(object):
if match:
core_id = int(match.group(1))
if socket_id not in Thread_cnt:
Thread_cnt[socket_id] = {}
if core_id not in Thread_cnt[socket_id]:
Thread_cnt[socket_id][core_id] = 0
if socket_id not in thread_cnt:
thread_cnt[socket_id] = {}
if core_id not in thread_cnt[socket_id]:
thread_cnt[socket_id][core_id] = 0
else:
Thread_cnt[socket_id][core_id] += 1
thread_id = Thread_cnt[socket_id][core_id]
thread_cnt[socket_id][core_id] += 1
thread_id = thread_cnt[socket_id][core_id]
if socket_id not in self.topology:
self.topology[socket_id] = {}
@@ -208,12 +230,13 @@ class NodeOperator(object):
self.topology[socket_id][core_id] = {}
self.topology[socket_id][core_id][thread_id] = cpu
attrs = {'cpu': cpu,
'numa_node': socket_id,
'core': core_id,
'thread': thread_id,
'capabilities': {},
}
attrs = {
'cpu': cpu,
'numa_node': socket_id,
'core': core_id,
'thread': thread_id,
'capabilities': {},
}
icpu_attrs.update(attrs)
icpus.append(icpu_attrs)
icpu_attrs = {}
@@ -230,22 +253,21 @@ class NodeOperator(object):
if socket_id not in sockets:
sockets.append(socket_id)
attrs = {
'numa_node': socket_id,
'capabilities': {},
}
'numa_node': socket_id,
'capabilities': {},
}
inumas.append(attrs)
for core_id in range(n_cores):
self.topology[socket_id][core_id] = {}
for thread_id in range(n_threads):
self.topology[socket_id][core_id][thread_id] = 0
attrs = {
'cpu': cpu,
'numa_node': socket_id,
'core': core_id,
'thread': thread_id,
'capabilities': {},
}
'cpu': cpu,
'numa_node': socket_id,
'core': core_id,
'thread': thread_id,
'capabilities': {},
}
icpus.append(attrs)
# Define Thread-Socket-Core order for logical cpu enumeration
@@ -256,19 +278,18 @@ class NodeOperator(object):
if socket_id not in sockets:
sockets.append(socket_id)
attrs = {
'numa_node': socket_id,
'capabilities': {},
}
'numa_node': socket_id,
'capabilities': {},
}
inumas.append(attrs)
self.topology[socket_id][core_id][thread_id] = cpu
attrs = {
'cpu': cpu,
'numa_node': socket_id,
'core': core_id,
'thread': thread_id,
'capabilities': {},
}
'cpu': cpu,
'numa_node': socket_id,
'core': core_id,
'thread': thread_id,
'capabilities': {},
}
icpus.append(attrs)
cpu += 1
self.num_nodes = len(self.topology.keys())
@@ -281,26 +302,6 @@ class NodeOperator(object):
return [name for name in listdir(dir)
if os.path.isdir(join(dir, name))]
def _set_default_vswitch_hugesize(self):
"""
Set the default memory size for vswitch hugepages when it must fallback
to 2MB pages because there are no 1GB pages. In a virtual environment
we set a smaller amount of memory because vswitch is configured to use
a smaller mbuf pool. In non-virtual environments we use the same
amount of memory as we would if 1GB pages were available.
"""
hugepage_size = 2
if utils.is_virtual():
vswitch_hugepages_nr = VSWITCH_VIRTUAL_MEMORY_MB / hugepage_size
else:
vswitch_hugepages_nr = VSWITCH_REAL_MEMORY_MB / hugepage_size
# Create a new set of dict attributes
hp_attr = {'vswitch_hugepages_size_mib': hugepage_size,
'vswitch_hugepages_nr': vswitch_hugepages_nr,
'vswitch_hugepages_avail': 0}
return hp_attr
def _inode_get_memory_hugepages(self):
"""Collect hugepage info, including vswitch, and vm.
Collect platform reserved if config.
@@ -309,12 +310,6 @@ class NodeOperator(object):
"""
imemory = []
Ki = 1024
SZ_2M_Ki = 2048
SZ_1G_Ki = 1048576
controller_min_MB = 6000
compute_min_MB = 1600
compute_min_non0_MB = 500
initial_compute_config_completed = \
os.path.exists(tsc.INITIAL_COMPUTE_CONFIG_COMPLETE)
@@ -331,8 +326,8 @@ class NodeOperator(object):
for node in range(self.num_nodes):
attr = {}
Total_HP_MiB = 0 # Total memory (MiB) currently configured in HPs
Free_HP_MiB = 0
total_hp_mb = 0 # Total memory (MB) currently configured in HPs
free_hp_mb = 0
# Check vswitch and libvirt memory
# Loop through configured hugepage sizes of this node and record
@@ -345,13 +340,10 @@ class NodeOperator(object):
for subdir in subdirs:
hp_attr = {}
sizesplit = subdir.split('-')
# role via size; also from /etc/nova/compute_reserved.conf
if sizesplit[1].startswith("1048576kB"):
hugepages_role = "vswitch"
size = int(SZ_1G_Ki / Ki)
size = SIZE_1G_MB
else:
hugepages_role = "vm"
size = int(SZ_2M_Ki / Ki)
size = SIZE_2M_MB
nr_hugepages = 0
free_hugepages = 0
@@ -367,33 +359,40 @@ class NodeOperator(object):
if file.startswith("free_hugepages"):
free_hugepages = int(f.readline())
Total_HP_MiB = Total_HP_MiB + int(nr_hugepages * size)
Free_HP_MiB = Free_HP_MiB + int(free_hugepages * size)
total_hp_mb = total_hp_mb + int(nr_hugepages * size)
free_hp_mb = free_hp_mb + int(free_hugepages * size)
# Libvirt hugepages can now be 1G and 2M, can't only look
# at 2M pages
if hugepages_role == "vswitch":
vswitch_hugepages_nr = VSWITCH_REAL_MEMORY_MB / size
# Libvirt hugepages can be 1G and 2M
if size == SIZE_1G_MB:
vswitch_hugepages_nr = VSWITCH_MEMORY_MB / size
hp_attr = {
'vswitch_hugepages_size_mib': size,
'vswitch_hugepages_nr': vswitch_hugepages_nr,
'vswitch_hugepages_avail': 0,
'vm_hugepages_nr_1G':
(nr_hugepages - vswitch_hugepages_nr),
'vm_hugepages_avail_1G': free_hugepages,
'vm_hugepages_use_1G': 'True'
}
'vswitch_hugepages_size_mib': size,
'vswitch_hugepages_nr': vswitch_hugepages_nr,
'vswitch_hugepages_avail': 0,
'vm_hugepages_nr_1G':
(nr_hugepages - vswitch_hugepages_nr),
'vm_hugepages_avail_1G': free_hugepages,
'vm_hugepages_use_1G': 'True'
}
else:
if len(subdirs) == 1:
hp_attr = self._set_default_vswitch_hugesize()
# No 1G hugepage support.
vswitch_hugepages_nr = VSWITCH_MEMORY_MB / size
hp_attr = {
'vswitch_hugepages_size_mib': size,
'vswitch_hugepages_nr': vswitch_hugepages_nr,
'vswitch_hugepages_avail': 0,
}
hp_attr.update({'vm_hugepages_use_1G': 'False'})
else:
# vswitch will use 1G hugepages
vswitch_hugepages_nr = 0
vswitch_hugepages_nr = hp_attr.get('vswitch_hugepages_nr', 0)
hp_attr.update({
'vm_hugepages_avail_2M': free_hugepages,
'vm_hugepages_nr_2M':
(nr_hugepages - vswitch_hugepages_nr)
})
})
attr.update(hp_attr)
@@ -402,19 +401,19 @@ class NodeOperator(object):
pass
# Get the free and total memory from meminfo for this node
re_node_MemTotal = re.compile(r'^Node\s+\d+\s+\MemTotal:\s+(\d+)')
re_node_MemFree = re.compile(r'^Node\s+\d+\s+\MemFree:\s+(\d+)')
re_node_FilePages = \
re_node_memtotal = re.compile(r'^Node\s+\d+\s+\MemTotal:\s+(\d+)')
re_node_memfree = re.compile(r'^Node\s+\d+\s+\MemFree:\s+(\d+)')
re_node_filepages = \
re.compile(r'^Node\s+\d+\s+\FilePages:\s+(\d+)')
re_node_SReclaim = \
re_node_sreclaim = \
re.compile(r'^Node\s+\d+\s+\SReclaimable:\s+(\d+)')
re_node_CommitLimit = \
re_node_commitlimit = \
re.compile(r'^Node\s+\d+\s+\CommitLimit:\s+(\d+)')
re_node_Committed_AS = \
re_node_committed_as = \
re.compile(r'^Node\s+\d+\s+\'Committed_AS:\s+(\d+)')
Free_KiB = 0 # Free Memory (KiB) available
Total_KiB = 0 # Total Memory (KiB)
free_kb = 0 # Free Memory (KB) available
total_kb = 0 # Total Memory (KB)
limit = 0 # only used in strict accounting
committed = 0 # only used in strict accounting
@@ -422,40 +421,40 @@ class NodeOperator(object):
try:
with open(meminfo, 'r') as infile:
for line in infile:
match = re_node_MemTotal.search(line)
match = re_node_memtotal.search(line)
if match:
Total_KiB += int(match.group(1))
total_kb += int(match.group(1))
continue
match = re_node_MemFree.search(line)
match = re_node_memfree.search(line)
if match:
Free_KiB += int(match.group(1))
free_kb += int(match.group(1))
continue
match = re_node_FilePages.search(line)
match = re_node_filepages.search(line)
if match:
Free_KiB += int(match.group(1))
free_kb += int(match.group(1))
continue
match = re_node_SReclaim.search(line)
match = re_node_sreclaim.search(line)
if match:
Free_KiB += int(match.group(1))
free_kb += int(match.group(1))
continue
match = re_node_CommitLimit.search(line)
match = re_node_commitlimit.search(line)
if match:
limit = int(match.group(1))
continue
match = re_node_Committed_AS.search(line)
match = re_node_committed_as.search(line)
if match:
committed = int(match.group(1))
continue
if self._is_strict():
Free_KiB = limit - committed
free_kb = limit - committed
except IOError:
# silently ignore IO errors (eg. file missing)
pass
# Calculate PSS
Pss_MiB = 0
pss_mb = 0
if node == 0:
cmd = 'cat /proc/*/smaps 2>/dev/null | awk \'/^Pss:/ ' \
'{a += $2;} END {printf "%d\\n", a/1024.0;}\''
@@ -463,7 +462,7 @@ class NodeOperator(object):
proc = subprocess.Popen(cmd, stdout=subprocess.PIPE,
shell=True)
result = proc.stdout.read().strip()
Pss_MiB = int(result)
pss_mb = int(result)
except subprocess.CalledProcessError as e:
LOG.error("Cannot calculate PSS (%s) (%d)", cmd,
e.returncode)
@@ -471,11 +470,11 @@ class NodeOperator(object):
LOG.error("Failed to execute (%s) OS error (%d)", cmd,
e.errno)
# need to multiply Total_MiB by 1024 to match compute_huge
node_total_kib = Total_HP_MiB * Ki + Free_KiB + Pss_MiB * Ki
# need to multiply total_mb by 1024 to match compute_huge
node_total_kb = total_hp_mb * SIZE_KB + free_kb + pss_mb * SIZE_KB
# Read base memory from compute_reserved.conf
base_mem_MiB = 0
base_mem_mb = 0
with open('/etc/nova/compute_reserved.conf', 'r') as infile:
for line in infile:
if "COMPUTE_BASE_RESERVED" in line:
@@ -484,51 +483,51 @@ class NodeOperator(object):
for reserve in base_reserves.split():
reserve = reserve.split(":")
if reserve[0].strip('"') == "node%d" % node:
base_mem_MiB = int(reserve[1].strip('MB'))
base_mem_mb = int(reserve[1].strip('MB'))
# On small systems, clip memory overhead to more reasonable minimal
# settings
if (Total_KiB / Ki - base_mem_MiB) < 1000:
if (total_kb / SIZE_KB - base_mem_mb) < 1000:
if node == 0:
base_mem_MiB = compute_min_MB
base_mem_mb = COMPUTE_MIN_MB
if tsc.nodetype == 'controller':
base_mem_MiB += controller_min_MB
base_mem_mb += CONTROLLER_MIN_MB
else:
base_mem_MiB = compute_min_non0_MB
base_mem_mb = COMPUTE_MIN_NON_0_MB
Eng_KiB = node_total_kib - base_mem_MiB * Ki
eng_kb = node_total_kb - base_mem_mb * SIZE_KB
vswitch_mem_kib = (attr.get('vswitch_hugepages_size_mib', 0) *
attr.get('vswitch_hugepages_nr', 0) * Ki)
vswitch_mem_kb = (attr.get('vswitch_hugepages_size_mib', 0) *
attr.get('vswitch_hugepages_nr', 0) * SIZE_KB)
VM_KiB = (Eng_KiB - vswitch_mem_kib)
vm_kb = (eng_kb - vswitch_mem_kb)
max_vm_pages_2M = VM_KiB / SZ_2M_Ki
max_vm_pages_1G = VM_KiB / SZ_1G_Ki
max_vm_pages_2mb = vm_kb / SIZE_2M_KB
max_vm_pages_1gb = vm_kb / SIZE_1G_KB
attr.update({
'vm_hugepages_possible_2M': max_vm_pages_2M,
'vm_hugepages_possible_1G': max_vm_pages_1G,
'vm_hugepages_possible_2M': max_vm_pages_2mb,
'vm_hugepages_possible_1G': max_vm_pages_1gb,
})
# calculate 100% 2M pages if it is initial report and the huge
# pages have not been allocated
if initial_report:
Total_HP_MiB += int(max_vm_pages_2M * (SZ_2M_Ki / Ki))
Free_HP_MiB = Total_HP_MiB
total_hp_mb += int(max_vm_pages_2mb * (SIZE_2M_KB / SIZE_KB))
free_hp_mb = total_hp_mb
attr.update({
'vm_hugepages_nr_2M': max_vm_pages_2M,
'vm_hugepages_avail_2M': max_vm_pages_2M,
'vm_hugepages_nr_2M': max_vm_pages_2mb,
'vm_hugepages_avail_2M': max_vm_pages_2mb,
'vm_hugepages_nr_1G': 0
})
attr.update({
'numa_node': node,
'memtotal_mib': Total_HP_MiB,
'memavail_mib': Free_HP_MiB,
'memtotal_mib': total_hp_mb,
'memavail_mib': free_hp_mb,
'hugepages_configured': 'True',
'node_memtotal_mib': node_total_kib / 1024,
})
'node_memtotal_mib': node_total_kb / 1024,
})
imemory.append(attr)
@@ -541,53 +540,53 @@ class NodeOperator(object):
'''
imemory = []
self.total_memory_MiB = 0
self.total_memory_mb = 0
re_node_MemTotal = re.compile(r'^Node\s+\d+\s+\MemTotal:\s+(\d+)')
re_node_MemFree = re.compile(r'^Node\s+\d+\s+\MemFree:\s+(\d+)')
re_node_FilePages = re.compile(r'^Node\s+\d+\s+\FilePages:\s+(\d+)')
re_node_SReclaim = re.compile(r'^Node\s+\d+\s+\SReclaimable:\s+(\d+)')
re_node_memtotal = re.compile(r'^Node\s+\d+\s+\MemTotal:\s+(\d+)')
re_node_memfree = re.compile(r'^Node\s+\d+\s+\MemFree:\s+(\d+)')
re_node_filepages = re.compile(r'^Node\s+\d+\s+\FilePages:\s+(\d+)')
re_node_sreclaim = re.compile(r'^Node\s+\d+\s+\SReclaimable:\s+(\d+)')
for node in range(self.num_nodes):
attr = {}
Total_MiB = 0
Free_MiB = 0
total_mb = 0
free_mb = 0
meminfo = "/sys/devices/system/node/node%d/meminfo" % node
try:
with open(meminfo, 'r') as infile:
for line in infile:
match = re_node_MemTotal.search(line)
match = re_node_memtotal.search(line)
if match:
Total_MiB += int(match.group(1))
total_mb += int(match.group(1))
continue
match = re_node_MemFree.search(line)
match = re_node_memfree.search(line)
if match:
Free_MiB += int(match.group(1))
free_mb += int(match.group(1))
continue
match = re_node_FilePages.search(line)
match = re_node_filepages.search(line)
if match:
Free_MiB += int(match.group(1))
free_mb += int(match.group(1))
continue
match = re_node_SReclaim.search(line)
match = re_node_sreclaim.search(line)
if match:
Free_MiB += int(match.group(1))
free_mb += int(match.group(1))
continue
except IOError:
# silently ignore IO errors (eg. file missing)
pass
Total_MiB /= 1024
Free_MiB /= 1024
self.total_memory_nodes_MiB.append(Total_MiB)
total_mb /= 1024
free_mb /= 1024
self.total_memory_nodes_mb.append(total_mb)
attr = {
'numa_node': node,
'memtotal_mib': Total_MiB,
'memavail_mib': Free_MiB,
'hugepages_configured': 'False',
}
'numa_node': node,
'memtotal_mib': total_mb,
'memavail_mib': free_mb,
'hugepages_configured': 'False',
}
imemory.append(attr)