From a7cdb4ce95113c74d12d4581c676faa06d1c3112 Mon Sep 17 00:00:00 2001
From: Steven Webster
Date: Mon, 15 Oct 2018 13:58:36 -0400
Subject: [PATCH] OVS: fix memory pool allocation for virtual environment

Cherry-pick to r/2018.10 branch of commit bcc89e5

This commit increases the per-socket vswitch hugepage memory for
virtual environments from 512MB to 1024MB, making it equal to the
amount used for non-virtual environments. With 2M pages, this raises
the vswitch pool from 256 to 512 hugepages per socket (an illustrative
sketch of this arithmetic is appended after the diff).

An issue was seen after the da1110a commit to enable LLDP over OVS, in
which puppet would fail to add ports to OVS. Previously, the issue
would have manifested not as a puppet error, but as a failure to
communicate over the data ports of some virtual compute nodes.

The root cause is that DPDK fails to find a contiguous memory pool of
sufficient size in any of the hugepages, which can happen in a virtual
environment restricted to a 2M hugepage size.

Since 1G and 2M pages can be used for both vswitch and vm purposes,
the concept of a hugepage role is removed.

Finally, the code has had some cleanup to separate out constants and
make variable names more pythonic. Unit identifiers have been made
consistent for readability and to prevent confusion.

Change-Id: I14550526deddfaf13284d9273397a00b80eb8527
Closes-Bug: #1796380
Signed-off-by: Steven Webster
---
 sysinv/sysinv/sysinv/sysinv/agent/node.py | 333 +++++++++++-----------
 1 file changed, 166 insertions(+), 167 deletions(-)

diff --git a/sysinv/sysinv/sysinv/sysinv/agent/node.py b/sysinv/sysinv/sysinv/sysinv/agent/node.py
index c4e459bc87..032923ff0e 100644
--- a/sysinv/sysinv/sysinv/sysinv/agent/node.py
+++ b/sysinv/sysinv/sysinv/sysinv/agent/node.py
@@ -17,16 +17,38 @@ from os.path import isfile, join
 import re
 import subprocess
 
-from sysinv.common import utils
 from sysinv.openstack.common import log as logging
 
 import tsconfig.tsconfig as tsc
 
 LOG = logging.getLogger(__name__)
 
-# Defines per-socket vswitch memory requirements (in MB) for both real and
-# virtual deployments
-VSWITCH_REAL_MEMORY_MB = 1024
-VSWITCH_VIRTUAL_MEMORY_MB = 512
+# Defines per-socket vswitch memory requirements (in MB)
+VSWITCH_MEMORY_MB = 1024
+
+# Defines the size of one kilobyte
+SIZE_KB = 1024
+
+# Defines the size of 2 megabytes in kilobyte units
+SIZE_2M_KB = 2048
+
+# Defines the size of 1 gigabyte in kilobyte units
+SIZE_1G_KB = 1048576
+
+# Defines the size of 2 megabytes in megabyte units
+SIZE_2M_MB = int(SIZE_2M_KB / SIZE_KB)
+
+# Defines the size of 1 gigabyte in megabyte units
+SIZE_1G_MB = int(SIZE_1G_KB / SIZE_KB)
+
+# Defines the minimum size of memory for a controller node in megabyte units
+CONTROLLER_MIN_MB = 6000
+
+# Defines the minimum size of memory for a compute node in megabyte units
+COMPUTE_MIN_MB = 1600
+
+# Defines the minimum size of memory for a secondary compute node in megabyte
+# units
+COMPUTE_MIN_NON_0_MB = 500
 
 
 class CPU:
@@ -73,17 +95,17 @@ class NodeOperator(object):
         self.num_cpus = 0
         self.num_nodes = 0
         self.float_cpuset = 0
-        self.total_memory_MiB = 0
-        self.free_memory_MiB = 0
-        self.total_memory_nodes_MiB = []
-        self.free_memory_nodes_MiB = []
+        self.total_memory_mb = 0
+        self.free_memory_mb = 0
+        self.total_memory_nodes_mb = []
+        self.free_memory_nodes_mb = []
         self.topology = {}
 
         # self._get_cpu_topology()
-        # self._get_total_memory_MiB()
-        # self._get_total_memory_nodes_MiB()
-        # self._get_free_memory_MiB()
-        # self._get_free_memory_nodes_MiB()
+        # self._get_total_memory_mb()
+        # self._get_total_memory_nodes_mb()
+        # self._get_free_memory_mb()
+        # self._get_free_memory_nodes_mb()
 
     def _is_strict(self):
         with open(os.devnull, "w") as fnull:
@@ -139,7 +161,7 @@ class NodeOperator(object):
         self.num_nodes = 0
         self.topology = {}
 
-        Thread_cnt = {}
+        thread_cnt = {}
         cpu = socket_id = core_id = thread_id = -1
         re_processor = re.compile(r'^[Pp]rocessor\s+:\s+(\d+)')
         re_socket = re.compile(r'^physical id\s+:\s+(\d+)')
@@ -184,9 +206,9 @@ class NodeOperator(object):
                 if socket_id not in sockets:
                     sockets.append(socket_id)
                     attrs = {
-                              'numa_node': socket_id,
-                              'capabilities': {},
-                             }
+                        'numa_node': socket_id,
+                        'capabilities': {},
+                    }
                     inumas.append(attrs)
                 continue
@@ -194,13 +216,13 @@ class NodeOperator(object):
                 if match:
                     core_id = int(match.group(1))
 
-                    if socket_id not in Thread_cnt:
-                        Thread_cnt[socket_id] = {}
-                    if core_id not in Thread_cnt[socket_id]:
-                        Thread_cnt[socket_id][core_id] = 0
+                    if socket_id not in thread_cnt:
+                        thread_cnt[socket_id] = {}
+                    if core_id not in thread_cnt[socket_id]:
+                        thread_cnt[socket_id][core_id] = 0
                     else:
-                        Thread_cnt[socket_id][core_id] += 1
-                    thread_id = Thread_cnt[socket_id][core_id]
+                        thread_cnt[socket_id][core_id] += 1
+                    thread_id = thread_cnt[socket_id][core_id]
 
                     if socket_id not in self.topology:
                         self.topology[socket_id] = {}
@@ -208,12 +230,13 @@
                         self.topology[socket_id][core_id] = {}
                     self.topology[socket_id][core_id][thread_id] = cpu
 
-                    attrs = {'cpu': cpu,
-                             'numa_node': socket_id,
-                             'core': core_id,
-                             'thread': thread_id,
-                             'capabilities': {},
-                             }
+                    attrs = {
+                        'cpu': cpu,
+                        'numa_node': socket_id,
+                        'core': core_id,
+                        'thread': thread_id,
+                        'capabilities': {},
+                    }
                     icpu_attrs.update(attrs)
                     icpus.append(icpu_attrs)
                     icpu_attrs = {}
@@ -230,22 +253,21 @@
             if socket_id not in sockets:
                 sockets.append(socket_id)
                 attrs = {
-                          'numa_node': socket_id,
-                          'capabilities': {},
-                         }
+                    'numa_node': socket_id,
+                    'capabilities': {},
+                }
                 inumas.append(attrs)
             for core_id in range(n_cores):
                 self.topology[socket_id][core_id] = {}
                 for thread_id in range(n_threads):
                     self.topology[socket_id][core_id][thread_id] = 0
                     attrs = {
-                              'cpu': cpu,
-                              'numa_node': socket_id,
-                              'core': core_id,
-                              'thread': thread_id,
-                              'capabilities': {},
-
-                             }
+                        'cpu': cpu,
+                        'numa_node': socket_id,
+                        'core': core_id,
+                        'thread': thread_id,
+                        'capabilities': {},
+                    }
                     icpus.append(attrs)
 
         # Define Thread-Socket-Core order for logical cpu enumeration
@@ -256,19 +278,18 @@
                     if socket_id not in sockets:
                         sockets.append(socket_id)
                         attrs = {
-                                  'numa_node': socket_id,
-                                  'capabilities': {},
-                                 }
+                            'numa_node': socket_id,
+                            'capabilities': {},
+                        }
                         inumas.append(attrs)
                     self.topology[socket_id][core_id][thread_id] = cpu
                     attrs = {
-                              'cpu': cpu,
-                              'numa_node': socket_id,
-                              'core': core_id,
-                              'thread': thread_id,
-                              'capabilities': {},
-
-                             }
+                        'cpu': cpu,
+                        'numa_node': socket_id,
+                        'core': core_id,
+                        'thread': thread_id,
+                        'capabilities': {},
+                    }
                     icpus.append(attrs)
                     cpu += 1
         self.num_nodes = len(self.topology.keys())
@@ -281,26 +302,6 @@
         return [name for name in listdir(dir)
                 if os.path.isdir(join(dir, name))]
 
-    def _set_default_vswitch_hugesize(self):
-        """
-        Set the default memory size for vswitch hugepages when it must fallback
-        to 2MB pages because there are no 1GB pages. In a virtual environment
-        we set a smaller amount of memory because vswitch is configured to use
-        a smaller mbuf pool. In non-virtual environments we use the same
-        amount of memory as we would if 1GB pages were available.
- """ - hugepage_size = 2 - if utils.is_virtual(): - vswitch_hugepages_nr = VSWITCH_VIRTUAL_MEMORY_MB / hugepage_size - else: - vswitch_hugepages_nr = VSWITCH_REAL_MEMORY_MB / hugepage_size - - # Create a new set of dict attributes - hp_attr = {'vswitch_hugepages_size_mib': hugepage_size, - 'vswitch_hugepages_nr': vswitch_hugepages_nr, - 'vswitch_hugepages_avail': 0} - return hp_attr - def _inode_get_memory_hugepages(self): """Collect hugepage info, including vswitch, and vm. Collect platform reserved if config. @@ -309,12 +310,6 @@ class NodeOperator(object): """ imemory = [] - Ki = 1024 - SZ_2M_Ki = 2048 - SZ_1G_Ki = 1048576 - controller_min_MB = 6000 - compute_min_MB = 1600 - compute_min_non0_MB = 500 initial_compute_config_completed = \ os.path.exists(tsc.INITIAL_COMPUTE_CONFIG_COMPLETE) @@ -331,8 +326,8 @@ class NodeOperator(object): for node in range(self.num_nodes): attr = {} - Total_HP_MiB = 0 # Total memory (MiB) currently configured in HPs - Free_HP_MiB = 0 + total_hp_mb = 0 # Total memory (MB) currently configured in HPs + free_hp_mb = 0 # Check vswitch and libvirt memory # Loop through configured hugepage sizes of this node and record @@ -345,13 +340,10 @@ class NodeOperator(object): for subdir in subdirs: hp_attr = {} sizesplit = subdir.split('-') - # role via size; also from /etc/nova/compute_reserved.conf if sizesplit[1].startswith("1048576kB"): - hugepages_role = "vswitch" - size = int(SZ_1G_Ki / Ki) + size = SIZE_1G_MB else: - hugepages_role = "vm" - size = int(SZ_2M_Ki / Ki) + size = SIZE_2M_MB nr_hugepages = 0 free_hugepages = 0 @@ -367,33 +359,40 @@ class NodeOperator(object): if file.startswith("free_hugepages"): free_hugepages = int(f.readline()) - Total_HP_MiB = Total_HP_MiB + int(nr_hugepages * size) - Free_HP_MiB = Free_HP_MiB + int(free_hugepages * size) + total_hp_mb = total_hp_mb + int(nr_hugepages * size) + free_hp_mb = free_hp_mb + int(free_hugepages * size) - # Libvirt hugepages can now be 1G and 2M, can't only look - # at 2M pages - if hugepages_role == "vswitch": - vswitch_hugepages_nr = VSWITCH_REAL_MEMORY_MB / size + # Libvirt hugepages can be 1G and 2M + if size == SIZE_1G_MB: + vswitch_hugepages_nr = VSWITCH_MEMORY_MB / size hp_attr = { - 'vswitch_hugepages_size_mib': size, - 'vswitch_hugepages_nr': vswitch_hugepages_nr, - 'vswitch_hugepages_avail': 0, - 'vm_hugepages_nr_1G': - (nr_hugepages - vswitch_hugepages_nr), - 'vm_hugepages_avail_1G': free_hugepages, - 'vm_hugepages_use_1G': 'True' - } + 'vswitch_hugepages_size_mib': size, + 'vswitch_hugepages_nr': vswitch_hugepages_nr, + 'vswitch_hugepages_avail': 0, + 'vm_hugepages_nr_1G': + (nr_hugepages - vswitch_hugepages_nr), + 'vm_hugepages_avail_1G': free_hugepages, + 'vm_hugepages_use_1G': 'True' + } else: if len(subdirs) == 1: - hp_attr = self._set_default_vswitch_hugesize() + # No 1G hugepage support. 
+                        vswitch_hugepages_nr = VSWITCH_MEMORY_MB / size
+                        hp_attr = {
+                            'vswitch_hugepages_size_mib': size,
+                            'vswitch_hugepages_nr': vswitch_hugepages_nr,
+                            'vswitch_hugepages_avail': 0,
+                        }
                         hp_attr.update({'vm_hugepages_use_1G': 'False'})
+                    else:
+                        # vswitch will use 1G hugepages
+                        vswitch_hugepages_nr = 0
 
-                    vswitch_hugepages_nr = hp_attr.get('vswitch_hugepages_nr', 0)
                     hp_attr.update({
                         'vm_hugepages_avail_2M': free_hugepages,
                         'vm_hugepages_nr_2M':
                             (nr_hugepages - vswitch_hugepages_nr)
-                        })
+                    })
 
                 attr.update(hp_attr)
@@ -402,19 +401,19 @@
                 pass
 
             # Get the free and total memory from meminfo for this node
-            re_node_MemTotal = re.compile(r'^Node\s+\d+\s+\MemTotal:\s+(\d+)')
-            re_node_MemFree = re.compile(r'^Node\s+\d+\s+\MemFree:\s+(\d+)')
-            re_node_FilePages = \
+            re_node_memtotal = re.compile(r'^Node\s+\d+\s+\MemTotal:\s+(\d+)')
+            re_node_memfree = re.compile(r'^Node\s+\d+\s+\MemFree:\s+(\d+)')
+            re_node_filepages = \
                 re.compile(r'^Node\s+\d+\s+\FilePages:\s+(\d+)')
-            re_node_SReclaim = \
+            re_node_sreclaim = \
                 re.compile(r'^Node\s+\d+\s+\SReclaimable:\s+(\d+)')
-            re_node_CommitLimit = \
+            re_node_commitlimit = \
                 re.compile(r'^Node\s+\d+\s+\CommitLimit:\s+(\d+)')
-            re_node_Committed_AS = \
+            re_node_committed_as = \
                 re.compile(r'^Node\s+\d+\s+\'Committed_AS:\s+(\d+)')
 
-            Free_KiB = 0  # Free Memory (KiB) available
-            Total_KiB = 0  # Total Memory (KiB)
+            free_kb = 0  # Free Memory (KB) available
+            total_kb = 0  # Total Memory (KB)
             limit = 0  # only used in strict accounting
             committed = 0  # only used in strict accounting
@@ -422,40 +421,40 @@
             try:
                 with open(meminfo, 'r') as infile:
                     for line in infile:
-                        match = re_node_MemTotal.search(line)
+                        match = re_node_memtotal.search(line)
                         if match:
-                            Total_KiB += int(match.group(1))
+                            total_kb += int(match.group(1))
                             continue
 
-                        match = re_node_MemFree.search(line)
+                        match = re_node_memfree.search(line)
                         if match:
-                            Free_KiB += int(match.group(1))
+                            free_kb += int(match.group(1))
                             continue
 
-                        match = re_node_FilePages.search(line)
+                        match = re_node_filepages.search(line)
                         if match:
-                            Free_KiB += int(match.group(1))
+                            free_kb += int(match.group(1))
                             continue
 
-                        match = re_node_SReclaim.search(line)
+                        match = re_node_sreclaim.search(line)
                         if match:
-                            Free_KiB += int(match.group(1))
+                            free_kb += int(match.group(1))
                             continue
 
-                        match = re_node_CommitLimit.search(line)
+                        match = re_node_commitlimit.search(line)
                         if match:
                             limit = int(match.group(1))
                             continue
 
-                        match = re_node_Committed_AS.search(line)
+                        match = re_node_committed_as.search(line)
                         if match:
                             committed = int(match.group(1))
                             continue
 
                 if self._is_strict():
-                    Free_KiB = limit - committed
+                    free_kb = limit - committed
             except IOError:
                 # silently ignore IO errors (eg. file missing)
                 pass
 
             # Calculate PSS
-            Pss_MiB = 0
+            pss_mb = 0
             if node == 0:
                 cmd = 'cat /proc/*/smaps 2>/dev/null | awk \'/^Pss:/ ' \
                     '{a += $2;} END {printf "%d\\n", a/1024.0;}\''
                 try:
                     proc = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                                             shell=True)
                     result = proc.stdout.read().strip()
-                    Pss_MiB = int(result)
+                    pss_mb = int(result)
                 except subprocess.CalledProcessError as e:
                     LOG.error("Cannot calculate PSS (%s) (%d)",
                               cmd, e.returncode)
@@ -471,11 +470,11 @@
                     LOG.error("Failed to execute (%s) OS error (%d)",
                               cmd, e.errno)
 
-            # need to multiply Total_MiB by 1024 to match compute_huge
-            node_total_kib = Total_HP_MiB * Ki + Free_KiB + Pss_MiB * Ki
+            # need to multiply total_mb by 1024 to match compute_huge
+            node_total_kb = total_hp_mb * SIZE_KB + free_kb + pss_mb * SIZE_KB
 
             # Read base memory from compute_reserved.conf
-            base_mem_MiB = 0
+            base_mem_mb = 0
             with open('/etc/nova/compute_reserved.conf', 'r') as infile:
                 for line in infile:
                     if "COMPUTE_BASE_RESERVED" in line:
@@ -484,51 +483,51 @@
                         for reserve in base_reserves.split():
                             reserve = reserve.split(":")
                             if reserve[0].strip('"') == "node%d" % node:
-                                base_mem_MiB = int(reserve[1].strip('MB'))
+                                base_mem_mb = int(reserve[1].strip('MB'))
 
             # On small systems, clip memory overhead to more reasonable minimal
             # settings
-            if (Total_KiB / Ki - base_mem_MiB) < 1000:
+            if (total_kb / SIZE_KB - base_mem_mb) < 1000:
                 if node == 0:
-                    base_mem_MiB = compute_min_MB
+                    base_mem_mb = COMPUTE_MIN_MB
                     if tsc.nodetype == 'controller':
-                        base_mem_MiB += controller_min_MB
+                        base_mem_mb += CONTROLLER_MIN_MB
                 else:
-                    base_mem_MiB = compute_min_non0_MB
+                    base_mem_mb = COMPUTE_MIN_NON_0_MB
 
-            Eng_KiB = node_total_kib - base_mem_MiB * Ki
+            eng_kb = node_total_kb - base_mem_mb * SIZE_KB
 
-            vswitch_mem_kib = (attr.get('vswitch_hugepages_size_mib', 0) *
-                               attr.get('vswitch_hugepages_nr', 0) * Ki)
+            vswitch_mem_kb = (attr.get('vswitch_hugepages_size_mib', 0) *
+                              attr.get('vswitch_hugepages_nr', 0) * SIZE_KB)
 
-            VM_KiB = (Eng_KiB - vswitch_mem_kib)
+            vm_kb = (eng_kb - vswitch_mem_kb)
 
-            max_vm_pages_2M = VM_KiB / SZ_2M_Ki
-            max_vm_pages_1G = VM_KiB / SZ_1G_Ki
+            max_vm_pages_2mb = vm_kb / SIZE_2M_KB
+            max_vm_pages_1gb = vm_kb / SIZE_1G_KB
 
             attr.update({
-                'vm_hugepages_possible_2M': max_vm_pages_2M,
-                'vm_hugepages_possible_1G': max_vm_pages_1G,
+                'vm_hugepages_possible_2M': max_vm_pages_2mb,
+                'vm_hugepages_possible_1G': max_vm_pages_1gb,
             })
 
             # calculate 100% 2M pages if it is initial report and the huge
             # pages have not been allocated
             if initial_report:
-                Total_HP_MiB += int(max_vm_pages_2M * (SZ_2M_Ki / Ki))
-                Free_HP_MiB = Total_HP_MiB
+                total_hp_mb += int(max_vm_pages_2mb * (SIZE_2M_KB / SIZE_KB))
+                free_hp_mb = total_hp_mb
                 attr.update({
-                    'vm_hugepages_nr_2M': max_vm_pages_2M,
-                    'vm_hugepages_avail_2M': max_vm_pages_2M,
+                    'vm_hugepages_nr_2M': max_vm_pages_2mb,
+                    'vm_hugepages_avail_2M': max_vm_pages_2mb,
                     'vm_hugepages_nr_1G': 0
                 })
 
             attr.update({
                 'numa_node': node,
-                'memtotal_mib': Total_HP_MiB,
-                'memavail_mib': Free_HP_MiB,
+                'memtotal_mib': total_hp_mb,
+                'memavail_mib': free_hp_mb,
                 'hugepages_configured': 'True',
-                'node_memtotal_mib': node_total_kib / 1024,
-                })
+                'node_memtotal_mib': node_total_kb / 1024,
+            })
 
             imemory.append(attr)
 
@@ -541,53 +540,53 @@
         '''
         imemory = []
-        self.total_memory_MiB = 0
+        self.total_memory_mb = 0
 
-        re_node_MemTotal = re.compile(r'^Node\s+\d+\s+\MemTotal:\s+(\d+)')
-        re_node_MemFree = re.compile(r'^Node\s+\d+\s+\MemFree:\s+(\d+)')
-        re_node_FilePages = re.compile(r'^Node\s+\d+\s+\FilePages:\s+(\d+)')
-        re_node_SReclaim = re.compile(r'^Node\s+\d+\s+\SReclaimable:\s+(\d+)')
+        re_node_memtotal = re.compile(r'^Node\s+\d+\s+\MemTotal:\s+(\d+)')
+        re_node_memfree = re.compile(r'^Node\s+\d+\s+\MemFree:\s+(\d+)')
+        re_node_filepages = re.compile(r'^Node\s+\d+\s+\FilePages:\s+(\d+)')
+        re_node_sreclaim = re.compile(r'^Node\s+\d+\s+\SReclaimable:\s+(\d+)')
 
         for node in range(self.num_nodes):
             attr = {}
-            Total_MiB = 0
-            Free_MiB = 0
+            total_mb = 0
+            free_mb = 0
 
             meminfo = "/sys/devices/system/node/node%d/meminfo" % node
             try:
                 with open(meminfo, 'r') as infile:
                     for line in infile:
-                        match = re_node_MemTotal.search(line)
+                        match = re_node_memtotal.search(line)
                         if match:
-                            Total_MiB += int(match.group(1))
+                            total_mb += int(match.group(1))
                             continue
 
-                        match = re_node_MemFree.search(line)
+                        match = re_node_memfree.search(line)
                         if match:
-                            Free_MiB += int(match.group(1))
+                            free_mb += int(match.group(1))
                             continue
 
-                        match = re_node_FilePages.search(line)
+                        match = re_node_filepages.search(line)
                        if match:
-                            Free_MiB += int(match.group(1))
+                            free_mb += int(match.group(1))
                             continue
 
-                        match = re_node_SReclaim.search(line)
+                        match = re_node_sreclaim.search(line)
                         if match:
-                            Free_MiB += int(match.group(1))
+                            free_mb += int(match.group(1))
                             continue
 
             except IOError:
                 # silently ignore IO errors (eg. file missing)
                 pass
 
-            Total_MiB /= 1024
-            Free_MiB /= 1024
-            self.total_memory_nodes_MiB.append(Total_MiB)
+            total_mb /= 1024
+            free_mb /= 1024
+            self.total_memory_nodes_mb.append(total_mb)
             attr = {
-                    'numa_node': node,
-                    'memtotal_mib': Total_MiB,
-                    'memavail_mib': Free_MiB,
-                    'hugepages_configured': 'False',
-                   }
+                'numa_node': node,
+                'memtotal_mib': total_mb,
+                'memavail_mib': free_mb,
+                'hugepages_configured': 'False',
+            }
 
             imemory.append(attr)
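-- 
Illustrative note (not part of the patch): a minimal sketch of the hugepage
arithmetic behind this fix, reusing the constant names introduced above; the
standalone helper function is hypothetical.

    # Values from the patch: per-socket vswitch memory and page sizes, in MB.
    VSWITCH_MEMORY_MB = 1024
    SIZE_2M_MB = 2
    SIZE_1G_MB = 1024

    def vswitch_hugepages_nr(page_size_mb):
        """Number of hugepages reserved for vswitch at a given page size."""
        return VSWITCH_MEMORY_MB // page_size_mb

    # In a virtual environment restricted to 2M pages, the vswitch pool is
    # now 1024MB / 2MB = 512 pages. The old VSWITCH_VIRTUAL_MEMORY_MB = 512
    # yielded only 256 pages, in which DPDK could fail to find a contiguous
    # memory pool of sufficient size.
    assert vswitch_hugepages_nr(SIZE_2M_MB) == 512
    assert vswitch_hugepages_nr(SIZE_1G_MB) == 1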