VMware: fix memory stats

The total memory for the vCenter cluster managed by Nova should be the aggregated sum of the total memory of each ESX host in the cluster. This is more accurate than using the available memory of the resource pool associated with the cluster.

Partial-Bug: #1462957
Change-Id: I030cee9cebb0f030361aa6bbb612da5cd4202a7f
2017-10-31 11:22:20 +02:00 · 2017-10-31 11:22:20 +02:00 · 93bd310b91
parent 32c8ac6b7d
commit 93bd310b91
5 changed files with 51 additions and 36 deletions
--- a/nova/tests/unit/virt/vmwareapi/fake.py
+++ b/nova/tests/unit/virt/vmwareapi/fake.py
@ -772,6 +772,7 @@ class HostSystem(ManagedObject):
        self.set("capability.maxHostSupportedVcpus", 600)
        self.set("summary.hardware", hardware)
        self.set("summary.runtime", runtime)
+        self.set("summary.quickStats", quickstats)
        self.set("config.network.pnic", net_info_pnic)
        self.set("connected", connected)

--- a/nova/tests/unit/virt/vmwareapi/test_driver_api.py
+++ b/nova/tests/unit/virt/vmwareapi/test_driver_api.py
@ -2113,8 +2113,8 @@ class VMwareAPIVMTestCase(test.NoDBTestCase,
        self.assertEqual(32, stats['vcpus'])
        self.assertEqual(1024, stats['local_gb'])
        self.assertEqual(1024 - 500, stats['local_gb_used'])
-        self.assertEqual(1000, stats['memory_mb'])
-        self.assertEqual(500, stats['memory_mb_used'])
+        self.assertEqual(2048, stats['memory_mb'])
+        self.assertEqual(1000, stats['memory_mb_used'])
        self.assertEqual('VMware vCenter Server', stats['hypervisor_type'])
        self.assertEqual(5001000, stats['hypervisor_version'])
        self.assertEqual(self.node_name, stats['hypervisor_hostname'])
--- a/nova/tests/unit/virt/vmwareapi/test_vm_util.py
+++ b/nova/tests/unit/virt/vmwareapi/test_vm_util.py
@ -17,6 +17,7 @@
 import collections

 import mock
+from oslo_utils import units
 from oslo_utils import uuidutils
 from oslo_vmware import exceptions as vexc
 from oslo_vmware.objects import datastore as ds_obj
@ -68,21 +69,32 @@ class VMwareVMUtilTestCase(test.NoDBTestCase):
        hardware.numCpuThreads = 16
        hardware.vendor = "Intel"
        hardware.cpuModel = "Intel(R) Xeon(R)"
+        hardware.memorySize = 4 * units.Gi

        runtime_host_1 = fake.DataObject()
        runtime_host_1.connectionState = "connected"
        runtime_host_1.inMaintenanceMode = False

+        quickstats_1 = fake.DataObject()
+        quickstats_1.overallMemoryUsage = 512
+
+        quickstats_2 = fake.DataObject()
+        quickstats_2.overallMemoryUsage = 512
+
        runtime_host_2 = fake.DataObject()
        runtime_host_2.connectionState = connection_state
        runtime_host_2.inMaintenanceMode = maintenance_mode

-        prop_list_host_1 = [fake.Prop(name="hardware_summary", val=hardware),
-                            fake.Prop(name="runtime_summary",
-                                      val=runtime_host_1)]
-        prop_list_host_2 = [fake.Prop(name="hardware_summary", val=hardware),
-                            fake.Prop(name="runtime_summary",
-                                      val=runtime_host_2)]
+        prop_list_host_1 = [fake.Prop(name="summary.hardware", val=hardware),
+                            fake.Prop(name="summary.runtime",
+                                      val=runtime_host_1),
+                            fake.Prop(name="summary.quickStats",
+                                      val=quickstats_1)]
+        prop_list_host_2 = [fake.Prop(name="summary.hardware", val=hardware),
+                            fake.Prop(name="summary.runtime",
+                                      val=runtime_host_2),
+                            fake.Prop(name="summary.quickStats",
+                                      val=quickstats_2)]

        fake_objects = fake.FakeRetrieveResult()
        fake_objects.add_object(fake.ObjectContent("prop_list_host1",
@ -90,29 +102,25 @@ class VMwareVMUtilTestCase(test.NoDBTestCase):
        fake_objects.add_object(fake.ObjectContent("prop_list_host1",
                                                   prop_list_host_2))

-        respool_resource_usage = fake.DataObject()
-        respool_resource_usage.maxUsage = 5368709120
-        respool_resource_usage.overallUsage = 2147483648
-
        def fake_call_method(*args):
            if "get_object_properties_dict" in args:
                return prop_dict
            elif "get_properties_for_a_collection_of_objects" in args:
                return fake_objects
            else:
-                return respool_resource_usage
+                raise Exception('unexpected method call')

        session = fake.FakeSession()
        with mock.patch.object(session, '_call_method', fake_call_method):
            result = vm_util.get_stats_from_cluster(session, "cluster1")
-            mem_info = {}
            if connection_state == "connected" and not maintenance_mode:
-                vcpus = 32
+                num_hosts = 2
            else:
-                vcpus = 16
-            mem_info['total'] = 5120
-            mem_info['free'] = 3072
-            expected_stats = {'vcpus': vcpus, 'mem': mem_info}
+                num_hosts = 1
+            expected_stats = {'vcpus': num_hosts * 16,
+                              'mem': {'total': num_hosts * 4096,
+                                      'free': num_hosts * 4096 -
+                                              num_hosts * 512}}
            self.assertEqual(expected_stats, result)

    def test_get_stats_from_cluster_hosts_connected_and_active(self):
--- a/nova/virt/vmwareapi/vm_util.py
+++ b/nova/virt/vmwareapi/vm_util.py
@ -1159,7 +1159,8 @@ def get_vm_state(session, instance):
 def get_stats_from_cluster(session, cluster):
    """Get the aggregate resource stats of a cluster."""
    vcpus = 0
-    mem_info = {'total': 0, 'free': 0}
+    used_mem_mb = 0
+    total_mem_mb = 0
    # Get the Host and Resource Pool Managed Object Refs
    prop_dict = session._call_method(vutil,
                                     "get_object_properties_dict",
@ -1172,27 +1173,25 @@ def get_stats_from_cluster(session, cluster):
            result = session._call_method(vim_util,
                         "get_properties_for_a_collection_of_objects",
                         "HostSystem", host_mors,
-                         ["summary.hardware", "summary.runtime"])
+                         ["summary.hardware", "summary.runtime",
+                          "summary.quickStats"])
            for obj in result.objects:
-                hardware_summary = obj.propSet[0].val
-                runtime_summary = obj.propSet[1].val
+                host_props = propset_dict(obj.propSet)
+                hardware_summary = host_props['summary.hardware']
+                runtime_summary = host_props['summary.runtime']
+                stats_summary = host_props['summary.quickStats']
                if (runtime_summary.inMaintenanceMode is False and
                    runtime_summary.connectionState == "connected"):
                    # Total vcpus is the sum of all pCPUs of individual hosts
                    # The overcommitment ratio is factored in by the scheduler
-                    vcpus += hardware_summary.numCpuThreads
-
-        res_mor = prop_dict.get('resourcePool')
-        if res_mor:
-            res_usage = session._call_method(vutil, "get_object_property",
-                                             res_mor, "summary.runtime.memory")
-            if res_usage:
-                # maxUsage is the memory limit of the cluster available to VM's
-                mem_info['total'] = int(res_usage.maxUsage / units.Mi)
-                # overallUsage is the hypervisor's view of memory usage by VM's
-                consumed = int(res_usage.overallUsage / units.Mi)
-                mem_info['free'] = mem_info['total'] - consumed
-    stats = {'vcpus': vcpus, 'mem': mem_info}
+                    threads = hardware_summary.numCpuThreads
+                    vcpus += threads
+                    used_mem_mb += stats_summary.overallMemoryUsage
+                    mem_mb = hardware_summary.memorySize // units.Mi
+                    total_mem_mb += mem_mb
+    stats = {'vcpus': vcpus,
+             'mem': {'total': total_mem_mb,
+                     'free': total_mem_mb - used_mem_mb}}
    return stats


--- a/releasenotes/notes/vmware-mem-stats-a9b6fac815d2bc57.yaml
+++ b/releasenotes/notes/vmware-mem-stats-a9b6fac815d2bc57.yaml
@ -0,0 +1,7 @@
+---
+fixes:
+  - |
+    Fixes how memory stats are reported for VMware. The total memory for the
+    vCenter cluster managed by Nova is now the aggregated sum of the total
+    memory of each ESX host in the cluster. This is more accurate than using
+    the available memory of the resource pool associated with the cluster.