From f7b17520a9d69b114c3baa425ce46d7c347e3171 Mon Sep 17 00:00:00 2001 From: Drew Thorstensen Date: Fri, 21 Aug 2015 09:59:35 -0400 Subject: [PATCH] Account for live migrated VMs In PowerVM, when a VM is migrated, the 'metrics come with it'. That means that the metrics are cumulative and you can be assured that when it is on a new host, its metrics are still valid. This presents some complications. The metrics are pulled from the host. To determine rates, we ask the host for the previous sample and the current. Previously we had assumed that if the previous sample was None, it must be a new VM so all the traffic from the current is 'new'. With live migration taken into account, we can not do that. There may be months of CPU, Network and Storage data in that VMs metrics. This really skews the metrics in undesirable ways. This change set will skip instances (for a given inspection cycle) that are in the current sample but not the previous. This does lose a sample cycle, but ensures that we maintain accurracy (which is more important). Change-Id: Iba7f1287b3433acd8517947e01cbfa14fe93413c --- .../compute/virt/powervm/inspector.py | 64 +++++++++++-------- .../compute/virt/powervm/test_inspector.py | 22 ++++--- 2 files changed, 50 insertions(+), 36 deletions(-) diff --git a/ceilometer_powervm/compute/virt/powervm/inspector.py b/ceilometer_powervm/compute/virt/powervm/inspector.py index 733306c..3a766c8 100644 --- a/ceilometer_powervm/compute/virt/powervm/inspector.py +++ b/ceilometer_powervm/compute/virt/powervm/inspector.py @@ -28,6 +28,7 @@ from pypowervm.wrappers import network as pvm_net from ceilometer.compute.virt import inspector as virt_inspector from ceilometer.i18n import _ +from ceilometer.i18n import _LW LOG = logging.getLogger(__name__) @@ -141,20 +142,27 @@ class PowerVMInspector(virt_inspector.Inspector): # Get the previous sample data if prev_metric is None: - # If there is no previous sample, we blanket these to zero. All - # the cycles that were used in the current sample are 'fully' - # utilized. - prev_util_cap = 0 - prev_util_uncap = 0 - prev_idle = 0 - prev_donated = 0 - prev_entitled = 0 - else: - prev_util_cap = prev_metric.processor.util_cap_proc_cycles - prev_util_uncap = prev_metric.processor.util_uncap_proc_cycles - prev_idle = prev_metric.processor.idle_proc_cycles - prev_donated = prev_metric.processor.donated_proc_cycles - prev_entitled = prev_metric.processor.entitled_proc_cycles + # If there is no previous sample, that is either a new VM or is + # a live migrated system. A live migrated system will pull all + # of its metrics with it. The issue with this is it could have + # CPU cycles for months of run time. So we can't really determine + # the CPU utilization within the last X seconds...because to THIS + # host its new (only in the cur_metric). So we error out, the + # inspector will use a debug message in the log. + LOG.warn(_LW("Unable to derive CPU Utilization for VM %s. It is " + "either a new VM or was recently migrated. It will " + "be collected in the next inspection cycle."), + instance.name) + message = (_("Unable to derive CPU Utilization for VM %s.") % + instance.name) + raise virt_inspector.InstanceNotFoundException(message) + + # Gather the previous metrics + prev_util_cap = prev_metric.processor.util_cap_proc_cycles + prev_util_uncap = prev_metric.processor.util_uncap_proc_cycles + prev_idle = prev_metric.processor.idle_proc_cycles + prev_donated = prev_metric.processor.donated_proc_cycles + prev_entitled = prev_metric.processor.entitled_proc_cycles # Utilization can be driven by multiple factors on PowerVM. # PowerVM has 'entitled' cycles. These are cycles that, if the VM @@ -299,7 +307,8 @@ class PowerVMInspector(virt_inspector.Inspector): # If there isn't network information, this is because the Virtual # I/O Metrics were turned off. Have to pass through this method. - if cur_metric.network is None: + if (cur_metric.network is None or prev_metric is None or + prev_metric.network is None): return # Get the network interfaces. A 'cna' is a Client VM's Network Adapter @@ -321,11 +330,7 @@ class PowerVMInspector(virt_inspector.Inspector): # Need to determine the time delta between the samples. This is # usually 30 seconds from the API, but the metrics will be specific. - # However, if there is no previous sample, then we have to estimate. - # Therefore, we estimate 15 seconds - half of the standard 30 seconds. - date_delta = ((cur_date - prev_date) if prev_date is not None else - datetime.timedelta(seconds=15)) - date_delta_num = float(date_delta.seconds) + date_delta_num = float((cur_date - prev_date).seconds) for metric_cna in cur_metric.network.cnas: # Get the mac, but if it isn't found, then move to the next. Might @@ -340,6 +345,10 @@ class PowerVMInspector(virt_inspector.Inspector): name=metric_cna.physical_location, mac=mac, fref=None, parameters=None) + # Note that here, the previous may be none. That simply indicates + # that the adapter was dynamically added to the VM before the + # previous collection. Not the migration scenario above. + # In this case, we can default the base to 0. prev = find_prev_net(metric_cna) rx_bytes_diff = (metric_cna.received_bytes - (0 if prev is None else prev.received_bytes)) @@ -449,16 +458,17 @@ class PowerVMInspector(virt_inspector.Inspector): # Loop through all the storage adapters for cur_adpt in cur_adpts: - prev_adpt = find_prev(cur_adpt) - if prev_adpt is None: - # Perhaps this is a new adapter, was recently LPM'd, etc... - # Don't assume 0, just skip until the next pull. - continue - # IOPs is the read/write counts of the current - prev divided by # second difference between the two, rounded to the integer. :-) cur_ops = cur_adpt.num_reads + cur_adpt.num_writes - prev_ops = prev_adpt.num_reads + prev_adpt.num_writes + + # The previous adapter may be None. This simply indicates that the + # adapter was added between the previous sample and this one. It + # does not indicate a live migrate scenario like noted above, as + # the VM itself hasn't moved. + prev_adpt = find_prev(cur_adpt) + prev_ops = ((prev_adpt.num_reads + prev_adpt.num_writes) + if prev_adpt else 0) iops = (cur_ops - prev_ops) / date_delta.seconds # PowerVM only shows the connection (SCSI or FC). Name after diff --git a/ceilometer_powervm/tests/compute/virt/powervm/test_inspector.py b/ceilometer_powervm/tests/compute/virt/powervm/test_inspector.py index f0c8dd0..2e44bd7 100644 --- a/ceilometer_powervm/tests/compute/virt/powervm/test_inspector.py +++ b/ceilometer_powervm/tests/compute/virt/powervm/test_inspector.py @@ -89,13 +89,13 @@ class TestPowerVMInspector(base.BaseTestCase): metric.processor.entitled_proc_cycles = entitled return metric - # Validate that we get CPU utilization if current metrics are found, - # but the previous are None + # Validate that the CPU metrics raise an issue if the previous metric + # can't be found (perhaps due to a live migration). self.mock_metrics.get_latest_metric.return_value = ( mock.Mock(), mock_metric(7000, 50, 1000, 5000, 10000)) self.mock_metrics.get_previous_metric.return_value = None, None - resp = self.inspector.inspect_cpu_util(mock.Mock()) - self.assertEqual(10.5, resp.util) + self.assertRaises(virt_inspector.InstanceNotFoundException, + self.inspector.inspect_cpu_util, mock.Mock()) # Mock up a mixed use environment. cur = mock_metric(7000, 50, 1000, 5000, 10000) @@ -338,17 +338,21 @@ class TestPowerVMInspector(base.BaseTestCase): # Execute resp = list(self.inspector.inspect_disk_iops(mock.Mock())) - self.assertEqual(2, len(resp)) + self.assertEqual(3, len(resp)) - # Only one vSCSI. Should be the first metric. + # Two vSCSI's disk1, stats1 = resp[0] self.assertEqual('vscsi1', disk1.device) self.assertEqual(33, stats1.iops_count) - # Next is the vFC metric disk2, stats2 = resp[1] - self.assertEqual('vfc1', disk2.device) - self.assertEqual(66, stats2.iops_count) + self.assertEqual('vscsi2', disk2.device) + self.assertEqual(133, stats2.iops_count) + + # Next is the vFC metric + disk3, stats3 = resp[2] + self.assertEqual('vfc1', disk3.device) + self.assertEqual(66, stats3.iops_count) def test_inspect_disks(self): """Tests the inspect_disks inspector method for PowerVM."""