Handle deprecated "cpu_util" metric
The "cpu_util" metric was deprecated a few years ago. We'll obtain the same result by converting the cumulative CPU time to a percentage, leveraging the rate-of-change aggregation. Change-Id: I18fe0de6f74c785e674faceea0c48f44055818fe
This commit is contained in:
parent
922478fbda
commit
00fea975e2
|
@ -372,7 +372,7 @@ You can configure and install Ceilometer by following the documentation below :
|
||||||
#. https://docs.openstack.org/ceilometer/latest
|
#. https://docs.openstack.org/ceilometer/latest
|
||||||
|
|
||||||
The built-in strategy 'basic_consolidation' provided by watcher requires
|
The built-in strategy 'basic_consolidation' provided by watcher requires
|
||||||
"**compute.node.cpu.percent**" and "**cpu_util**" measurements to be collected
|
"**compute.node.cpu.percent**" and "**cpu**" measurements to be collected
|
||||||
by Ceilometer.
|
by Ceilometer.
|
||||||
The measurements available depend on the hypervisors that OpenStack manages on
|
The measurements available depend on the hypervisors that OpenStack manages on
|
||||||
the specific implementation.
|
the specific implementation.
|
||||||
|
|
|
@ -300,6 +300,6 @@ Using that you can now query the values for that specific metric:
|
||||||
.. code-block:: py
|
.. code-block:: py
|
||||||
|
|
||||||
avg_meter = self.datasource_backend.statistic_aggregation(
|
avg_meter = self.datasource_backend.statistic_aggregation(
|
||||||
instance.uuid, 'cpu_util', self.periods['instance'],
|
instance.uuid, 'instance_cpu_usage', self.periods['instance'],
|
||||||
self.granularity,
|
self.granularity,
|
||||||
aggregation=self.aggregation_method['instance'])
|
aggregation=self.aggregation_method['instance'])
|
||||||
|
|
|
@ -26,8 +26,7 @@ metric service name plugins comment
|
||||||
``compute_monitors`` option
|
``compute_monitors`` option
|
||||||
to ``cpu.virt_driver`` in
|
to ``cpu.virt_driver`` in
|
||||||
the nova.conf.
|
the nova.conf.
|
||||||
``cpu_util`` ceilometer_ none cpu_util has been removed
|
``cpu`` ceilometer_ none
|
||||||
since Stein.
|
|
||||||
============================ ============ ======= ===========================
|
============================ ============ ======= ===========================
|
||||||
|
|
||||||
.. _ceilometer: https://docs.openstack.org/ceilometer/latest/admin/telemetry-measurements.html#openstack-compute
|
.. _ceilometer: https://docs.openstack.org/ceilometer/latest/admin/telemetry-measurements.html#openstack-compute
|
||||||
|
|
|
@ -22,8 +22,7 @@ The *vm_workload_consolidation* strategy requires the following metrics:
|
||||||
============================ ============ ======= =========================
|
============================ ============ ======= =========================
|
||||||
metric service name plugins comment
|
metric service name plugins comment
|
||||||
============================ ============ ======= =========================
|
============================ ============ ======= =========================
|
||||||
``cpu_util`` ceilometer_ none cpu_util has been removed
|
``cpu`` ceilometer_ none
|
||||||
since Stein.
|
|
||||||
``memory.resident`` ceilometer_ none
|
``memory.resident`` ceilometer_ none
|
||||||
``memory`` ceilometer_ none
|
``memory`` ceilometer_ none
|
||||||
``disk.root.size`` ceilometer_ none
|
``disk.root.size`` ceilometer_ none
|
||||||
|
|
|
@ -27,9 +27,8 @@ metric service name plugins comment
|
||||||
to ``cpu.virt_driver`` in the
|
to ``cpu.virt_driver`` in the
|
||||||
nova.conf.
|
nova.conf.
|
||||||
``hardware.memory.used`` ceilometer_ SNMP_
|
``hardware.memory.used`` ceilometer_ SNMP_
|
||||||
``cpu_util`` ceilometer_ none cpu_util has been removed
|
``cpu`` ceilometer_ none
|
||||||
since Stein.
|
``instance_ram_usage`` ceilometer_ none
|
||||||
``memory.resident`` ceilometer_ none
|
|
||||||
============================ ============ ======= =============================
|
============================ ============ ======= =============================
|
||||||
|
|
||||||
.. _ceilometer: https://docs.openstack.org/ceilometer/latest/admin/telemetry-measurements.html#openstack-compute
|
.. _ceilometer: https://docs.openstack.org/ceilometer/latest/admin/telemetry-measurements.html#openstack-compute
|
||||||
|
@ -107,10 +106,10 @@ parameter type default Value description
|
||||||
period of all received ones.
|
period of all received ones.
|
||||||
==================== ====== ===================== =============================
|
==================== ====== ===================== =============================
|
||||||
|
|
||||||
.. |metrics| replace:: ["cpu_util", "memory.resident"]
|
.. |metrics| replace:: ["instance_cpu_usage", "instance_ram_usage"]
|
||||||
.. |thresholds| replace:: {"cpu_util": 0.2, "memory.resident": 0.2}
|
.. |thresholds| replace:: {"instance_cpu_usage": 0.2, "instance_ram_usage": 0.2}
|
||||||
.. |weights| replace:: {"cpu_util_weight": 1.0, "memory.resident_weight": 1.0}
|
.. |weights| replace:: {"instance_cpu_usage_weight": 1.0, "instance_ram_usage_weight": 1.0}
|
||||||
.. |instance_metrics| replace:: {"cpu_util": "compute.node.cpu.percent", "memory.resident": "hardware.memory.used"}
|
.. |instance_metrics| replace:: {"instance_cpu_usage": "compute.node.cpu.percent", "instance_ram_usage": "hardware.memory.used"}
|
||||||
.. |periods| replace:: {"instance": 720, "node": 600}
|
.. |periods| replace:: {"instance": 720, "node": 600}
|
||||||
|
|
||||||
Efficacy Indicator
|
Efficacy Indicator
|
||||||
|
@ -136,8 +135,8 @@ How to use it ?
|
||||||
at1 workload_balancing --strategy workload_stabilization
|
at1 workload_balancing --strategy workload_stabilization
|
||||||
|
|
||||||
$ openstack optimize audit create -a at1 \
|
$ openstack optimize audit create -a at1 \
|
||||||
-p thresholds='{"memory.resident": 0.05}' \
|
-p thresholds='{"instance_ram_usage": 0.05}' \
|
||||||
-p metrics='["memory.resident"]'
|
-p metrics='["instance_ram_usage"]'
|
||||||
|
|
||||||
External Links
|
External Links
|
||||||
--------------
|
--------------
|
||||||
|
|
|
@ -24,8 +24,7 @@ The *workload_balance* strategy requires the following metrics:
|
||||||
======================= ============ ======= =========================
|
======================= ============ ======= =========================
|
||||||
metric service name plugins comment
|
metric service name plugins comment
|
||||||
======================= ============ ======= =========================
|
======================= ============ ======= =========================
|
||||||
``cpu_util`` ceilometer_ none cpu_util has been removed
|
``cpu`` ceilometer_ none
|
||||||
since Stein.
|
|
||||||
``memory.resident`` ceilometer_ none
|
``memory.resident`` ceilometer_ none
|
||||||
======================= ============ ======= =========================
|
======================= ============ ======= =========================
|
||||||
|
|
||||||
|
@ -65,15 +64,16 @@ Configuration
|
||||||
|
|
||||||
Strategy parameters are:
|
Strategy parameters are:
|
||||||
|
|
||||||
============== ====== ============= ====================================
|
============== ====== ==================== ====================================
|
||||||
parameter type default Value description
|
parameter type default Value description
|
||||||
============== ====== ============= ====================================
|
============== ====== ==================== ====================================
|
||||||
``metrics`` String 'cpu_util' Workload balance base on cpu or ram
|
``metrics`` String 'instance_cpu_usage' Workload balance based on cpu or ram
|
||||||
utilization. choice: ['cpu_util',
|
utilization. Choices:
|
||||||
'memory.resident']
|
['instance_cpu_usage',
|
||||||
``threshold`` Number 25.0 Workload threshold for migration
|
'instance_ram_usage']
|
||||||
``period`` Number 300 Aggregate time period of ceilometer
|
``threshold`` Number 25.0 Workload threshold for migration
|
||||||
============== ====== ============= ====================================
|
``period`` Number 300 Aggregate time period of ceilometer
|
||||||
|
============== ====== ==================== ====================================
|
||||||
|
|
||||||
Efficacy Indicator
|
Efficacy Indicator
|
||||||
------------------
|
------------------
|
||||||
|
@ -95,7 +95,7 @@ How to use it ?
|
||||||
at1 workload_balancing --strategy workload_balance
|
at1 workload_balancing --strategy workload_balance
|
||||||
|
|
||||||
$ openstack optimize audit create -a at1 -p threshold=26.0 \
|
$ openstack optimize audit create -a at1 -p threshold=26.0 \
|
||||||
-p period=310 -p metrics=cpu_util
|
-p period=310 -p metrics=instance_cpu_usage
|
||||||
|
|
||||||
External Links
|
External Links
|
||||||
--------------
|
--------------
|
||||||
|
|
|
@ -38,7 +38,7 @@ class GnocchiHelper(base.DataSourceBase):
|
||||||
host_inlet_temp='hardware.ipmi.node.temperature',
|
host_inlet_temp='hardware.ipmi.node.temperature',
|
||||||
host_airflow='hardware.ipmi.node.airflow',
|
host_airflow='hardware.ipmi.node.airflow',
|
||||||
host_power='hardware.ipmi.node.power',
|
host_power='hardware.ipmi.node.power',
|
||||||
instance_cpu_usage='cpu_util',
|
instance_cpu_usage='cpu',
|
||||||
instance_ram_usage='memory.resident',
|
instance_ram_usage='memory.resident',
|
||||||
instance_ram_allocated='memory',
|
instance_ram_allocated='memory',
|
||||||
instance_l3_cache_usage='cpu_l3_cache',
|
instance_l3_cache_usage='cpu_l3_cache',
|
||||||
|
@ -93,6 +93,25 @@ class GnocchiHelper(base.DataSourceBase):
|
||||||
|
|
||||||
resource_id = resources[0]['id']
|
resource_id = resources[0]['id']
|
||||||
|
|
||||||
|
if meter_name == "instance_cpu_usage":
|
||||||
|
if resource_type != "instance":
|
||||||
|
LOG.warning("Unsupported resource type for metric "
|
||||||
|
"'instance_cpu_usage': ", resource_type)
|
||||||
|
return
|
||||||
|
|
||||||
|
# The "cpu_util" gauge (percentage) metric has been removed.
|
||||||
|
# We're going to obtain the same result by using the rate of change
|
||||||
|
# aggregate operation.
|
||||||
|
if aggregate not in ("mean", "rate:mean"):
|
||||||
|
LOG.warning("Unsupported aggregate for instance_cpu_usage "
|
||||||
|
"metric: %s. "
|
||||||
|
"Supported aggregates: mean, rate:mean ",
|
||||||
|
aggregate)
|
||||||
|
return
|
||||||
|
|
||||||
|
# TODO(lpetrut): consider supporting other aggregates.
|
||||||
|
aggregate = "rate:mean"
|
||||||
|
|
||||||
raw_kwargs = dict(
|
raw_kwargs = dict(
|
||||||
metric=meter,
|
metric=meter,
|
||||||
start=start_time,
|
start=start_time,
|
||||||
|
@ -117,6 +136,17 @@ class GnocchiHelper(base.DataSourceBase):
|
||||||
# Airflow from hardware.ipmi.node.airflow is reported as
|
# Airflow from hardware.ipmi.node.airflow is reported as
|
||||||
# 1/10 th of actual CFM
|
# 1/10 th of actual CFM
|
||||||
return_value *= 10
|
return_value *= 10
|
||||||
|
if meter_name == "instance_cpu_usage":
|
||||||
|
# "rate:mean" can return negative values for migrated vms.
|
||||||
|
return_value = max(0, return_value)
|
||||||
|
|
||||||
|
# We're converting the cumulative cpu time (ns) to cpu usage
|
||||||
|
# percentage.
|
||||||
|
vcpus = resource.vcpus
|
||||||
|
if not vcpus:
|
||||||
|
LOG.warning("instance vcpu count not set, assuming 1")
|
||||||
|
vcpus = 1
|
||||||
|
return_value *= 100 / (granularity * 10e+8) / vcpus
|
||||||
|
|
||||||
return return_value
|
return return_value
|
||||||
|
|
||||||
|
|
|
@ -295,7 +295,7 @@ class WorkloadBalance(base.WorkloadStabilizationBaseStrategy):
|
||||||
self.threshold)
|
self.threshold)
|
||||||
return self.solution
|
return self.solution
|
||||||
|
|
||||||
# choose the server with largest cpu_util
|
# choose the server with largest cpu usage
|
||||||
source_nodes = sorted(source_nodes,
|
source_nodes = sorted(source_nodes,
|
||||||
reverse=True,
|
reverse=True,
|
||||||
key=lambda x: (x[self._meter]))
|
key=lambda x: (x[self._meter]))
|
||||||
|
|
|
@ -40,17 +40,25 @@ class TestGnocchiHelper(base.BaseTestCase):
|
||||||
self.addCleanup(stat_agg_patcher.stop)
|
self.addCleanup(stat_agg_patcher.stop)
|
||||||
|
|
||||||
def test_gnocchi_statistic_aggregation(self, mock_gnocchi):
|
def test_gnocchi_statistic_aggregation(self, mock_gnocchi):
|
||||||
|
vcpus = 2
|
||||||
|
mock_instance = mock.Mock(
|
||||||
|
id='16a86790-327a-45f9-bc82-45839f062fdc',
|
||||||
|
vcpus=vcpus)
|
||||||
|
|
||||||
gnocchi = mock.MagicMock()
|
gnocchi = mock.MagicMock()
|
||||||
|
# cpu time rate of change (ns)
|
||||||
|
mock_rate_measure = 360 * 10e+8 * vcpus * 5.5 / 100
|
||||||
expected_result = 5.5
|
expected_result = 5.5
|
||||||
|
|
||||||
expected_measures = [["2017-02-02T09:00:00.000000", 360, 5.5]]
|
expected_measures = [
|
||||||
|
["2017-02-02T09:00:00.000000", 360, mock_rate_measure]]
|
||||||
|
|
||||||
gnocchi.metric.get_measures.return_value = expected_measures
|
gnocchi.metric.get_measures.return_value = expected_measures
|
||||||
mock_gnocchi.return_value = gnocchi
|
mock_gnocchi.return_value = gnocchi
|
||||||
|
|
||||||
helper = gnocchi_helper.GnocchiHelper()
|
helper = gnocchi_helper.GnocchiHelper()
|
||||||
result = helper.statistic_aggregation(
|
result = helper.statistic_aggregation(
|
||||||
resource=mock.Mock(id='16a86790-327a-45f9-bc82-45839f062fdc'),
|
resource=mock_instance,
|
||||||
resource_type='instance',
|
resource_type='instance',
|
||||||
meter_name='instance_cpu_usage',
|
meter_name='instance_cpu_usage',
|
||||||
period=300,
|
period=300,
|
||||||
|
@ -59,6 +67,14 @@ class TestGnocchiHelper(base.BaseTestCase):
|
||||||
)
|
)
|
||||||
self.assertEqual(expected_result, result)
|
self.assertEqual(expected_result, result)
|
||||||
|
|
||||||
|
gnocchi.metric.get_measures.assert_called_once_with(
|
||||||
|
metric="cpu",
|
||||||
|
start=mock.ANY,
|
||||||
|
stop=mock.ANY,
|
||||||
|
resource_id=mock_instance.uuid,
|
||||||
|
granularity=360,
|
||||||
|
aggregation="rate:mean")
|
||||||
|
|
||||||
def test_gnocchi_statistic_series(self, mock_gnocchi):
|
def test_gnocchi_statistic_series(self, mock_gnocchi):
|
||||||
gnocchi = mock.MagicMock()
|
gnocchi = mock.MagicMock()
|
||||||
expected_result = {
|
expected_result = {
|
||||||
|
|
Loading…
Reference in New Issue