Add Autoscaling test with prometheus

Change-Id: I74a897a213167da8b4bde99cc879a7078993ccd4
This commit is contained in:
mgirgisf 2024-03-13 10:31:56 +01:00 committed by Erno Kuvaja
parent b8fd0674ad
commit d00901d59e
5 changed files with 328 additions and 9 deletions

View File

@ -32,7 +32,7 @@
sg-core: https://github.com/infrawatch/sg-core
# NOTE(jokke): The following will disable the gabbi based integration tests for now.
# We will need to figure out how we refactor them to be stable in the CI.
tempest_exclude_regex: (^telemetry_tempest_plugin\.scenario\.test_telemetry_integration)
tempest_exclude_regex: (^telemetry_tempest_plugin\.scenario\.test_telemetry_integration\.)
devstack_services:
tempest: true
devstack_localrc:
@ -47,6 +47,7 @@
# be "gnocchi,sg-core"
CEILOMETER_BACKEND: "gnocchi"
CEILOMETER_BACKENDS: "gnocchi,sg-core"
PROMETHEUS_SERVICE_SCRAPE_TARGETS: "sg-core"
CEILOMETER_PIPELINE_INTERVAL: 15
CEILOMETER_ALARM_THRESHOLD: 6000000000
GLOBAL_VENV: False

View File

@ -76,6 +76,9 @@ TelemetryGroup = [
cfg.IntOpt('alarm_threshold',
default=10,
help="Threshold to cross for the alarm to trigger."),
cfg.IntOpt('scaledown_alarm_threshold',
default=2000000000,
help="Threshold to cross for the alarm to trigger."),
cfg.BoolOpt("disable_ssl_certificate_validation",
default=False,
help="Disable SSL certificate validation when running "
@ -83,6 +86,9 @@ TelemetryGroup = [
cfg.StrOpt('sg_core_service_url',
default="127.0.0.1:3000",
help="URL to sg-core prometheus endpoint"),
cfg.StrOpt('prometheus_service_url',
default="127.0.0.1:9090",
help="URL to prometheus endpoint"),
cfg.IntOpt('ceilometer_polling_interval',
default=300,
help="Polling interval configured for ceilometer. This can "

View File

@ -0,0 +1,162 @@
defaults:
request_headers:
x-auth-token: $ENVIRON['USER_TOKEN']
tests:
- name: list alarms none
desc: Lists alarms, none yet exist
verbose: all
url: $ENVIRON['AODH_SERVICE_URL']/v2/alarms
method: GET
response_strings:
- "[]"
- name: list servers none
desc: List servers, none yet exist
verbose: all
url: $ENVIRON['NOVA_SERVICE_URL']/servers
method: GET
response_strings:
- "[]"
- name: create stack
desc: Create an autoscaling stack
verbose: all
url: $ENVIRON['HEAT_SERVICE_URL']/stacks
method: POST
request_headers:
content-type: application/json
data: <@create_stack.json
status: 201
- name: control stack status
desc: Checks the stack has been created successfully
url: $ENVIRON['HEAT_SERVICE_URL']/stacks/$ENVIRON['STACK_NAME']
redirects: true
verbose: all
method: GET
status: 200
poll:
count: 300
delay: 1
response_json_paths:
$.stack.stack_status: "CREATE_COMPLETE"
- name: list servers grow
verbose: all
desc: Wait for the autoscaling stack to grow to two servers
url: $ENVIRON['NOVA_SERVICE_URL']/servers/detail
method: GET
poll:
count: 600
delay: 1
response_json_paths:
$.servers[0].metadata.'metering.server_group': $RESPONSE['$.stack.id']
$.servers[1].metadata.'metering.server_group': $RESPONSE['$.stack.id']
$.servers[0].status: ACTIVE
$.servers[1].status: ACTIVE
$.servers.`len`: 2
- name: check prometheus query for the servers count .
desc: Check the Prometheus metric for the existence of servers
url: $ENVIRON['PROMETHEUS_SERVICE_URL']/api/v1/query
verbose: all
method: POST
request_headers:
content-type: application/x-www-form-urlencoded
data:
query=ceilometer_cpu{resource_name=~"te-$ENVIRON['RESOURCE_PREFIX'].*"}
poll:
count: 300
delay: 1
status: 200
response_json_paths:
$.data.result.`len`: 2
- name: check alarm cpu_alarm_high ALARM
verbose: all
desc: Check the aodh alarm and its state
url: $ENVIRON['AODH_SERVICE_URL']/v2/alarms?sort=name%3Aasc
method: GET
poll:
count: 600
delay: 5
response_strings:
- "$ENVIRON['STACK_NAME']-cpu_alarm_high"
response_json_paths:
$[0].state: alarm
- name: check alarm cpu_alarm_high is OK
verbose: all
desc: Check the aodh alarm and its state
url: $ENVIRON['AODH_SERVICE_URL']/v2/alarms?sort=name%3Aasc
method: GET
poll:
count: 900
delay: 5
response_strings:
- "$ENVIRON['STACK_NAME']-cpu_alarm_high-"
response_json_paths:
$[0].state: ok
- name: check alarm cpu_alarm_low is ALARM
verbose: all
desc: Check the aodh alarm and its state
url: $ENVIRON['AODH_SERVICE_URL']/v2/alarms?sort=name%3Aasc
method: GET
poll:
count: 600
delay: 5
response_strings:
- "$ENVIRON['STACK_NAME']-cpu_alarm_low-"
response_json_paths:
$[1].state: alarm
- name: list servers shrink
verbose: all
desc: Wait for the autoscaling stack to delete one server
url: $ENVIRON['NOVA_SERVICE_URL']/servers/detail
method: GET
poll:
count: 600
delay: 1
response_json_paths:
$.servers[0].metadata.'metering.server_group': $HISTORY['control stack status'].$RESPONSE['$.stack.id']
$.servers[0].status: ACTIVE
$.servers.`len`: 1
- name: get stack location
desc: Get the stack location
url: $ENVIRON['HEAT_SERVICE_URL']/stacks/$ENVIRON['STACK_NAME']
method: GET
status: 302
- name: delete stack
desc: Delete the stack
url: $LOCATION
method: DELETE
status: 204
- name: confirm that stack have been deleted
desc: Check the stack has been deleted before proceeding
url: $ENVIRON['HEAT_SERVICE_URL']/stacks/$ENVIRON['STACK_NAME']
redirects: true
method: GET
poll:
count: 600
delay: 5
status: 404
- name: list alarms deleted
desc: List alarms, no more exist
url: $ENVIRON['AODH_SERVICE_URL']/v2/alarms
method: GET
response_strings:
- "[]"
- name: list servers deleted
desc: List servers, no more exist
url: $ENVIRON['NOVA_SERVICE_URL']/servers
method: GET
response_strings:
- "[]"

View File

@ -0,0 +1,90 @@
{
"stack_name": "$ENVIRON['STACK_NAME']",
"template": {
"heat_template_version": "2013-05-23",
"description": "Integration Test AutoScaling with heat+ceilometer+prometheus+aodh",
"resources": {
"asg": {
"type": "OS::Heat::AutoScalingGroup",
"properties": {
"min_size": 1,
"max_size": 2,
"resource": {
"type": "OS::Nova::Server",
"properties": {
"networks": [{ "network": "$ENVIRON['NEUTRON_NETWORK']" }],
"flavor": "$ENVIRON['NOVA_FLAVOR_REF']",
"image": "$ENVIRON['GLANCE_IMAGE_NAME']",
"metadata": {
"metering.server_group": { "get_param": "OS::stack_id" }
},
"user_data_format": "RAW",
"user_data": {"Fn::Join": ["", [
"#!/bin/sh\n",
"echo 'Loading CPU'\n",
"set -v\n",
"cat /dev/urandom > /dev/null & sleep 120 ; kill $! \n"
]]}
}
}
}
},
"web_server_scaleup_policy": {
"type": "OS::Heat::ScalingPolicy",
"properties": {
"adjustment_type": "change_in_capacity",
"auto_scaling_group_id": { "get_resource": "asg" },
"cooldown": 60,
"scaling_adjustment": 1
}
},
"cpu_alarm_high": {
"type": "OS::Aodh::PrometheusAlarm",
"properties": {
"description": "Scale-up if the mean CPU is higher than the threshold",
"threshold": $ENVIRON["AODH_THRESHOLD"],
"comparison_operator": "gt",
"alarm_actions": [
{
"str_replace": {
"template": "trust+url",
"params": {
"url": { "get_attr": [ "web_server_scaleup_policy", "signal_url" ] }
}
}
}
],
"query": "(rate(ceilometer_cpu{resource_name=~'te-$ENVIRON['RESOURCE_PREFIX'].*'}[1m])) * 100"
}
},
"web_server_scaledown_policy": {
"type": "OS::Heat::ScalingPolicy",
"properties": {
"adjustment_type": "change_in_capacity",
"auto_scaling_group_id": { "get_resource": "asg" },
"cooldown": 60,
"scaling_adjustment": -1
}
},
"cpu_alarm_low": {
"type": "OS::Aodh::PrometheusAlarm",
"properties": {
"description": "Scale-down if the mean CPU is lower than the threshold",
"threshold": $ENVIRON["SCALEDOWN_THRESHOLD"],
"comparison_operator": "lt",
"alarm_actions": [
{
"str_replace": {
"template": "trust+url",
"params": {
"url": { "get_attr": [ "web_server_scaledown_policy", "signal_url" ] }
}
}
}
],
"query": "(rate(ceilometer_cpu{resource_name=~'te-$ENVIRON['RESOURCE_PREFIX'].*'}[1m])) * 100"
}
}
}
}
}

View File

@ -13,11 +13,11 @@
import os
from tempest import config
from tempest.lib.common.utils import data_utils
from tempest.scenario import manager
from telemetry_tempest_plugin.scenario import utils
# Alias for the CONF object exported by tempest's config module.
CONF = config.CONF
# Path of the 'telemetry_integration_prometheus_gabbits' directory that sits
# next to this module; it is passed to utils.generate_tests() at the end of
# the file.
TEST_DIR = os.path.join(os.path.dirname(__file__),
'telemetry_integration_prometheus_gabbits')
@ -31,19 +31,79 @@ class PrometheusGabbiTest(manager.ScenarioTest):
@classmethod
def skip_checks(cls):
    """Skip the whole test class unless every required service is enabled.

    Runs the parent class checks first, then verifies each required
    service through ``_check_service``, which raises
    ``cls.skipException`` for a service that is not flagged in
    ``CONF.service_available``.
    """
    super(PrometheusGabbiTest, cls).skip_checks()
    # Every service goes through _check_service so the availability test
    # and the skip message live in exactly one place.
    for name in ("aodh", "nova", "heat",
                 "ceilometer", "glance", "sg_core"):
        cls._check_service(name)
@classmethod
def _check_service(cls, name):
    """Raise ``cls.skipException`` when service ``name`` is unavailable.

    :param name: attribute looked up on ``CONF.service_available``; a
                 missing attribute is treated as "not available".
    :raises cls.skipException: if the service flag is falsy or absent.
    """
    available = getattr(config.CONF.service_available, name, False)
    if available:
        return
    message = "%s support is required" % name.capitalize()
    raise cls.skipException(message)
@staticmethod
def _get_endpoint(auth, service):
    """Look up a service URL in the auth catalog.

    :param auth: sequence whose second item is the auth data dict; it
                 holds the catalog under 'catalog' (keystone v3) or
                 under 'serviceCatalog' otherwise.
    :param service: name of the section on ``config.CONF`` that provides
                    ``endpoint_type`` and ``catalog_type``.
    :returns: the matching URL with trailing '/' characters stripped.
    :raises Exception: if no catalog entry has the configured type, or
                       (v3 only) no endpoint has the wanted interface.
    """
    section = getattr(config.CONF, service)
    interface = section.endpoint_type
    auth_data = auth[1]
    wanted_type = section.catalog_type

    if 'catalog' not in auth_data:
        # Legacy layout: endpoint keys carry a "URL" suffix
        # (e.g. "publicURL"), so make sure the suffix is present.
        if not interface.endswith("URL"):
            interface += "URL"
        matches = [entry for entry in auth_data['serviceCatalog']
                   if entry['type'] == wanted_type]
        if not matches:
            raise Exception("%s endpoint not found" % wanted_type)
        return matches[0]['endpoints'][0][interface].rstrip('/')

    # Keystone v3: interfaces are named without the "URL" suffix.
    if interface.endswith("URL"):
        interface = interface[:-3]
    endpoint_groups = [entry['endpoints']
                       for entry in auth_data['catalog']
                       if entry['type'] == wanted_type]
    if not endpoint_groups:
        raise Exception("%s endpoint not found" % wanted_type)
    # Only the first catalog entry of the wanted type is searched.
    urls = [endpoint['url'] for endpoint in endpoint_groups[0]
            if endpoint['interface'] == interface]
    if not urls:
        raise Exception("%s interface not found for endpoint %s" %
                        (interface, wanted_type))
    return urls[0].rstrip('/')
def _prep_test(self, filename):
    """Export the environment variables read by the gabbi test files.

    Gathers the auth token, service endpoints, alarm thresholds and
    resource names, then publishes them through ``os.environ`` so the
    gabbits can reference them as ``$ENVIRON[...]``.

    :param filename: not read by this method.
    """
    telemetry_conf = config.CONF.telemetry
    auth = self.os_primary.auth_provider.get_auth()
    networks = self.os_primary.networks_client.list_networks(
        **{'router:external': False, 'fields': 'id'})['networks']
    stack_name = data_utils.rand_name('telemetry')
    # NOTE(marihan): The prometheus queries match resources by this
    # prefix because heat uses the last 7 characters of stack_name when
    # naming the autoscaling resources.
    resource_prefix = stack_name[-7:]
    os.environ.update({
        "USER_TOKEN": auth[0],
        "AODH_THRESHOLD": str(telemetry_conf.alarm_threshold),
        "SCALEDOWN_THRESHOLD":
            str(telemetry_conf.scaledown_alarm_threshold),
        "AODH_SERVICE_URL": self._get_endpoint(auth, "alarming_plugin"),
        "HEAT_SERVICE_URL": self._get_endpoint(auth, "heat_plugin"),
        "NOVA_SERVICE_URL": self._get_endpoint(auth, "compute"),
        "SG_CORE_SERVICE_URL": telemetry_conf.sg_core_service_url,
        "CEILOMETER_POLLING_INTERVAL":
            str(telemetry_conf.ceilometer_polling_interval),
        "PROMETHEUS_SERVICE_URL": telemetry_conf.prometheus_service_url,
        # image_create() is called only here; its return value is what
        # the stack template passes as the server image.
        "GLANCE_IMAGE_NAME": self.image_create(),
        "NOVA_FLAVOR_REF": config.CONF.compute.flavor_ref,
        "NEUTRON_NETWORK": networks[0].get('id'),
        "STACK_NAME": stack_name,
        "RESOURCE_PREFIX": resource_prefix,
    })
# Hand the test class and the gabbit directory to the plugin's helper.
# NOTE(review): presumably this attaches one test per gabbi YAML file found
# in TEST_DIR to the class — confirm against utils.generate_tests.
utils.generate_tests(PrometheusGabbiTest, TEST_DIR)