cfg.IntOpt('scaledown_alarm_threshold', + default=2000000000, + help="Threshold to cross for the scale-down alarm to trigger."),
desc: Checks the stack has been created successfully
+ desc: Check the Prometheus metric for the existence of servers + url: $ENVIRON['PROMETHEUS_SERVICE_URL']/api/v1/query + verbose: all + method: POST + request_headers: + content-type: application/x-www-form-urlencoded + data: + query=ceilometer_cpu{resource_name=~"te-$ENVIRON['RESOURCE_PREFIX'].*"} + poll: + count: 300 + delay: 1 + status: 200 + response_json_paths: + $.data.result.`len`: 2 + + - name: check alarm cpu_alarm_high ALARM + verbose: all + desc: Check the aodh alarm and its state + url: $ENVIRON['AODH_SERVICE_URL']/v2/alarms?sort=name%3Aasc + method: GET + poll: + count: 600 + delay: 5 + response_strings: + - "$ENVIRON['STACK_NAME']-cpu_alarm_high" + response_json_paths: + $[0].state: alarm + + - name: check alarm cpu_alarm_high is OK + verbose: all + desc: Check the aodh alarm and its state + url: $ENVIRON['AODH_SERVICE_URL']/v2/alarms?sort=name%3Aasc + method: GET + poll: + count: 900 + delay: 5 + response_strings: + - "$ENVIRON['STACK_NAME']-cpu_alarm_high-" + response_json_paths: + $[0].state: ok + + - name: check alarm cpu_alarm_low is ALARM + verbose: all + desc: Check the aodh alarm and its state + url: $ENVIRON['AODH_SERVICE_URL']/v2/alarms?sort=name%3Aasc + method: GET + poll: + count: 600 + delay: 5 + response_strings: + - "$ENVIRON['STACK_NAME']-cpu_alarm_low-" + response_json_paths: + $[1].state: alarm + + - name: list servers shrink + verbose: all + desc: Wait for the autoscaling stack to delete one server + url: $ENVIRON['NOVA_SERVICE_URL']/servers/detail + method: GET + poll: + count: 600 + delay: 1 + response_json_paths: + $.servers[0].metadata.'metering.server_group': $HISTORY['control stack status'].$RESPONSE['$.stack.id'] + $.servers[0].status: ACTIVE + $.servers.`len`: 1 + + - name: get stack location + desc: Get the stack location + url: $ENVIRON['HEAT_SERVICE_URL']/stacks/$ENVIRON['STACK_NAME'] + method: GET + status: 302 + + - name: delete stack + desc: Delete the stack + url: $LOCATION + method: DELETE + status: 204 + + - name: 
confirm that the stack has been deleted + desc: Check the stack has been deleted before proceeding
\n" + ]]} + } + } + } + }, + "web_server_scaleup_policy": { + "type": "OS::Heat::ScalingPolicy", + "properties": { + "adjustment_type": "change_in_capacity", + "auto_scaling_group_id": { "get_resource": "asg" }, + "cooldown": 60, + "scaling_adjustment": 1 + } + }, + "cpu_alarm_high": { + "type": "OS::Aodh::PrometheusAlarm", + "properties": { + "description": "Scale-up if the mean CPU is higher than the threshold", + "threshold": $ENVIRON["AODH_THRESHOLD"], + "comparison_operator": "gt", + "alarm_actions": [ + { + "str_replace": { + "template": "trust+url", + "params": { + "url": { "get_attr": [ "web_server_scaleup_policy", "signal_url" ] } + } + } + } + ], + "query": "(rate(ceilometer_cpu{resource_name=~'te-$ENVIRON['RESOURCE_PREFIX'].*'}[1m])) * 100" + } + }, + "web_server_scaledown_policy": { + "type": "OS::Heat::ScalingPolicy", + "properties": { + "adjustment_type": "change_in_capacity", + "auto_scaling_group_id": { "get_resource": "asg" }, + "cooldown": 60, + "scaling_adjustment": -1 + } + }, + "cpu_alarm_low": { + "type": "OS::Aodh::PrometheusAlarm", + "properties": { + "description": "Scale-down if the mean CPU is lower than the threshold", + "threshold": $ENVIRON["SCALEDOWN_THRESHOLD"], + "comparison_operator": "lt", + "alarm_actions": [ + { + "str_replace": { + "template": "trust+url", + "params": { + "url": { "get_attr": [ "web_server_scaledown_policy", "signal_url" ] } + } + } + } + ], + "query": "(rate(ceilometer_cpu{resource_name=~'te-$ENVIRON['RESOURCE_PREFIX'].*'}[1m])) * 100" + } + } + } + } +} diff --git a/telemetry_tempest_plugin/scenario/test_telemetry_integration_prometheus.py b/telemetry_tempest_plugin/scenario/test_telemetry_integration_prometheus.py index d4dcc0e..93d05de 100644 --- a/telemetry_tempest_plugin/scenario/test_telemetry_integration_prometheus.py +++ b/telemetry_tempest_plugin/scenario/test_telemetry_integration_prometheus.py @@ -13,11 +13,11 @@ import os from tempest import config +from tempest.lib.common.utils import data_utils 
from tempest.scenario import manager from telemetry_tempest_plugin.scenario import utils -CONF = config.CONF TEST_DIR = os.path.join(os.path.dirname(__file__), 'telemetry_integration_prometheus_gabbits') @@ -31,19 +31,79 @@ class PrometheusGabbiTest(manager.ScenarioTest): @classmethod def skip_checks(cls): super(PrometheusGabbiTest, cls).skip_checks() - for name in ["sg_core", "glance", "ceilometer"]: - if not getattr(CONF.service_available, name, False): - raise cls.skipException("%s support is required" % - name.capitalize()) + for name in ["aodh", "nova", "heat", + "ceilometer", "glance", "sg_core"]: + cls._check_service(name) + + @classmethod + def _check_service(cls, name): + if not getattr(config.CONF.service_available, name, False): + raise cls.skipException("%s support is required" % + name.capitalize()) + + @staticmethod + def _get_endpoint(auth, service): + opt_section = getattr(config.CONF, service) + endpoint_type = opt_section.endpoint_type + is_keystone_v3 = 'catalog' in auth[1] + + if is_keystone_v3: + if endpoint_type.endswith("URL"): + endpoint_type = endpoint_type[:-3] + catalog = auth[1]['catalog'] + endpoints = [e['endpoints'] for e in catalog + if e['type'] == opt_section.catalog_type] + if not endpoints: + raise Exception("%s endpoint not found" % + opt_section.catalog_type) + endpoints = [e['url'] for e in endpoints[0] + if e['interface'] == endpoint_type] + if not endpoints: + raise Exception("%s interface not found for endpoint %s" % + (endpoint_type, + opt_section.catalog_type)) + return endpoints[0].rstrip('/') + + else: + if not endpoint_type.endswith("URL"): + endpoint_type += "URL" + catalog = auth[1]['serviceCatalog'] + endpoints = [e for e in catalog + if e['type'] == opt_section.catalog_type] + if not endpoints: + raise Exception("%s endpoint not found" % + opt_section.catalog_type) + return endpoints[0]['endpoints'][0][endpoint_type].rstrip('/') def _prep_test(self, filename): + auth = self.os_primary.auth_provider.get_auth() + 
# NOTE(marihan): This is used in the Prometheus query because heat + # uses the last 7 digits of stack_name when creating the autoscaling + # resources.