Replace the workers AFD filter
This patch uses the generic AFD filter with new alarms to replace the custom AFD for workers. Blueprint: allow-all-alarms-to-be-specified-in-alarming-file Change-Id: I6c432e60a16da5bb3c8d0ecd0bd22a1246fe6f82
This commit is contained in:
parent
215f693307
commit
9dbf48dbfe
|
@ -259,9 +259,6 @@ if hiera('lma::collector::influxdb::server', false) {
|
|||
class { 'lma_collector::logs::http_metrics': }
|
||||
|
||||
class { 'lma_collector::logs::aggregated_http_metrics': }
|
||||
|
||||
# AFD filters
|
||||
class { 'lma_collector::afd::workers': }
|
||||
}
|
||||
|
||||
$alerting_mode = $lma_collector['alerting_mode']
|
||||
|
|
|
@ -2427,6 +2427,506 @@ lma_collector:
|
|||
periods: 0
|
||||
function: last
|
||||
|
||||
# The following AFD alarms are defined to check the workers
|
||||
# All workers are down
|
||||
- name: 'nova-scheduler-all-down'
|
||||
description: 'All Nova schedulers are down'
|
||||
severity: 'down'
|
||||
enabled: 'true'
|
||||
trigger:
|
||||
rules:
|
||||
- metric: openstack_nova_services
|
||||
fields:
|
||||
service: 'scheduler'
|
||||
state: 'up'
|
||||
relational_operator: '=='
|
||||
threshold: 0
|
||||
window: 60
|
||||
periods: 0
|
||||
function: last
|
||||
- name: 'nova-cert-all-down'
|
||||
description: 'All Nova certs are down'
|
||||
severity: 'down'
|
||||
enabled: 'true'
|
||||
trigger:
|
||||
rules:
|
||||
- metric: openstack_nova_services
|
||||
fields:
|
||||
service: 'cert'
|
||||
state: 'up'
|
||||
relational_operator: '=='
|
||||
threshold: 0
|
||||
window: 60
|
||||
periods: 0
|
||||
function: last
|
||||
- name: 'nova-consoleauth-all-down'
|
||||
description: 'All Nova consoleauths are down'
|
||||
severity: 'down'
|
||||
enabled: 'true'
|
||||
trigger:
|
||||
rules:
|
||||
- metric: openstack_nova_services
|
||||
fields:
|
||||
service: 'consoleauth'
|
||||
state: 'up'
|
||||
relational_operator: '=='
|
||||
threshold: 0
|
||||
window: 60
|
||||
periods: 0
|
||||
function: last
|
||||
- name: 'nova-compute-all-down'
|
||||
description: 'All Nova computes are down'
|
||||
severity: 'down'
|
||||
enabled: 'true'
|
||||
trigger:
|
||||
rules:
|
||||
- metric: openstack_nova_services
|
||||
fields:
|
||||
service: 'compute'
|
||||
state: 'up'
|
||||
relational_operator: '=='
|
||||
threshold: 0
|
||||
window: 60
|
||||
periods: 0
|
||||
function: last
|
||||
- name: 'nova-conductor-all-down'
|
||||
description: 'All Nova conductors are down'
|
||||
severity: 'down'
|
||||
enabled: 'true'
|
||||
trigger:
|
||||
rules:
|
||||
- metric: openstack_nova_services
|
||||
fields:
|
||||
service: 'conductor'
|
||||
state: 'up'
|
||||
relational_operator: '=='
|
||||
threshold: 0
|
||||
window: 60
|
||||
periods: 0
|
||||
function: last
|
||||
- name: 'cinder-scheduler-all-down'
|
||||
description: 'All Cinder schedulers are down'
|
||||
severity: 'down'
|
||||
enabled: 'true'
|
||||
trigger:
|
||||
rules:
|
||||
- metric: openstack_cinder_services
|
||||
fields:
|
||||
service: 'scheduler'
|
||||
state: 'up'
|
||||
relational_operator: '=='
|
||||
threshold: 0
|
||||
window: 60
|
||||
periods: 0
|
||||
function: last
|
||||
- name: 'cinder-volume-all-down'
|
||||
description: 'All Cinder volumes are down'
|
||||
severity: 'down'
|
||||
enabled: 'true'
|
||||
trigger:
|
||||
rules:
|
||||
- metric: openstack_cinder_services
|
||||
fields:
|
||||
service: 'volume'
|
||||
state: 'up'
|
||||
relational_operator: '=='
|
||||
threshold: 0
|
||||
window: 60
|
||||
periods: 0
|
||||
function: last
|
||||
- name: 'neutron-l3-all-down'
|
||||
description: 'All Neutron L3 agents are down'
|
||||
severity: 'down'
|
||||
enabled: 'true'
|
||||
trigger:
|
||||
rules:
|
||||
- metric: openstack_neutron_agents
|
||||
fields:
|
||||
service: 'l3'
|
||||
state: 'up'
|
||||
relational_operator: '=='
|
||||
threshold: 0
|
||||
window: 60
|
||||
periods: 0
|
||||
function: last
|
||||
- name: 'neutron-dhcp-all-down'
|
||||
description: 'All Neutron DHCP agents are down'
|
||||
severity: 'down'
|
||||
enabled: 'true'
|
||||
trigger:
|
||||
rules:
|
||||
- metric: openstack_neutron_agents
|
||||
fields:
|
||||
service: 'dhcp'
|
||||
state: 'up'
|
||||
relational_operator: '=='
|
||||
threshold: 0
|
||||
window: 60
|
||||
periods: 0
|
||||
function: last
|
||||
- name: 'neutron-metadata-all-down'
|
||||
description: 'All Neutron metadata agents are down'
|
||||
severity: 'down'
|
||||
enabled: 'true'
|
||||
trigger:
|
||||
rules:
|
||||
- metric: openstack_neutron_agents
|
||||
fields:
|
||||
service: 'metadata'
|
||||
state: 'up'
|
||||
relational_operator: '=='
|
||||
threshold: 0
|
||||
window: 60
|
||||
periods: 0
|
||||
function: last
|
||||
- name: 'neutron-openvswitch-all-down'
|
||||
description: 'All Neutron openvswitch agents are down'
|
||||
severity: 'down'
|
||||
enabled: 'true'
|
||||
trigger:
|
||||
rules:
|
||||
- metric: openstack_neutron_agents
|
||||
fields:
|
||||
service: 'openvswitch'
|
||||
state: 'up'
|
||||
relational_operator: '=='
|
||||
threshold: 0
|
||||
window: 60
|
||||
periods: 0
|
||||
function: last
|
||||
# At least one worker is down
|
||||
- name: 'nova-scheduler-one-down'
|
||||
description: 'At least one Nova scheduler is down'
|
||||
severity: 'warning'
|
||||
enabled: 'true'
|
||||
trigger:
|
||||
rules:
|
||||
- metric: openstack_nova_services
|
||||
fields:
|
||||
service: 'scheduler'
|
||||
state: 'down'
|
||||
relational_operator: '>'
|
||||
threshold: 0
|
||||
window: 60
|
||||
periods: 0
|
||||
function: last
|
||||
- name: 'nova-cert-one-down'
|
||||
description: 'At least one Nova cert is down'
|
||||
severity: 'warning'
|
||||
enabled: 'true'
|
||||
trigger:
|
||||
rules:
|
||||
- metric: openstack_nova_services
|
||||
fields:
|
||||
service: 'cert'
|
||||
state: 'down'
|
||||
relational_operator: '>'
|
||||
threshold: 0
|
||||
window: 60
|
||||
periods: 0
|
||||
function: last
|
||||
- name: 'nova-consoleauth-one-down'
|
||||
description: 'At least one Nova consoleauth is down'
|
||||
severity: 'warning'
|
||||
enabled: 'true'
|
||||
trigger:
|
||||
rules:
|
||||
- metric: openstack_nova_services
|
||||
fields:
|
||||
service: 'consoleauth'
|
||||
state: 'down'
|
||||
relational_operator: '>'
|
||||
threshold: 0
|
||||
window: 60
|
||||
periods: 0
|
||||
function: last
|
||||
- name: 'nova-compute-one-down'
|
||||
description: 'At least one Nova compute is down'
|
||||
severity: 'warning'
|
||||
enabled: 'true'
|
||||
trigger:
|
||||
rules:
|
||||
- metric: openstack_nova_services
|
||||
fields:
|
||||
service: 'compute'
|
||||
state: 'down'
|
||||
relational_operator: '>'
|
||||
threshold: 0
|
||||
window: 60
|
||||
periods: 0
|
||||
function: last
|
||||
- name: 'nova-conductor-one-down'
|
||||
description: 'At least one Nova conductor is down'
|
||||
severity: 'warning'
|
||||
enabled: 'true'
|
||||
trigger:
|
||||
rules:
|
||||
- metric: openstack_nova_services
|
||||
fields:
|
||||
service: 'conductor'
|
||||
state: 'down'
|
||||
relational_operator: '>'
|
||||
threshold: 0
|
||||
window: 60
|
||||
periods: 0
|
||||
function: last
|
||||
- name: 'cinder-scheduler-one-down'
|
||||
description: 'At least one Cinder scheduler is down'
|
||||
severity: 'warning'
|
||||
enabled: 'true'
|
||||
trigger:
|
||||
rules:
|
||||
- metric: openstack_cinder_services
|
||||
fields:
|
||||
service: 'scheduler'
|
||||
state: 'down'
|
||||
relational_operator: '>'
|
||||
threshold: 0
|
||||
window: 60
|
||||
periods: 0
|
||||
function: last
|
||||
- name: 'cinder-volume-one-down'
|
||||
description: 'At least one Cinder volume is down'
|
||||
severity: 'warning'
|
||||
enabled: 'true'
|
||||
trigger:
|
||||
rules:
|
||||
- metric: openstack_cinder_services
|
||||
fields:
|
||||
service: 'volume'
|
||||
state: 'down'
|
||||
relational_operator: '>'
|
||||
threshold: 0
|
||||
window: 60
|
||||
periods: 0
|
||||
function: last
|
||||
- name: 'neutron-l3-one-down'
|
||||
description: 'At least one L3 agent is down'
|
||||
severity: 'warning'
|
||||
enabled: 'true'
|
||||
trigger:
|
||||
rules:
|
||||
- metric: openstack_neutron_agents
|
||||
fields:
|
||||
service: 'l3'
|
||||
state: 'down'
|
||||
relational_operator: '>'
|
||||
threshold: 0
|
||||
window: 60
|
||||
periods: 0
|
||||
function: last
|
||||
- name: 'neutron-dhcp-one-down'
|
||||
description: 'At least one DHCP agent is down'
|
||||
severity: 'warning'
|
||||
enabled: 'true'
|
||||
trigger:
|
||||
rules:
|
||||
- metric: openstack_neutron_agents
|
||||
fields:
|
||||
service: 'dhcp'
|
||||
state: 'down'
|
||||
relational_operator: '>'
|
||||
threshold: 0
|
||||
window: 60
|
||||
periods: 0
|
||||
function: last
|
||||
- name: 'neutron-metadata-one-down'
|
||||
description: 'At least one metadata agent is down'
|
||||
severity: 'warning'
|
||||
enabled: 'true'
|
||||
trigger:
|
||||
rules:
|
||||
- metric: openstack_neutron_agents
|
||||
fields:
|
||||
service: 'metadata'
|
||||
state: 'down'
|
||||
relational_operator: '>'
|
||||
threshold: 0
|
||||
window: 60
|
||||
periods: 0
|
||||
function: last
|
||||
- name: 'neutron-openvswitch-one-down'
|
||||
description: 'At least one openvswitch agent is down'
|
||||
severity: 'warning'
|
||||
enabled: 'true'
|
||||
trigger:
|
||||
rules:
|
||||
- metric: openstack_neutron_agents
|
||||
fields:
|
||||
service: 'openvswitch'
|
||||
state: 'down'
|
||||
relational_operator: '>'
|
||||
threshold: 0
|
||||
window: 60
|
||||
periods: 0
|
||||
function: last
|
||||
# 50% or less of the services are up (compared to the total of up and down).
|
||||
- name: 'nova-scheduler-majority-down'
|
||||
description: 'Less than 50% of Nova schedulers are up'
|
||||
severity: 'critical'
|
||||
enabled: 'true'
|
||||
trigger:
|
||||
rules:
|
||||
- metric: openstack_nova_services_percent
|
||||
fields:
|
||||
service: 'scheduler'
|
||||
state: 'up'
|
||||
relational_operator: '<='
|
||||
threshold: 50
|
||||
window: 60
|
||||
periods: 0
|
||||
function: last
|
||||
- name: 'nova-cert-majority-down'
|
||||
description: 'Less than 50% of Nova certs are up'
|
||||
severity: 'critical'
|
||||
enabled: 'true'
|
||||
trigger:
|
||||
rules:
|
||||
- metric: openstack_nova_services_percent
|
||||
fields:
|
||||
service: 'cert'
|
||||
state: 'up'
|
||||
relational_operator: '<='
|
||||
threshold: 50
|
||||
window: 60
|
||||
periods: 0
|
||||
function: last
|
||||
- name: 'nova-consoleauth-majority-down'
|
||||
description: 'Less than 50% of Nova consoleauths are up'
|
||||
severity: 'critical'
|
||||
enabled: 'true'
|
||||
trigger:
|
||||
rules:
|
||||
- metric: openstack_nova_services_percent
|
||||
fields:
|
||||
service: 'consoleauth'
|
||||
state: 'up'
|
||||
relational_operator: '<='
|
||||
threshold: 50
|
||||
window: 60
|
||||
periods: 0
|
||||
function: last
|
||||
- name: 'nova-compute-majority-down'
|
||||
description: 'Less than 50% of Nova computes are up'
|
||||
severity: 'critical'
|
||||
enabled: 'true'
|
||||
trigger:
|
||||
rules:
|
||||
- metric: openstack_nova_services_percent
|
||||
fields:
|
||||
service: 'compute'
|
||||
state: 'up'
|
||||
relational_operator: '<='
|
||||
threshold: 50
|
||||
window: 60
|
||||
periods: 0
|
||||
function: last
|
||||
- name: 'nova-conductor-majority-down'
|
||||
description: 'Less than 50% of Nova conductors are up'
|
||||
severity: 'critical'
|
||||
enabled: 'true'
|
||||
trigger:
|
||||
rules:
|
||||
- metric: openstack_nova_services_percent
|
||||
fields:
|
||||
service: 'conductor'
|
||||
state: 'up'
|
||||
relational_operator: '<='
|
||||
threshold: 50
|
||||
window: 60
|
||||
periods: 0
|
||||
function: last
|
||||
- name: 'cinder-scheduler-majority-down'
|
||||
description: 'Less than 50% of Cinder schedulers are up'
|
||||
severity: 'critical'
|
||||
enabled: 'true'
|
||||
trigger:
|
||||
rules:
|
||||
- metric: openstack_cinder_services_percent
|
||||
fields:
|
||||
service: 'scheduler'
|
||||
state: 'up'
|
||||
relational_operator: '<='
|
||||
threshold: 50
|
||||
window: 60
|
||||
periods: 0
|
||||
function: last
|
||||
- name: 'cinder-volume-majority-down'
|
||||
description: 'Less than 50% of Cinder volumes are up'
|
||||
severity: 'critical'
|
||||
enabled: 'true'
|
||||
trigger:
|
||||
rules:
|
||||
- metric: openstack_cinder_services_percent
|
||||
fields:
|
||||
service: 'volume'
|
||||
state: 'up'
|
||||
relational_operator: '<='
|
||||
threshold: 50
|
||||
window: 60
|
||||
periods: 0
|
||||
function: last
|
||||
- name: 'neutron-l3-majority-down'
|
||||
description: 'Less than 50% of Neutron L3 agents are up'
|
||||
severity: 'critical'
|
||||
enabled: 'true'
|
||||
trigger:
|
||||
rules:
|
||||
- metric: openstack_neutron_agents_percent
|
||||
fields:
|
||||
service: 'l3'
|
||||
state: 'up'
|
||||
relational_operator: '<='
|
||||
threshold: 50
|
||||
window: 60
|
||||
periods: 0
|
||||
function: last
|
||||
- name: 'neutron-dhcp-majority-down'
|
||||
description: 'Less than 50% of Neutron DHCP agents are up'
|
||||
severity: 'critical'
|
||||
enabled: 'true'
|
||||
trigger:
|
||||
rules:
|
||||
- metric: openstack_neutron_agents_percent
|
||||
fields:
|
||||
service: 'dhcp'
|
||||
state: 'up'
|
||||
relational_operator: '<='
|
||||
threshold: 50
|
||||
window: 60
|
||||
periods: 0
|
||||
function: last
|
||||
- name: 'neutron-metadata-majority-down'
|
||||
description: 'Less than 50% of Neutron metadata agents are up'
|
||||
severity: 'critical'
|
||||
enabled: 'true'
|
||||
trigger:
|
||||
rules:
|
||||
- metric: openstack_neutron_agents_percent
|
||||
fields:
|
||||
service: 'metadata'
|
||||
state: 'up'
|
||||
relational_operator: '<='
|
||||
threshold: 50
|
||||
window: 60
|
||||
periods: 0
|
||||
function: last
|
||||
- name: 'neutron-openvswitch-majority-down'
|
||||
description: 'Less than 50% of Neutron openvswitch agents are up'
|
||||
severity: 'critical'
|
||||
enabled: 'true'
|
||||
trigger:
|
||||
rules:
|
||||
- metric: openstack_neutron_agents_percent
|
||||
fields:
|
||||
service: 'openvswitch'
|
||||
state: 'up'
|
||||
relational_operator: '<='
|
||||
threshold: 50
|
||||
window: 60
|
||||
periods: 0
|
||||
function: last
|
||||
|
||||
# Definition of the AFD node filters
|
||||
node_cluster_alarms:
|
||||
controller:
|
||||
|
@ -2620,6 +3120,51 @@ lma_collector:
|
|||
activate_alerting: true
|
||||
alarms:
|
||||
error: ['nova-logs-error']
|
||||
nova-cert:
|
||||
apply_to_node: controller
|
||||
enable_notification: false
|
||||
activate_alerting: true
|
||||
alarms:
|
||||
workers:
|
||||
- 'nova-cert-all-down'
|
||||
- 'nova-cert-majority-down'
|
||||
- 'nova-cert-one-down'
|
||||
nova-consoleauth:
|
||||
apply_to_node: controller
|
||||
enable_notification: false
|
||||
activate_alerting: true
|
||||
alarms:
|
||||
workers:
|
||||
- 'nova-consoleauth-all-down'
|
||||
- 'nova-consoleauth-majority-down'
|
||||
- 'nova-consoleauth-one-down'
|
||||
nova-compute:
|
||||
apply_to_node: controller
|
||||
enable_notification: false
|
||||
activate_alerting: true
|
||||
alarms:
|
||||
workers:
|
||||
- 'nova-compute-all-down'
|
||||
- 'nova-compute-majority-down'
|
||||
- 'nova-compute-one-down'
|
||||
nova-conductor:
|
||||
apply_to_node: controller
|
||||
enable_notification: false
|
||||
activate_alerting: true
|
||||
alarms:
|
||||
workers:
|
||||
- 'nova-conductor-all-down'
|
||||
- 'nova-conductor-majority-down'
|
||||
- 'nova-conductor-one-down'
|
||||
nova-scheduler:
|
||||
apply_to_node: controller
|
||||
enable_notification: false
|
||||
activate_alerting: true
|
||||
alarms:
|
||||
workers:
|
||||
- 'nova-scheduler-all-down'
|
||||
- 'nova-scheduler-majority-down'
|
||||
- 'nova-scheduler-one-down'
|
||||
heat-api:
|
||||
apply_to_node: controller
|
||||
enable_notification: false
|
||||
|
@ -2742,6 +3287,24 @@ lma_collector:
|
|||
activate_alerting: true
|
||||
alarms:
|
||||
error: ['cinder-logs-error']
|
||||
cinder-scheduler:
|
||||
apply_to_node: controller
|
||||
enable_notification: false
|
||||
activate_alerting: true
|
||||
alarms:
|
||||
workers:
|
||||
- 'cinder-scheduler-all-down'
|
||||
- 'cinder-scheduler-majority-down'
|
||||
- 'cinder-scheduler-one-down'
|
||||
cinder-volume:
|
||||
apply_to_node: controller
|
||||
enable_notification: false
|
||||
activate_alerting: true
|
||||
alarms:
|
||||
workers:
|
||||
- 'cinder-volume-all-down'
|
||||
- 'cinder-volume-majority-down'
|
||||
- 'cinder-volume-one-down'
|
||||
<% if not @storage_options["volumes_ceph"] then -%>
|
||||
cinder-volume-logs:
|
||||
apply_to_node: storage
|
||||
|
@ -2813,6 +3376,42 @@ lma_collector:
|
|||
activate_alerting: true
|
||||
alarms:
|
||||
error: ['neutron-logs-error']
|
||||
neutron-l3:
|
||||
apply_to_node: controller
|
||||
enable_notification: false
|
||||
activate_alerting: true
|
||||
alarms:
|
||||
workers:
|
||||
- 'neutron-l3-all-down'
|
||||
- 'neutron-l3-majority-down'
|
||||
- 'neutron-l3-one-down'
|
||||
neutron-dhcp:
|
||||
apply_to_node: controller
|
||||
enable_notification: false
|
||||
activate_alerting: true
|
||||
alarms:
|
||||
workers:
|
||||
- 'neutron-dhcp-all-down'
|
||||
- 'neutron-dhcp-majority-down'
|
||||
- 'neutron-dhcp-one-down'
|
||||
neutron-metadata:
|
||||
apply_to_node: controller
|
||||
enable_notification: false
|
||||
activate_alerting: true
|
||||
alarms:
|
||||
workers:
|
||||
- 'neutron-metadata-all-down'
|
||||
- 'neutron-metadata-majority-down'
|
||||
- 'neutron-metadata-one-down'
|
||||
neutron-openvswitch:
|
||||
apply_to_node: controller
|
||||
enable_notification: false
|
||||
activate_alerting: true
|
||||
alarms:
|
||||
workers:
|
||||
- 'neutron-openvswitch-all-down'
|
||||
- 'neutron-openvswitch-majority-down'
|
||||
- 'neutron-openvswitch-one-down'
|
||||
neutron-logs-compute:
|
||||
apply_to_node: compute
|
||||
enable_notification: false
|
||||
|
|
|
@ -449,7 +449,6 @@ Public Classes:
|
|||
* [`lma_collector::notifications::metrics`](#class-lma_collectornotificationsmetrics)
|
||||
* [`lma_collector::aggregator::client`](#class-lma_collectoraggregatorclient)
|
||||
* [`lma_collector::aggregator::server`](#class-lma_collectoraggregatorserver)
|
||||
* [`lma_collector::afd::workers`](#class-lma_collectorafdworkers)
|
||||
* [`lma_collector::gse_policies`](#class-lma_collectorgse_policies)
|
||||
* [`lma_collector::metrics::heka_monitoring`](#class-lma_collectormetricsheka_monitoring)
|
||||
* [`lma_collector::smtp_alert`](#class-lma_collectorsmtp_alert)
|
||||
|
@ -870,12 +869,6 @@ Declare this class to make Heka run the aggregator service.
|
|||
to check the health of the aggregator service. Valid options: an integer.
|
||||
Default: `undef`.
|
||||
|
||||
#### Class: `lma_collector::afd::workers`
|
||||
|
||||
Declare this class to configure the Heka filter that sends AFD metrics
|
||||
reporting the availability of the Neutron agents and the Cinder and Nova
|
||||
services.
|
||||
|
||||
#### Class: `lma_collector::gse_policies`
|
||||
|
||||
Declare this class to configure the GSE cluster policies on the aggregator node.
|
||||
|
|
|
@ -1,94 +0,0 @@
|
|||
-- Copyright 2015 Mirantis, Inc.
|
||||
--
|
||||
-- Licensed under the Apache License, Version 2.0 (the "License");
|
||||
-- you may not use this file except in compliance with the License.
|
||||
-- You may obtain a copy of the License at
|
||||
--
|
||||
-- http://www.apache.org/licenses/LICENSE-2.0
|
||||
--
|
||||
-- Unless required by applicable law or agreed to in writing, software
|
||||
-- distributed under the License is distributed on an "AS IS" BASIS,
|
||||
-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
-- See the License for the specific language governing permissions and
|
||||
-- limitations under the License.
|
||||
|
||||
require 'string'
|
||||
|
||||
local afd = require 'afd'
|
||||
local consts = require 'gse_constants'
|
||||
|
||||
local worker_states = {}
|
||||
|
||||
-- Heka sandbox entry point: turns openstack_nova_services,
-- openstack_cinder_services and openstack_neutron_agents metrics into AFD
-- service metrics for the 'workers' source.
--
-- The 'up' and 'down' counts of a given worker arrive in separate messages, so
-- they are cached in worker_states until both are known. Always returns 0.
function process_message()
    local name = read_message('Fields[name]')
    -- e.g. 'openstack_nova_services' + 'scheduler' gives 'nova-scheduler'
    local component = string.match(name, 'openstack_([^_]+)')
    local service = string.format('%s-%s', component, read_message('Fields[service]'))
    local key = string.format('%s.%s', name, service)

    local worker = worker_states[key] or {}
    worker_states[key] = worker
    worker[read_message('Fields[state]')] = read_message('Fields[value]')

    local state = consts.OKAY
    local up, down = worker.up, worker.down
    if not up or not down then
        -- one of the two counters has not been received yet
        return 0
    end

    -- Registers one alarm for the current service and hands back its severity.
    local function alarm(severity, watched_state, operator, value, threshold, text)
        afd.add_to_alarms(severity,
                          'last',
                          name,
                          {service=service, state=watched_state},
                          {},
                          operator,
                          value,
                          threshold,
                          nil,
                          nil,
                          string.format(text, service))
        return severity
    end

    -- Most severe condition first; at most one alarm is raised per evaluation.
    if up == 0 then
        state = alarm(consts.DOWN, 'up', '==', up, 0,
                      "All instances for the service %s are down or disabled")
    elseif down >= up then
        state = alarm(consts.CRIT, 'down', '>=', down, up,
                      "More instances of %s are down than up")
    elseif down > 0 then
        state = alarm(consts.WARN, 'down', '>', down, 0,
                      "At least one %s instance is down")
    end

    afd.inject_afd_service_metric(service,
                                  state,
                                  read_message('Fields[hostname]'),
                                  0,
                                  'workers')

    -- start over with an empty cache entry for this worker
    worker_states[key] = {}

    return 0
end
|
|
@ -1,33 +0,0 @@
|
|||
# Copyright 2015 Mirantis, Inc.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
||||
# not use this file except in compliance with the License. You may obtain
|
||||
# a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
# License for the specific language governing permissions and limitations
|
||||
# under the License.
|
||||
#
|
||||
# Class: lma_collector::afd::workers
#
# Declares the 'afd_workers' Heka sandbox filter, backed by the
# filters/afd_workers.lua plugin, and notifies the metric service class
# whenever the filter resource changes.
#
class lma_collector::afd::workers () {
  include lma_collector::params, lma_collector::service::metric

  $lua_modules_dir = $lma_collector::params::lua_modules_dir

  # Only metric messages named openstack_{nova,cinder,neutron}_{services,agents}
  # are routed to the filter.
  $metrics_matcher = '(Type == \'metric\' || Type == \'heka.sandbox.metric\') && Fields[name] =~ /^openstack_(nova|cinder|neutron)_(services|agents)$/'

  heka::filter::sandbox { 'afd_workers':
    filename         => "${lma_collector::params::plugins_dir}/filters/afd_workers.lua",
    module_directory => $lua_modules_dir,
    message_matcher  => $metrics_matcher,
    config_dir       => $lma_collector::params::metric_config_dir,
    notify           => Class['lma_collector::service::metric'],
  }
}
|
|
@ -1,26 +0,0 @@
|
|||
# Copyright 2015 Mirantis, Inc.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
||||
# not use this file except in compliance with the License. You may obtain
|
||||
# a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
# License for the specific language governing permissions and limitations
|
||||
# under the License.
|
||||
require 'spec_helper'
|
||||
|
||||
# Checks that the lma_collector::afd::workers class declares the
# 'afd_workers' Heka sandbox filter when compiled with Ubuntu/Debian facts.
describe 'lma_collector::afd::workers' do
  let(:facts) do
    {
      :kernel          => 'Linux',
      :operatingsystem => 'Ubuntu',
      :osfamily        => 'Debian'
    }
  end

  context 'with defaults' do
    it do
      expect(subject).to contain_heka__filter__sandbox('afd_workers')
    end
  end
end
|
||||
|
Loading…
Reference in New Issue