Skip service configuration for remote alarms
When an alarm contains metric(s) with collected_on:'aggregator', the corresponding Nagios service check is skipped. Change-Id: I758e7bd412a68314e59ec86a40370661525a5af9
This commit is contained in:
parent
07526249a7
commit
8ee53ec594
|
@ -386,6 +386,8 @@ class { 'lma_infra_alerting::nagios::hosts':
|
|||
node_profiles => $node_profiles,
|
||||
node_cluster_alarms => $node_cluster_alarms,
|
||||
service_cluster_alarms => $service_cluster_alarms,
|
||||
alarms => $lma_collector['alarms'],
|
||||
metrics => $lma_collector['metrics'],
|
||||
require => Class['lma_infra_alerting::nagios'],
|
||||
}
|
||||
|
||||
|
|
|
@ -22,6 +22,8 @@ It expects 7 arguments:
|
|||
3. The key containing the node's role.
|
||||
4. The mapping between AFD profiles and node's roles
|
||||
5. The mapping between AFD profiles and alarms
|
||||
6. Array of alarm definitions
|
||||
7. Hash table mapping metric names to the place where they are collected
|
||||
|
||||
*Examples:*
|
||||
|
||||
|
@ -45,7 +47,7 @@ Would return:
|
|||
) do |arguments|
|
||||
|
||||
raise(Puppet::ParseError, "afds_to_nagios_services(): Wrong number of arguments " +
|
||||
"given (#{arguments.size} expecting 5") if arguments.size != 5
|
||||
"given (#{arguments.size} expecting 7") if arguments.size != 7
|
||||
|
||||
nodes = arguments[0]
|
||||
raise(Puppet::ParseError, "arg0 isn't an array!") if ! nodes.is_a?(Array)
|
||||
|
@ -56,6 +58,10 @@ Would return:
|
|||
raise(Puppet::ParseError, "arg3 isn't a hash!") if ! role_to_cluster.is_a?(Hash)
|
||||
afds = arguments[4]
|
||||
raise(Puppet::ParseError, "arg4 isn't a hash!") if ! afds.is_a?(Hash)
|
||||
alarms = arguments[5]
|
||||
alarms = [] if ! alarms.is_a?(Array)
|
||||
metrics = arguments[6]
|
||||
metrics = {} if ! metrics.is_a?(Hash)
|
||||
|
||||
result = {}
|
||||
|
||||
|
@ -90,8 +96,28 @@ Would return:
|
|||
if a['alerting'] == 'enabled_with_notification'
|
||||
notifications_enabled = 1
|
||||
end
|
||||
a['alarms'].keys.each do |source|
|
||||
node_services["#{node}.#{logical_cluster}.#{source}"] = "#{ logical_cluster }.#{ source }".gsub(/\s+/, '_')
|
||||
a['alarms'].each do |source, afd|
|
||||
# collect metric names
|
||||
m = Set.new([])
|
||||
afd.each do |alarm|
|
||||
# find the alarm definition matching this alarm name
|
||||
alarm_def = alarms.select {|defi| defi['name'] == alarm}
|
||||
next if alarm_def.empty?
|
||||
alarm_def[0]['trigger']['rules'].each do |r|
|
||||
m << r['metric']
|
||||
end
|
||||
end
|
||||
matches = true
|
||||
m.each do |metric_name|
|
||||
if metrics.has_key?(metric_name) and metrics[metric_name]['collected_on'] == 'aggregator'
|
||||
matches = false
|
||||
end
|
||||
|
||||
end
|
||||
# skip the source if any of its metrics is collected on the aggregator
|
||||
if matches
|
||||
node_services["#{node}.#{logical_cluster}.#{source}"] = "#{ logical_cluster }.#{ source }".gsub(/\s+/, '_')
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
|
|
|
@ -32,14 +32,16 @@ class lma_infra_alerting::nagios::hosts (
|
|||
$role_key = undef,
|
||||
$node_profiles = {},
|
||||
$node_cluster_alarms = {},
|
||||
$alarms = [],
|
||||
$metrics = {},
|
||||
$service_cluster_alarms = {},
|
||||
){
|
||||
|
||||
include lma_infra_alerting::params
|
||||
|
||||
validate_string($host_name_key, $network_role_key)
|
||||
validate_array($hosts, $host_display_name_keys, $host_custom_vars_keys)
|
||||
validate_hash($node_profiles, $node_cluster_alarms)
|
||||
validate_array($hosts, $host_display_name_keys, $host_custom_vars_keys, $alarms)
|
||||
validate_hash($node_profiles, $node_cluster_alarms, $metrics)
|
||||
|
||||
$nagios_hosts = nodes_to_nagios_hosts($hosts,
|
||||
$host_name_key,
|
||||
|
@ -73,14 +75,20 @@ class lma_infra_alerting::nagios::hosts (
|
|||
$host_name_key,
|
||||
$role_key,
|
||||
$node_profiles,
|
||||
$node_cluster_alarms)
|
||||
$node_cluster_alarms,
|
||||
$alarms,
|
||||
$metrics
|
||||
)
|
||||
create_resources(lma_infra_alerting::nagios::services, $afd_nodes)
|
||||
|
||||
$afd_services = afds_to_nagios_services($hosts,
|
||||
$host_name_key,
|
||||
$role_key,
|
||||
$node_profiles,
|
||||
$service_cluster_alarms)
|
||||
$service_cluster_alarms,
|
||||
$alarms,
|
||||
$metrics
|
||||
)
|
||||
create_resources(lma_infra_alerting::nagios::services, $afd_services)
|
||||
|
||||
if empty($node_profiles) and empty($node_cluster_alarms) {
|
||||
|
|
|
@ -93,7 +93,8 @@ describe 'afds_to_nagios_services' do
|
|||
"alerting" => "enabled_with_notification",
|
||||
"alarms" => {
|
||||
"system-ctrl" => ["cpu-critical-controller", "cpu-warning-controller"],
|
||||
"fs" => ["fs-critical", "fs-warning"]
|
||||
"fs" => ["fs-critical", "fs-warning"],
|
||||
"rabbitmq" => ["rabbitmq-cluster-warning"]
|
||||
}
|
||||
},
|
||||
"compute" => {
|
||||
|
@ -137,8 +138,27 @@ describe 'afds_to_nagios_services' do
|
|||
}
|
||||
}
|
||||
}
|
||||
alarms_services = [
|
||||
{"name"=>"rabbitmq-cluster-warning",
|
||||
"description"=>"The RabbitMQ cluster is degraded because some RabbitMQ nodes are missing",
|
||||
"severity"=>"warning",
|
||||
"trigger"=>
|
||||
{"logical_operator"=>"or",
|
||||
"rules"=>
|
||||
[{"metric"=>"pacemaker_resource_percent",
|
||||
"relational_operator"=>"<",
|
||||
"threshold"=>50,
|
||||
"window"=>60,
|
||||
"periods"=>0,
|
||||
"function"=>"last"}]}},
|
||||
]
|
||||
metrics = {
|
||||
"pacemaker_resource_percent" => {
|
||||
"collected_on" => "aggregator"
|
||||
}
|
||||
}
|
||||
describe 'with arguments' do
|
||||
it { should run.with_params(all_nodes, 'name', 'node_roles', role_to_cluster, afds).and_return(
|
||||
it { should run.with_params(all_nodes, 'name', 'node_roles', role_to_cluster, afds, alarms_services, metrics).and_return(
|
||||
{
|
||||
"default checks for node-1" => {
|
||||
"hostname" => "node-1",
|
||||
|
|
Loading…
Reference in New Issue