Skip service configuration for remote alarms
When an alarm contains metric(s) with collected_on:'aggregator', the corresponding Nagios service check is skipped. Change-Id: I758e7bd412a68314e59ec86a40370661525a5af9
This commit is contained in:
parent
07526249a7
commit
8ee53ec594
|
@ -386,6 +386,8 @@ class { 'lma_infra_alerting::nagios::hosts':
|
||||||
node_profiles => $node_profiles,
|
node_profiles => $node_profiles,
|
||||||
node_cluster_alarms => $node_cluster_alarms,
|
node_cluster_alarms => $node_cluster_alarms,
|
||||||
service_cluster_alarms => $service_cluster_alarms,
|
service_cluster_alarms => $service_cluster_alarms,
|
||||||
|
alarms => $lma_collector['alarms'],
|
||||||
|
metrics => $lma_collector['metrics'],
|
||||||
require => Class['lma_infra_alerting::nagios'],
|
require => Class['lma_infra_alerting::nagios'],
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -22,6 +22,8 @@ It expects 5 arguments:
|
||||||
3. The key containing the node's role.
|
3. The key containing the node's role.
|
||||||
4. The mapping between AFD profiles and node's roles
|
4. The mapping between AFD profiles and node's roles
|
||||||
5. The mapping between AFD profiles and alarms
|
5. The mapping between AFD profiles and alarms
|
||||||
|
6. Array of alarm definitions
|
||||||
|
7. Hash table mapping metric names to the place where they are collected
|
||||||
|
|
||||||
*Examples:*
|
*Examples:*
|
||||||
|
|
||||||
|
@ -45,7 +47,7 @@ Would return:
|
||||||
) do |arguments|
|
) do |arguments|
|
||||||
|
|
||||||
raise(Puppet::ParseError, "afds_to_nagios_services(): Wrong number of arguments " +
|
raise(Puppet::ParseError, "afds_to_nagios_services(): Wrong number of arguments " +
|
||||||
"given (#{arguments.size} expecting 5") if arguments.size != 5
|
"given (#{arguments.size} expecting 7") if arguments.size != 7
|
||||||
|
|
||||||
nodes = arguments[0]
|
nodes = arguments[0]
|
||||||
raise(Puppet::ParseError, "arg0 isn't an array!") if ! nodes.is_a?(Array)
|
raise(Puppet::ParseError, "arg0 isn't an array!") if ! nodes.is_a?(Array)
|
||||||
|
@ -56,6 +58,10 @@ Would return:
|
||||||
raise(Puppet::ParseError, "arg3 isn't a hash!") if ! role_to_cluster.is_a?(Hash)
|
raise(Puppet::ParseError, "arg3 isn't a hash!") if ! role_to_cluster.is_a?(Hash)
|
||||||
afds = arguments[4]
|
afds = arguments[4]
|
||||||
raise(Puppet::ParseError, "arg4 isn't a hash!") if ! afds.is_a?(Hash)
|
raise(Puppet::ParseError, "arg4 isn't a hash!") if ! afds.is_a?(Hash)
|
||||||
|
alarms = arguments[5]
|
||||||
|
alarms = [] if ! alarms.is_a?(Array)
|
||||||
|
metrics = arguments[6]
|
||||||
|
metrics = {} if ! metrics.is_a?(Hash)
|
||||||
|
|
||||||
result = {}
|
result = {}
|
||||||
|
|
||||||
|
@ -90,8 +96,28 @@ Would return:
|
||||||
if a['alerting'] == 'enabled_with_notification'
|
if a['alerting'] == 'enabled_with_notification'
|
||||||
notifications_enabled = 1
|
notifications_enabled = 1
|
||||||
end
|
end
|
||||||
a['alarms'].keys.each do |source|
|
a['alarms'].each do |source, afd|
|
||||||
node_services["#{node}.#{logical_cluster}.#{source}"] = "#{ logical_cluster }.#{ source }".gsub(/\s+/, '_')
|
# collect metric names
|
||||||
|
m = Set.new([])
|
||||||
|
afd.each do |alarm|
|
||||||
|
# find metric definition
|
||||||
|
alarm_def = alarms.select {|defi| defi['name'] == alarm}
|
||||||
|
next if alarm_def.empty?
|
||||||
|
alarm_def[0]['trigger']['rules'].each do |r|
|
||||||
|
m << r['metric']
|
||||||
|
end
|
||||||
|
end
|
||||||
|
matches = true
|
||||||
|
m.each do |metric_name|
|
||||||
|
if metrics.has_key?(metric_name) and metrics[metric_name]['collected_on'] == 'aggregator'
|
||||||
|
matches = false
|
||||||
|
end
|
||||||
|
|
||||||
|
end
|
||||||
|
# skip the source if any of its metrics is collected on the aggregator
|
||||||
|
if matches
|
||||||
|
node_services["#{node}.#{logical_cluster}.#{source}"] = "#{ logical_cluster }.#{ source }".gsub(/\s+/, '_')
|
||||||
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
|
@ -32,14 +32,16 @@ class lma_infra_alerting::nagios::hosts (
|
||||||
$role_key = undef,
|
$role_key = undef,
|
||||||
$node_profiles = {},
|
$node_profiles = {},
|
||||||
$node_cluster_alarms = {},
|
$node_cluster_alarms = {},
|
||||||
|
$alarms = [],
|
||||||
|
$metrics = {},
|
||||||
$service_cluster_alarms = {},
|
$service_cluster_alarms = {},
|
||||||
){
|
){
|
||||||
|
|
||||||
include lma_infra_alerting::params
|
include lma_infra_alerting::params
|
||||||
|
|
||||||
validate_string($host_name_key, $network_role_key)
|
validate_string($host_name_key, $network_role_key)
|
||||||
validate_array($hosts, $host_display_name_keys, $host_custom_vars_keys)
|
validate_array($hosts, $host_display_name_keys, $host_custom_vars_keys, $alarms)
|
||||||
validate_hash($node_profiles, $node_cluster_alarms)
|
validate_hash($node_profiles, $node_cluster_alarms, $metrics)
|
||||||
|
|
||||||
$nagios_hosts = nodes_to_nagios_hosts($hosts,
|
$nagios_hosts = nodes_to_nagios_hosts($hosts,
|
||||||
$host_name_key,
|
$host_name_key,
|
||||||
|
@ -73,14 +75,20 @@ class lma_infra_alerting::nagios::hosts (
|
||||||
$host_name_key,
|
$host_name_key,
|
||||||
$role_key,
|
$role_key,
|
||||||
$node_profiles,
|
$node_profiles,
|
||||||
$node_cluster_alarms)
|
$node_cluster_alarms,
|
||||||
|
$alarms,
|
||||||
|
$metrics
|
||||||
|
)
|
||||||
create_resources(lma_infra_alerting::nagios::services, $afd_nodes)
|
create_resources(lma_infra_alerting::nagios::services, $afd_nodes)
|
||||||
|
|
||||||
$afd_services = afds_to_nagios_services($hosts,
|
$afd_services = afds_to_nagios_services($hosts,
|
||||||
$host_name_key,
|
$host_name_key,
|
||||||
$role_key,
|
$role_key,
|
||||||
$node_profiles,
|
$node_profiles,
|
||||||
$service_cluster_alarms)
|
$service_cluster_alarms,
|
||||||
|
$alarms,
|
||||||
|
$metrics
|
||||||
|
)
|
||||||
create_resources(lma_infra_alerting::nagios::services, $afd_services)
|
create_resources(lma_infra_alerting::nagios::services, $afd_services)
|
||||||
|
|
||||||
if empty($node_profiles) and empty($node_cluster_alarms) {
|
if empty($node_profiles) and empty($node_cluster_alarms) {
|
||||||
|
|
|
@ -93,7 +93,8 @@ describe 'afds_to_nagios_services' do
|
||||||
"alerting" => "enabled_with_notification",
|
"alerting" => "enabled_with_notification",
|
||||||
"alarms" => {
|
"alarms" => {
|
||||||
"system-ctrl" => ["cpu-critical-controller", "cpu-warning-controller"],
|
"system-ctrl" => ["cpu-critical-controller", "cpu-warning-controller"],
|
||||||
"fs" => ["fs-critical", "fs-warning"]
|
"fs" => ["fs-critical", "fs-warning"],
|
||||||
|
"rabbitmq" => ["rabbitmq-cluster-warning"]
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"compute" => {
|
"compute" => {
|
||||||
|
@ -137,8 +138,27 @@ describe 'afds_to_nagios_services' do
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
alarms_services = [
|
||||||
|
{"name"=>"rabbitmq-cluster-warning",
|
||||||
|
"description"=>"The RabbitMQ cluster is degraded because some RabbitMQ nodes are missing",
|
||||||
|
"severity"=>"warning",
|
||||||
|
"trigger"=>
|
||||||
|
{"logical_operator"=>"or",
|
||||||
|
"rules"=>
|
||||||
|
[{"metric"=>"pacemaker_resource_percent",
|
||||||
|
"relational_operator"=>"<",
|
||||||
|
"threshold"=>50,
|
||||||
|
"window"=>60,
|
||||||
|
"periods"=>0,
|
||||||
|
"function"=>"last"}]}},
|
||||||
|
]
|
||||||
|
metrics = {
|
||||||
|
"pacemaker_resource_percent" => {
|
||||||
|
"collected_on" => "aggregator"
|
||||||
|
}
|
||||||
|
}
|
||||||
describe 'with arguments' do
|
describe 'with arguments' do
|
||||||
it { should run.with_params(all_nodes, 'name', 'node_roles', role_to_cluster, afds).and_return(
|
it { should run.with_params(all_nodes, 'name', 'node_roles', role_to_cluster, afds, alarms_services, metrics).and_return(
|
||||||
{
|
{
|
||||||
"default checks for node-1" => {
|
"default checks for node-1" => {
|
||||||
"hostname" => "node-1",
|
"hostname" => "node-1",
|
||||||
|
|
Loading…
Reference in New Issue