From 8ee53ec594a2dba22d6c6c18e77a29a677bb1b39 Mon Sep 17 00:00:00 2001 From: Swann Croiset Date: Mon, 10 Oct 2016 11:32:05 +0200 Subject: [PATCH] Skip service configuration for remote alarms When an alarm contains metric(s) with collected_on:'aggregator', the corresponding Nagios service check is skipped. Change-Id: I758e7bd412a68314e59ec86a40370661525a5af9 --- deployment_scripts/puppet/manifests/nagios.pp | 2 ++ .../functions/afds_to_nagios_services.rb | 32 +++++++++++++++++-- .../manifests/nagios/hosts.pp | 16 +++++++--- .../functions/afds_to_nagios_services_spec.rb | 24 ++++++++++++-- 4 files changed, 65 insertions(+), 9 deletions(-) diff --git a/deployment_scripts/puppet/manifests/nagios.pp b/deployment_scripts/puppet/manifests/nagios.pp index d8690dc..86f7ce3 100644 --- a/deployment_scripts/puppet/manifests/nagios.pp +++ b/deployment_scripts/puppet/manifests/nagios.pp @@ -386,6 +386,8 @@ class { 'lma_infra_alerting::nagios::hosts': node_profiles => $node_profiles, node_cluster_alarms => $node_cluster_alarms, service_cluster_alarms => $service_cluster_alarms, + alarms => $lma_collector['alarms'], + metrics => $lma_collector['metrics'], require => Class['lma_infra_alerting::nagios'], } diff --git a/deployment_scripts/puppet/modules/lma_infra_alerting/lib/puppet/parser/functions/afds_to_nagios_services.rb b/deployment_scripts/puppet/modules/lma_infra_alerting/lib/puppet/parser/functions/afds_to_nagios_services.rb index b1652c8..8cd8264 100644 --- a/deployment_scripts/puppet/modules/lma_infra_alerting/lib/puppet/parser/functions/afds_to_nagios_services.rb +++ b/deployment_scripts/puppet/modules/lma_infra_alerting/lib/puppet/parser/functions/afds_to_nagios_services.rb @@ -22,6 +22,8 @@ It expects 5 arguments: 3. The key containing the node's role. 4. The mapping between AFD profiles and node's roles 5. The mapping between AFD profiles and alarms +6. Array of alarm definitions +7. 
Hash table mapping metric names to the place where they are collected *Examples:* @@ -45,7 +47,7 @@ Would return: ) do |arguments| raise(Puppet::ParseError, "afds_to_nagios_services(): Wrong number of arguments " + - "given (#{arguments.size} expecting 5") if arguments.size != 5 + "given (#{arguments.size} expecting 7") if arguments.size != 7 nodes = arguments[0] raise(Puppet::ParseError, "arg0 isn't an array!") if ! nodes.is_a?(Array) @@ -56,6 +58,10 @@ Would return: raise(Puppet::ParseError, "arg3 isn't a hash!") if ! role_to_cluster.is_a?(Hash) afds = arguments[4] raise(Puppet::ParseError, "arg4 isn't a hash!") if ! afds.is_a?(Hash) + alarms = arguments[5] + alarms = [] if ! alarms.is_a?(Array) + metrics = arguments[6] + metrics = {} if ! metrics.is_a?(Hash) result = {} @@ -90,8 +96,28 @@ Would return: if a['alerting'] == 'enabled_with_notification' notifications_enabled = 1 end - a['alarms'].keys.each do |source| - node_services["#{node}.#{logical_cluster}.#{source}"] = "#{ logical_cluster }.#{ source }".gsub(/\s+/, '_') + a['alarms'].each do |source, afd| + # collect metric names + m = Set.new([]) + afd.each do |alarm| + # find metric definition + alarm_def = alarms.select {|defi| defi['name'] == alarm} + next if alarm_def.empty?
+ alarm_def[0]['trigger']['rules'].each do |r| + m << r['metric'] + end + end + matches = true + m.each do |metric_name| + if metrics.has_key?(metric_name) and metrics[metric_name]['collected_on'] == 'aggregator' + matches = false + end + + end + # skip the source if collected_on differs + if matches + node_services["#{node}.#{logical_cluster}.#{source}"] = "#{ logical_cluster }.#{ source }".gsub(/\s+/, '_') + end end end diff --git a/deployment_scripts/puppet/modules/lma_infra_alerting/manifests/nagios/hosts.pp b/deployment_scripts/puppet/modules/lma_infra_alerting/manifests/nagios/hosts.pp index c4887eb..9bed59f 100644 --- a/deployment_scripts/puppet/modules/lma_infra_alerting/manifests/nagios/hosts.pp +++ b/deployment_scripts/puppet/modules/lma_infra_alerting/manifests/nagios/hosts.pp @@ -32,14 +32,16 @@ class lma_infra_alerting::nagios::hosts ( $role_key = undef, $node_profiles = {}, $node_cluster_alarms = {}, + $alarms = [], + $metrics = {}, $service_cluster_alarms = {}, ){ include lma_infra_alerting::params validate_string($host_name_key, $network_role_key) - validate_array($hosts, $host_display_name_keys, $host_custom_vars_keys) - validate_hash($node_profiles, $node_cluster_alarms) + validate_array($hosts, $host_display_name_keys, $host_custom_vars_keys, $alarms) + validate_hash($node_profiles, $node_cluster_alarms, $metrics) $nagios_hosts = nodes_to_nagios_hosts($hosts, $host_name_key, @@ -73,14 +75,20 @@ class lma_infra_alerting::nagios::hosts ( $host_name_key, $role_key, $node_profiles, - $node_cluster_alarms) + $node_cluster_alarms, + $alarms, + $metrics + ) create_resources(lma_infra_alerting::nagios::services, $afd_nodes) $afd_services = afds_to_nagios_services($hosts, $host_name_key, $role_key, $node_profiles, - $service_cluster_alarms) + $service_cluster_alarms, + $alarms, + $metrics + ) create_resources(lma_infra_alerting::nagios::services, $afd_services) if empty($node_profiles) and empty($node_cluster_alarms) { diff --git 
a/deployment_scripts/puppet/modules/lma_infra_alerting/spec/functions/afds_to_nagios_services_spec.rb b/deployment_scripts/puppet/modules/lma_infra_alerting/spec/functions/afds_to_nagios_services_spec.rb index cab639f..458ad29 100644 --- a/deployment_scripts/puppet/modules/lma_infra_alerting/spec/functions/afds_to_nagios_services_spec.rb +++ b/deployment_scripts/puppet/modules/lma_infra_alerting/spec/functions/afds_to_nagios_services_spec.rb @@ -93,7 +93,8 @@ describe 'afds_to_nagios_services' do "alerting" => "enabled_with_notification", "alarms" => { "system-ctrl" => ["cpu-critical-controller", "cpu-warning-controller"], - "fs" => ["fs-critical", "fs-warning"] + "fs" => ["fs-critical", "fs-warning"], + "rabbitmq" => ["rabbitmq-cluster-warning"] } }, "compute" => { @@ -137,8 +138,27 @@ describe 'afds_to_nagios_services' do } } } + alarms_services = [ + {"name"=>"rabbitmq-cluster-warning", + "description"=>"The RabbitMQ cluster is degraded because some RabbitMQ nodes are missing", + "severity"=>"warning", + "trigger"=> + {"logical_operator"=>"or", + "rules"=> + [{"metric"=>"pacemaker_resource_percent", + "relational_operator"=>"<", + "threshold"=>50, + "window"=>60, + "periods"=>0, + "function"=>"last"}]}}, + ] + metrics = { + "pacemaker_resource_percent" => { + "collected_on" => "aggregator" + } + } describe 'with arguments' do - it { should run.with_params(all_nodes, 'name', 'node_roles', role_to_cluster, afds).and_return( + it { should run.with_params(all_nodes, 'name', 'node_roles', role_to_cluster, afds, alarms_services, metrics).and_return( { "default checks for node-1" => { "hostname" => "node-1",