Alarming refactoring

blueprint: alarming-refactoring
DocImpact

Depends-On: I8c053f2fbc4b4b85958be8413919f9bf1b168027

Change-Id: I611866a4c450d8bd3c4cf8d1237ba91b1bf911c6
This commit is contained in:
Swann Croiset 2016-09-13 15:50:53 +02:00
parent 9b4e403327
commit 18788ae9f8
4 changed files with 66 additions and 30 deletions

View File

@ -344,16 +344,21 @@ class { 'lma_infra_alerting::nagios::contact':
require => Class['lma_infra_alerting::nagios'],
}
if $lma_collector['node_cluster_roles'] {
$node_cluster_roles = $lma_collector['node_cluster_roles']
if $lma_collector['node_profiles'] {
$node_profiles = $lma_collector['node_profiles']
} else {
$node_cluster_roles = {}
$node_profiles = {}
}
if $lma_collector['node_cluster_alarms'] {
$node_cluster_alarms = $lma_collector['node_cluster_alarms']
} else {
$node_cluster_alarms = {}
}
if $lma_collector['service_cluster_alarms'] {
$service_cluster_alarms = $lma_collector['service_cluster_alarms']
} else {
$service_cluster_alarms = {}
}
$network_metadata = hiera_hash('network_metadata')
class { 'lma_infra_alerting::nagios::hosts':
@ -363,8 +368,9 @@ class { 'lma_infra_alerting::nagios::hosts':
role_key => 'node_roles',
host_display_name_keys => ['name', 'user_node_name'],
host_custom_vars_keys => ['fqdn', 'node_roles'],
node_cluster_roles => $node_cluster_roles,
node_profiles => $node_profiles,
node_cluster_alarms => $node_cluster_alarms,
service_cluster_alarms => $service_cluster_alarms,
require => Class['lma_infra_alerting::nagios'],
}

View File

@ -21,14 +21,15 @@ It expects 5 arguments:
2. The key containing the node's name.
3. The key containing the node's role.
4. The mapping between AFD profiles and node's roles
4. The mapping between AFD profiles and alarms
5. The mapping between AFD profiles and alarms
*Examples:*
$hash = afds_to_nagios_services(
[{'name' => 'node-1', 'node_roles' => ['primary-controller']}, {'name' => 'node-2', 'node_roles' => ['controller']}],
'name', 'node_roles',
{'control_nodes' => ['primary-controller', 'controller']},
'name',
'node_roles',
{'control_nodes' => {'roles' => ['primary-controller', 'controller']}},
{'control_nodes' => {'apply_to_node' => 'control_nodes', 'alarms' => {'cpu' => ['alarm1'], 'fs' => ['alarm1']}}}
)
@ -66,7 +67,7 @@ Would return:
node_clusters[node_name] = Set.new([])
end
role_to_cluster.each do |cluster, roles|
node_clusters[node_name] << cluster if (roles & node[role_key]).length > 0
node_clusters[node_name] << cluster if (roles['roles'] & node[role_key]).length > 0
end
end
@ -76,8 +77,11 @@ Would return:
node_services = {}
clusters.each do |cluster|
(afds[cluster] || {}).keys.each do |source|
node_services["#{node}.#{cluster}.#{source}"] = "#{ cluster }.#{ source }".gsub(/\s+/, '_')
afds_map = afds.select {|c, a| a.has_key?('apply_to_node') and a['apply_to_node'] == cluster}
afds_map.each do |c, a|
a['alarms'].keys.each do |source|
node_services["#{node}.#{cluster}.#{source}"] = "#{ cluster }.#{ source }".gsub(/\s+/, '_')
end
end
end

View File

@ -30,15 +30,16 @@ class lma_infra_alerting::nagios::hosts (
$host_display_name_keys = [],
$host_custom_vars_keys = [],
$role_key = undef,
$node_cluster_roles = {},
$node_profiles = {},
$node_cluster_alarms = {},
$service_cluster_alarms = {},
){
include lma_infra_alerting::params
validate_string($host_name_key, $network_role_key)
validate_array($hosts, $host_display_name_keys, $host_custom_vars_keys)
validate_hash($node_cluster_roles, $node_cluster_alarms)
validate_hash($node_profiles, $node_cluster_alarms)
$nagios_hosts = nodes_to_nagios_hosts($hosts,
$host_name_key,
@ -68,15 +69,23 @@ class lma_infra_alerting::nagios::hosts (
create_resources(nagios::hostgroup, $nagios_hostgroups, $hostgroup_defaults)
# Configure AFD-based service checks
$afd_nodes = afds_to_nagios_services($hosts,
$host_name_key,
$role_key,
$node_profiles,
$node_cluster_alarms)
#$afd_service_defaults = {'notifications_enabled' => 0}
create_resources(lma_infra_alerting::nagios::services, $afd_nodes, {})
$afd_services = afds_to_nagios_services($hosts,
$host_name_key,
$role_key,
$node_cluster_roles,
$node_cluster_alarms)
$node_profiles,
$service_cluster_alarms)
$afd_service_defaults = {'notifications_enabled' => 0}
create_resources(lma_infra_alerting::nagios::services, $afd_services, $afd_service_defaults)
if empty($node_cluster_roles) and empty($node_cluster_alarms) {
if empty($node_profiles) and empty($node_cluster_alarms) {
$node_uid= hiera('uid')
nagios::service { 'dummy-check-for-ci':
prefix => $lma_infra_alerting::params::nagios_config_filename_prefix,

View File

@ -61,27 +61,44 @@ describe 'afds_to_nagios_services' do
]
role_to_cluster = {
"controller" => ["primary-controller", "controller"],
"compute" => ["compute"],
"storage" => ["cinder", "ceph-osd"]
"controller" => {"roles" => ["primary-controller", "controller"]},
"compute" => {"roles" => ["compute"]},
"storage" => {"roles" => ["cinder", "ceph-osd"]}
}
afds = {
"controller" => {
"system" => ["cpu-critical-controller", "cpu-warning-controller"],
"fs" => ["fs-critical", "fs-warning"]},
"apply_to_node" => "controller",
"alarms" => {
"system-ctrl" => ["cpu-critical-controller", "cpu-warning-controller"],
"fs" => ["fs-critical", "fs-warning"]
}
},
"compute" => {
"system" => ["cpu-critical-compute", "cpu-warning-compute"],
"fs" => ["fs-critical", "fs-critical-compute", "fs-warning"]},
"apply_to_node" => "compute",
"alarms" => {
"system-compute" => ["cpu-critical-compute", "cpu-warning-compute"],
"fs" => ["fs-critical", "fs-critical-compute", "fs-warning"]
}
},
"storage" => {
"system" => ["cpu-critical-storage", "cpu-warning-storage"],
"fs" => ["fs-critical-storage", "fs-warning-storage"]},
"apply_to_node" => "storage",
"alarms" => {
"system-storage" => ["cpu-critical-storage", "cpu-warning-storage"],
"fs" => ["fs-critical-storage", "fs-warning-storage"]
}
},
"default" => {
"cpu" => ["cpu-critical-default"],
"fs" => ["fs-critical", "fs-warning"]}
"apply_to_node" => "default",
"alarms" => {
"cpu" => ["cpu-critical-default"],
"fs" => ["fs-critical", "fs-warning"]
}
}
}
describe 'with arguments' do
it { should run.with_params(all_nodes, 'name', 'node_roles', role_to_cluster, afds).and_return(
{"default checks for node-1" => {
{
"default checks for node-1" => {
"hostname" => "node-1",
"services" => {
"node-1.default.cpu" => "default.cpu",
@ -91,7 +108,7 @@ describe 'afds_to_nagios_services' do
"hostname" => "node-3",
"services" => {
"node-3.controller.fs" => "controller.fs",
"node-3.controller.system" => "controller.system"
"node-3.controller.system-ctrl" => "controller.system-ctrl"
}},
"default checks for node-4" => {
"hostname" => "node-4",
@ -103,9 +120,9 @@ describe 'afds_to_nagios_services' do
"hostname" => "node-2",
"services" => {
"node-2.compute.fs" => "compute.fs",
"node-2.compute.system" => "compute.system",
"node-2.compute.system-compute" => "compute.system-compute",
"node-2.storage.fs" => "storage.fs",
"node-2.storage.system" => "storage.system"
"node-2.storage.system-storage" => "storage.system-storage"
}}})
}
end