Merge "Alarming refactoring"
This commit is contained in:
commit
cfa2118e71
|
@ -346,16 +346,21 @@ class { 'lma_infra_alerting::nagios::contact':
|
|||
require => Class['lma_infra_alerting::nagios'],
|
||||
}
|
||||
|
||||
if $lma_collector['node_cluster_roles'] {
|
||||
$node_cluster_roles = $lma_collector['node_cluster_roles']
|
||||
if $lma_collector['node_profiles'] {
|
||||
$node_profiles = $lma_collector['node_profiles']
|
||||
} else {
|
||||
$node_cluster_roles = {}
|
||||
$node_profiles = {}
|
||||
}
|
||||
if $lma_collector['node_cluster_alarms'] {
|
||||
$node_cluster_alarms = $lma_collector['node_cluster_alarms']
|
||||
} else {
|
||||
$node_cluster_alarms = {}
|
||||
}
|
||||
if $lma_collector['service_cluster_alarms'] {
|
||||
$service_cluster_alarms = $lma_collector['service_cluster_alarms']
|
||||
} else {
|
||||
$service_cluster_alarms = {}
|
||||
}
|
||||
|
||||
$network_metadata = hiera_hash('network_metadata')
|
||||
class { 'lma_infra_alerting::nagios::hosts':
|
||||
|
@ -365,8 +370,9 @@ class { 'lma_infra_alerting::nagios::hosts':
|
|||
role_key => 'node_roles',
|
||||
host_display_name_keys => ['name', 'user_node_name'],
|
||||
host_custom_vars_keys => ['fqdn', 'node_roles'],
|
||||
node_cluster_roles => $node_cluster_roles,
|
||||
node_profiles => $node_profiles,
|
||||
node_cluster_alarms => $node_cluster_alarms,
|
||||
service_cluster_alarms => $service_cluster_alarms,
|
||||
require => Class['lma_infra_alerting::nagios'],
|
||||
}
|
||||
|
||||
|
|
|
@ -21,14 +21,15 @@ It expects 5 arguments:
|
|||
2. The key containing the node's name.
|
||||
3. The key containing the node's role.
|
||||
4. The mapping between AFD profiles and node's roles
|
||||
4. The mapping between AFD profiles and alarms
|
||||
5. The mapping between AFD profiles and alarms
|
||||
|
||||
*Examples:*
|
||||
|
||||
$hash = afds_to_nagios_services(
|
||||
[{'name' => 'node-1', 'node_roles' => ['primary-controller']}, {'name' => 'node-2', 'node_roles' => ['controller']}],
|
||||
'name', 'node_roles',
|
||||
{'control_nodes' => ['primary-controller', 'controller']},
|
||||
'name',
|
||||
'node_roles',
|
||||
{'control_nodes' => {'roles' => ['primary-controller', 'controller']}},
|
||||
{'control_nodes' => {'cpu' => ['alarm1'], 'fs' => ['alarm1']}}
|
||||
)
|
||||
|
||||
|
@ -66,7 +67,7 @@ Would return:
|
|||
node_clusters[node_name] = Set.new([])
|
||||
end
|
||||
role_to_cluster.each do |cluster, roles|
|
||||
node_clusters[node_name] << cluster if (roles & node[role_key]).length > 0
|
||||
node_clusters[node_name] << cluster if (roles['roles'] & node[role_key]).length > 0
|
||||
end
|
||||
end
|
||||
|
||||
|
@ -76,8 +77,11 @@ Would return:
|
|||
|
||||
node_services = {}
|
||||
clusters.each do |cluster|
|
||||
(afds[cluster] || {}).keys.each do |source|
|
||||
node_services["#{node}.#{cluster}.#{source}"] = "#{ cluster }.#{ source }".gsub(/\s+/, '_')
|
||||
afds_map = afds.select {|c, a| a.has_key?('apply_to_node') and a['apply_to_node'] == cluster}
|
||||
afds_map.each do |c, a|
|
||||
a['alarms'].keys.each do |source|
|
||||
node_services["#{node}.#{cluster}.#{source}"] = "#{ cluster }.#{ source }".gsub(/\s+/, '_')
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
|
|
|
@ -30,15 +30,16 @@ class lma_infra_alerting::nagios::hosts (
|
|||
$host_display_name_keys = [],
|
||||
$host_custom_vars_keys = [],
|
||||
$role_key = undef,
|
||||
$node_cluster_roles = {},
|
||||
$node_profiles = {},
|
||||
$node_cluster_alarms = {},
|
||||
$service_cluster_alarms = {},
|
||||
){
|
||||
|
||||
include lma_infra_alerting::params
|
||||
|
||||
validate_string($host_name_key, $network_role_key)
|
||||
validate_array($hosts, $host_display_name_keys, $host_custom_vars_keys)
|
||||
validate_hash($node_cluster_roles, $node_cluster_alarms)
|
||||
validate_hash($node_profiles, $node_cluster_alarms)
|
||||
|
||||
$nagios_hosts = nodes_to_nagios_hosts($hosts,
|
||||
$host_name_key,
|
||||
|
@ -68,15 +69,23 @@ class lma_infra_alerting::nagios::hosts (
|
|||
create_resources(nagios::hostgroup, $nagios_hostgroups, $hostgroup_defaults)
|
||||
|
||||
# Configure AFD-based service checks
|
||||
$afd_nodes = afds_to_nagios_services($hosts,
|
||||
$host_name_key,
|
||||
$role_key,
|
||||
$node_profiles,
|
||||
$node_cluster_alarms)
|
||||
#$afd_service_defaults = {'notifications_enabled' => 0}
|
||||
create_resources(lma_infra_alerting::nagios::services, $afd_nodes, {})
|
||||
|
||||
$afd_services = afds_to_nagios_services($hosts,
|
||||
$host_name_key,
|
||||
$role_key,
|
||||
$node_cluster_roles,
|
||||
$node_cluster_alarms)
|
||||
$node_profiles,
|
||||
$service_cluster_alarms)
|
||||
$afd_service_defaults = {'notifications_enabled' => 0}
|
||||
create_resources(lma_infra_alerting::nagios::services, $afd_services, $afd_service_defaults)
|
||||
|
||||
if empty($node_cluster_roles) and empty($node_cluster_alarms) {
|
||||
if empty($node_profiles) and empty($node_cluster_alarms) {
|
||||
$node_uid= hiera('uid')
|
||||
nagios::service { 'dummy-check-for-ci':
|
||||
prefix => $lma_infra_alerting::params::nagios_config_filename_prefix,
|
||||
|
|
|
@ -61,27 +61,44 @@ describe 'afds_to_nagios_services' do
|
|||
]
|
||||
|
||||
role_to_cluster = {
|
||||
"controller" => ["primary-controller", "controller"],
|
||||
"compute" => ["compute"],
|
||||
"storage" => ["cinder", "ceph-osd"]
|
||||
"controller" => {"roles" => ["primary-controller", "controller"]},
|
||||
"compute" => {"roles" => ["compute"]},
|
||||
"storage" => {"roles" => ["cinder", "ceph-osd"]}
|
||||
}
|
||||
afds = {
|
||||
"controller" => {
|
||||
"system" => ["cpu-critical-controller", "cpu-warning-controller"],
|
||||
"fs" => ["fs-critical", "fs-warning"]},
|
||||
"apply_to_node" => "controller",
|
||||
"alarms" => {
|
||||
"system-ctrl" => ["cpu-critical-controller", "cpu-warning-controller"],
|
||||
"fs" => ["fs-critical", "fs-warning"]
|
||||
}
|
||||
},
|
||||
"compute" => {
|
||||
"system" => ["cpu-critical-compute", "cpu-warning-compute"],
|
||||
"fs" => ["fs-critical", "fs-critical-compute", "fs-warning"]},
|
||||
"apply_to_node" => "compute",
|
||||
"alarms" => {
|
||||
"system-compute" => ["cpu-critical-compute", "cpu-warning-compute"],
|
||||
"fs" => ["fs-critical", "fs-critical-compute", "fs-warning"]
|
||||
}
|
||||
},
|
||||
"storage" => {
|
||||
"system" => ["cpu-critical-storage", "cpu-warning-storage"],
|
||||
"fs" => ["fs-critical-storage", "fs-warning-storage"]},
|
||||
"apply_to_node" => "storage",
|
||||
"alarms" => {
|
||||
"system-storage" => ["cpu-critical-storage", "cpu-warning-storage"],
|
||||
"fs" => ["fs-critical-storage", "fs-warning-storage"]
|
||||
}
|
||||
},
|
||||
"default" => {
|
||||
"cpu" => ["cpu-critical-default"],
|
||||
"fs" => ["fs-critical", "fs-warning"]}
|
||||
"apply_to_node" => "default",
|
||||
"alarms" => {
|
||||
"cpu" => ["cpu-critical-default"],
|
||||
"fs" => ["fs-critical", "fs-warning"]
|
||||
}
|
||||
}
|
||||
}
|
||||
describe 'with arguments' do
|
||||
it { should run.with_params(all_nodes, 'name', 'node_roles', role_to_cluster, afds).and_return(
|
||||
{"default checks for node-1" => {
|
||||
{
|
||||
"default checks for node-1" => {
|
||||
"hostname" => "node-1",
|
||||
"services" => {
|
||||
"node-1.default.cpu" => "default.cpu",
|
||||
|
@ -91,7 +108,7 @@ describe 'afds_to_nagios_services' do
|
|||
"hostname" => "node-3",
|
||||
"services" => {
|
||||
"node-3.controller.fs" => "controller.fs",
|
||||
"node-3.controller.system" => "controller.system"
|
||||
"node-3.controller.system-ctrl" => "controller.system-ctrl"
|
||||
}},
|
||||
"default checks for node-4" => {
|
||||
"hostname" => "node-4",
|
||||
|
@ -103,9 +120,9 @@ describe 'afds_to_nagios_services' do
|
|||
"hostname" => "node-2",
|
||||
"services" => {
|
||||
"node-2.compute.fs" => "compute.fs",
|
||||
"node-2.compute.system" => "compute.system",
|
||||
"node-2.compute.system-compute" => "compute.system-compute",
|
||||
"node-2.storage.fs" => "storage.fs",
|
||||
"node-2.storage.system" => "storage.system"
|
||||
"node-2.storage.system-storage" => "storage.system-storage"
|
||||
}}})
|
||||
}
|
||||
end
|
||||
|
|
Loading…
Reference in New Issue