Check memcached service on controller nodes
This patch replaces the service_heartbeat mechanism with a 'memcached-check' alarm based on the memcached_check metric. Change-Id: I060e10320cf6f8b874a39037b1f9257ed1996342
This commit is contained in:
parent
5c4b3eb2e6
commit
26c5788684
|
@ -424,11 +424,6 @@ if hiera('lma::collector::influxdb::server', false) {
|
|||
# Enable the Apache status module
|
||||
class { 'fuel_lma_collector::mod_status': }
|
||||
|
||||
# Enable service heartbeat metrics
|
||||
class { 'lma_collector::metrics::service_heartbeat':
|
||||
services => ['memcached']
|
||||
}
|
||||
|
||||
# AFD filters
|
||||
class { 'lma_collector::afd::api': }
|
||||
class { 'lma_collector::afd::workers': }
|
||||
|
|
|
@ -868,6 +868,18 @@ lma_collector:
|
|||
window: 60
|
||||
periods: 0
|
||||
function: last
|
||||
- name: 'memcached-check'
|
||||
description: "memcached cannot be checked"
|
||||
severity: 'down'
|
||||
enabled: 'true'
|
||||
trigger:
|
||||
rules:
|
||||
- metric: memcached_check
|
||||
relational_operator: '=='
|
||||
threshold: 0
|
||||
window: 60
|
||||
periods: 0
|
||||
function: last
|
||||
- name: 'network-warning-dropped-rx'
|
||||
description: "Some received packets have been dropped"
|
||||
severity: 'warning'
|
||||
|
@ -970,6 +982,7 @@ lma_collector:
|
|||
pacemaker-service: ['primary-controller', 'controller']
|
||||
haproxy-openstack: ['primary-controller', 'controller']
|
||||
libvirt-service: ['compute']
|
||||
memcached-service: ['primary-controller', 'controller']
|
||||
|
||||
# Definition of the AFD node filters
|
||||
node_cluster_alarms:
|
||||
|
@ -1085,3 +1098,5 @@ lma_collector:
|
|||
check: ['pacemaker-check']
|
||||
libvirt-service:
|
||||
check: ['libvirt-check']
|
||||
memcached-service:
|
||||
check: ['memcached-check']
|
||||
|
|
|
@ -166,13 +166,11 @@ lma_collector:
|
|||
group_by: hostname
|
||||
members:
|
||||
- worker
|
||||
memcached:
|
||||
memcached-service:
|
||||
policy: majority_of_members
|
||||
group_by: hostname
|
||||
members:
|
||||
# This AFD heartbeat is emitted by a dedicated sandbox filter
|
||||
# TODO(all): replace by a check metric
|
||||
- heartbeat
|
||||
- check
|
||||
rabbitmq-cluster:
|
||||
policy: highest_severity
|
||||
group_by: member
|
||||
|
@ -535,7 +533,7 @@ lma_collector:
|
|||
policy: highest_severity
|
||||
group_by: member
|
||||
members:
|
||||
- memcached
|
||||
- memcached-service
|
||||
- controller
|
||||
rabbitmq:
|
||||
policy: highest_severity
|
||||
|
|
|
@ -453,7 +453,6 @@ Public Classes:
|
|||
* [`lma_collector::afd::workers`](#class-lma_collectorafdworkers)
|
||||
* [`lma_collector::gse_policies`](#class-lma_collectorgse_policies)
|
||||
* [`lma_collector::metrics::heka_monitoring`](#class-lma_collectormetricsheka_monitoring)
|
||||
* [`lma_collector::metrics::service_heartbeat`](#class-lma_collectormetricsservice_heartbeat)
|
||||
* [`lma_collector::smtp_alert`](#class-lma_collectorsmtp_alert)
|
||||
|
||||
Private Classes:
|
||||
|
@ -896,18 +895,6 @@ Declare this class to collect metrics for the Heka services themselves.
|
|||
log collector listens on.
|
||||
Valid options: a string. Default: `4352`.
|
||||
|
||||
#### Class: `lma_collector::metrics::service_heartbeat`
|
||||
|
||||
Declare this class to configure the AFD filter that sends the heartbeat metrics.
|
||||
|
||||
##### Parameters
|
||||
|
||||
* `services`: *Required*. List of services for which the AFD filter should
|
||||
emit the heartbeat metrics. Valid options: an array of strings.
|
||||
* `timeout`: *Optional*. Number of seconds after which the AFD filter will
|
||||
consider a service to be down if no metric has been received during this
|
||||
period. Valid options: an integer. Default: `30`.
|
||||
|
||||
#### Class: `lma_collector::smtp_alert`
|
||||
|
||||
Declare this class to send by email the alert notifications based on the GSE
|
||||
|
|
|
@ -1,61 +0,0 @@
|
|||
-- Copyright 2015 Mirantis, Inc.
|
||||
--
|
||||
-- Licensed under the Apache License, Version 2.0 (the "License");
|
||||
-- you may not use this file except in compliance with the License.
|
||||
-- You may obtain a copy of the License at
|
||||
--
|
||||
-- http://www.apache.org/licenses/LICENSE-2.0
|
||||
--
|
||||
-- Unless required by applicable law or agreed to in writing, software
|
||||
-- distributed under the License is distributed on an "AS IS" BASIS,
|
||||
-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
-- See the License for the specific language governing permissions and
|
||||
-- limitations under the License.
|
||||
require 'string'
|
||||
local utils = require 'lma_utils'
|
||||
local afd = require 'afd'
|
||||
local consts = require 'gse_constants'
|
||||
|
||||
-- Silence threshold taken from the sandbox "timeout" setting; falls back to
-- 30 when the setting is absent.
local timeout = read_config("timeout") or 30
|
||||
-- The same threshold scaled by 1e9, compared in timer_event() against the
-- difference between its argument and a service's last_seen timestamp.
local timeout_ns = timeout * 1e9 -- in ns
|
||||
-- Filled in by process_message() from Fields[hostname] of the first message
-- it handles, then reused for every injected metric.
local hostname
|
||||
|
||||
-- Maps a service name to a table {last_seen = <message Timestamp>}.
-- NOTE(review): this is a global rather than a local -- presumably on purpose
-- so the sandbox can preserve it; confirm before changing.
services = {}
|
||||
|
||||
-- Record when a metric belonging to a monitored service was last seen.
--
-- The service name is the leading part of Fields[name], up to (but not
-- including) the first underscore. The hostname is captured once, from the
-- first message handled, and kept for later use by timer_event().
--
-- Returns 0 when the message was recorded, and -1 when Fields[name] is absent
-- or yields no service name. The -1 path replaces a Lua runtime error
-- (string.match on nil, or indexing the services table with nil).
function process_message ()
    local ts = read_message('Timestamp')
    local metric_name = read_message("Fields[name]")
    if metric_name == nil then
        -- Nothing to derive a service name from.
        return -1
    end

    local service = string.match(metric_name, '^[^_]+')
    if not service then
        -- The name starts with '_' (or is empty): no usable prefix.
        return -1
    end

    if not hostname then
        hostname = read_message("Fields[hostname]")
    end

    local entry = services[service]
    if entry then
        entry.last_seen = ts
    else
        services[service] = {last_seen = ts}
    end

    return 0
end
|
||||
|
||||
-- Periodic callback: inject one 'heartbeat' AFD service metric for every
-- service recorded in the services table.
--
-- For each service the gap between the ns argument and its last_seen
-- timestamp is compared with timeout_ns. When the gap is larger, the status
-- becomes consts.UNKW and an alarm is queued through afd.add_to_alarms()
-- before the metric is injected; otherwise the status stays consts.OKAY.
function timer_event(ns)
    for svc, record in pairs(services) do
        local state = consts.OKAY
        local silent_for = ns - record.last_seen

        if silent_for > timeout_ns then
            state = consts.UNKW

            local alarm_fields = {{name='service', value=svc}}
            local alarm_tags = {}
            afd.add_to_alarms(
                state,
                'last',        -- function
                '*',           -- metric
                alarm_fields,  -- fields
                alarm_tags,    -- tags
                '==',          -- relational operator
                0,             -- value
                0,             -- threshold
                timeout,       -- window
                nil,           -- period
                'No metric received from the service')
        end

        afd.inject_afd_service_metric(svc, state, hostname, timeout, 'heartbeat')
    end
end
|
|
@ -1,38 +0,0 @@
|
|||
# Copyright 2015 Mirantis, Inc.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
||||
# not use this file except in compliance with the License. You may obtain
|
||||
# a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
# License for the specific language governing permissions and limitations
|
||||
# under the License.
|
||||
#
|
||||
# == Class: lma_collector::metrics::service_heartbeat
#
# Declares the 'service_heartbeat' Heka sandbox filter, which runs
# filters/service_heartbeat.lua on every message whose Fields[name] starts
# with one of the given service names. Nothing is declared when the list of
# services is empty.
#
# === Parameters
#
# [*services*]
#   Required. Array of service names; validated with validate_array().
#
# [*timeout*]
#   Optional. Passed to the filter as its 'timeout' setting. Defaults to
#   $lma_collector::params::heartbeat_timeout.
#
class lma_collector::metrics::service_heartbeat (
  $services,
  $timeout = $lma_collector::params::heartbeat_timeout,
) inherits lma_collector::params {
  include lma_collector::service::metric

  $lua_modules_dir = $lma_collector::params::lua_modules_dir

  validate_array($services)

  if (size($services) > 0) {
    heka::filter::sandbox { 'service_heartbeat':
      config_dir       => $lma_collector::params::metric_config_dir,
      filename         => "${lma_collector::params::plugins_dir}/filters/service_heartbeat.lua",
      # The alternation is wrapped in a group so that '^' anchors every
      # service name. Without it, /^a|b/ anchors only 'a' and matches 'b'
      # anywhere in the metric name.
      message_matcher  => join(['Fields[name] =~ /^(', join(sort($services), '|'), ')/'], ''),
      ticker_interval  => 10,
      config           => {
        timeout => $timeout,
      },
      module_directory => $lua_modules_dir,
      notify           => Class['lma_collector::service::metric'],
    }
  }
}
|
Loading…
Reference in New Issue