Check memcached service on controller nodes

The patch replaces the service_heartbeat mechanism.

Change-Id: I060e10320cf6f8b874a39037b1f9257ed1996342
This commit is contained in:
Swann Croiset 2016-08-17 13:53:47 +02:00 committed by Simon Pasquier
parent 5c4b3eb2e6
commit 26c5788684
6 changed files with 18 additions and 122 deletions

View File

@ -424,11 +424,6 @@ if hiera('lma::collector::influxdb::server', false) {
# Enable the Apache status module
class { 'fuel_lma_collector::mod_status': }
# Enable service heartbeat metrics
class { 'lma_collector::metrics::service_heartbeat':
services => ['memcached']
}
# AFD filters
class { 'lma_collector::afd::api': }
class { 'lma_collector::afd::workers': }

View File

@ -868,6 +868,18 @@ lma_collector:
window: 60
periods: 0
function: last
- name: 'memcached-check'
description: "memcached cannot be checked"
severity: 'down'
enabled: 'true'
trigger:
rules:
- metric: memcached_check
relational_operator: '=='
threshold: 0
window: 60
periods: 0
function: last
- name: 'network-warning-dropped-rx'
description: "Some received packets have been dropped"
severity: 'warning'
@ -970,6 +982,7 @@ lma_collector:
pacemaker-service: ['primary-controller', 'controller']
haproxy-openstack: ['primary-controller', 'controller']
libvirt-service: ['compute']
memcached-service: ['primary-controller', 'controller']
# Definition of the AFD node filters
node_cluster_alarms:
@ -1085,3 +1098,5 @@ lma_collector:
check: ['pacemaker-check']
libvirt-service:
check: ['libvirt-check']
memcached-service:
check: ['memcached-check']

View File

@ -166,13 +166,11 @@ lma_collector:
group_by: hostname
members:
- worker
memcached:
memcached-service:
policy: majority_of_members
group_by: hostname
members:
# This AFD heartbeat is emitted by a dedicated sandbox filter
# TODO(all): replace by a check metric
- heartbeat
- check
rabbitmq-cluster:
policy: highest_severity
group_by: member
@ -535,7 +533,7 @@ lma_collector:
policy: highest_severity
group_by: member
members:
- memcached
- memcached-service
- controller
rabbitmq:
policy: highest_severity

View File

@ -453,7 +453,6 @@ Public Classes:
* [`lma_collector::afd::workers`](#class-lma_collectorafdworkers)
* [`lma_collector::gse_policies`](#class-lma_collectorgse_policies)
* [`lma_collector::metrics::heka_monitoring`](#class-lma_collectormetricsheka_monitoring)
* [`lma_collector::metrics::service_heartbeat`](#class-lma_collectormetricsservice_heartbeat)
* [`lma_collector::smtp_alert`](#class-lma_collectorsmtp_alert)
Private Classes:
@ -896,18 +895,6 @@ Declare this class to collect metrics for the Heka services themselves.
log collector listens on.
Valid options: a string. Default: `4352`.
#### Class: `lma_collector::metrics::service_heartbeat`
Declare this class to configure the AFD filter that sends the heartbeat metrics.
##### Parameters
* `services`: *Required*. List of services for which the AFD filter should
emit the heartbeat metrics. Valid options: an array of strings.
* `timeout`: *Optional*. Number of seconds after which the AFD filter will
consider a service to be down if no metric has been received during this
period. Valid options: an integer. Default: `30`.
#### Class: `lma_collector::smtp_alert`
Declare this class to send by email the alert notifications based on the GSE

View File

@ -1,61 +0,0 @@
-- Copyright 2015 Mirantis, Inc.
--
-- Licensed under the Apache License, Version 2.0 (the "License");
-- you may not use this file except in compliance with the License.
-- You may obtain a copy of the License at
--
-- http://www.apache.org/licenses/LICENSE-2.0
--
-- Unless required by applicable law or agreed to in writing, software
-- distributed under the License is distributed on an "AS IS" BASIS,
-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-- See the License for the specific language governing permissions and
-- limitations under the License.
require 'string'
local utils = require 'lma_utils'
local afd = require 'afd'
local consts = require 'gse_constants'
-- Heartbeat timeout in seconds, read from the "timeout" configuration
-- setting; 30 is used when read_config() returns nil or false.
local timeout = read_config("timeout") or 30
local timeout_ns = timeout * 1e9 -- in ns
-- Value of Fields[hostname] taken from the first message handled by
-- process_message(); nil until then.
local hostname
-- Maps a service name to {last_seen = <Timestamp of its latest message>}.
-- NOTE(review): declared global rather than local -- presumably on purpose so
-- that the sandbox host can see it; confirm before changing the scope.
services = {}
-- Record when a service was last heard from.
--
-- Reads Fields[name] and Timestamp from the current message; the service key
-- is the part of the name located before the first underscore. The hostname
-- (Fields[hostname]) is captured once, from the first accepted message, and
-- reused by timer_event().
--
-- Returns 0 once the timestamp has been stored, or -1 when the message has no
-- usable name (not a string, empty, or starting with an underscore) so that a
-- malformed message is rejected instead of raising a Lua error.
function process_message ()
    local name = read_message("Fields[name]")
    if type(name) ~= 'string' then
        -- string.match() would raise on a missing name
        return -1
    end

    local service = string.match(name, '^[^_]+')
    if not service then
        -- a nil key cannot be stored in the services table
        return -1
    end

    local ts = read_message('Timestamp')

    if not hostname then
        hostname = read_message("Fields[hostname]")
    end

    local entry = services[service]
    if entry then
        entry.last_seen = ts
    else
        services[service] = {last_seen = ts}
    end

    return 0
end
-- Periodic callback: emit one 'heartbeat' AFD metric per known service.
--
-- ns is the current time in nanoseconds. A service whose latest message is
-- older than timeout_ns is reported with the UNKW status together with an
-- alarm explaining that nothing was received; any other service is reported
-- with the OKAY status.
function timer_event(ns)
    for name, entry in pairs(services) do
        local state = consts.OKAY
        local silence = ns - entry.last_seen

        if silence > timeout_ns then
            state = consts.UNKW
            local alarm_fields = {
                {name='service', value=name},
            }
            afd.add_to_alarms(
                state,
                'last',        -- function
                '*',           -- metric
                alarm_fields,  -- fields
                {},            -- tags
                '==',
                0,             -- value
                0,             -- threshold
                timeout,       -- window
                nil,           -- period
                'No metric received from the service'
            )
        end

        afd.inject_afd_service_metric(name, state, hostname, timeout, 'heartbeat')
    end
end

View File

@ -1,38 +0,0 @@
# Copyright 2015 Mirantis, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
#
# Class: lma_collector::metrics::service_heartbeat
#
# Declares the 'service_heartbeat' Heka sandbox filter, but only when at least
# one service is given.
#
# Parameters:
#   $services - array of service names; messages whose Fields[name] starts
#               with one of them are routed to the filter.
#   $timeout  - handed to the filter as its 'timeout' setting. Defaults to
#               $lma_collector::params::heartbeat_timeout.
#
class lma_collector::metrics::service_heartbeat (
  $services,
  $timeout = $lma_collector::params::heartbeat_timeout,
) inherits lma_collector::params {
  include lma_collector::service::metric

  $lua_modules_dir = $lma_collector::params::lua_modules_dir

  validate_array($services)

  if (size($services) > 0) {
    heka::filter::sandbox { 'service_heartbeat':
      config_dir       => $lma_collector::params::metric_config_dir,
      filename         => "${lma_collector::params::plugins_dir}/filters/service_heartbeat.lua",
      # The alternation is wrapped in a group so that '^' anchors every
      # service name: '/^a|b/' would mean "starts with a" OR "contains b".
      message_matcher  => join(['Fields[name] =~ /^(', join(sort($services), '|'), ')/'], ''),
      ticker_interval  => 10,
      config           => {
        timeout => $timeout,
      },
      module_directory => $lua_modules_dir,
      notify           => Class['lma_collector::service::metric'],
    }
  }
}