Merge remote-tracking branch 'upstream/stacklight' into merge-branch

Change-Id: I02375439dce9dca8098c64768d8483cb9e47820d
This commit is contained in:
Simon Pasquier 2016-11-25 16:18:46 +01:00
commit 96a8ed3c84
11 changed files with 8782 additions and 85 deletions

View File

@ -11,3 +11,5 @@ parameters:
enabled: true
config:
enabled: true
grafana:
enabled: true

View File

@ -0,0 +1,11 @@
# collectd configuration fragment that imports and configures the
# "hypervisor_stats" module.
# Every "{{ plugin.* }}" value is a template placeholder substituted at render
# time; Timeout is the only hard-coded setting.
Import "hypervisor_stats"
<Module "hypervisor_stats">
KeystoneUrl "{{ plugin.url }}"
Tenant "{{ plugin.project }}"
Username "{{ plugin.user }}"
Password "{{ plugin.password }}"
Timeout "5"
CpuAllocationRatio "{{ plugin.cpu_allocation_ratio }}"
</Module>

View File

@ -1,10 +1,9 @@
LoadPlugin libvirt
<LoadPlugin libvirt>
Globals false
</LoadPlugin>
<Plugin "libvirt">
Connection "qemu:///system"
RefreshInterval 60
# Domain "nova"
# BlockDevice "name:device"
# InterfaceDevice "name:interface"
# IgnoreSelected false
HostnameFormat "uuid"
</Plugin>

View File

@ -0,0 +1,5 @@
# Imports the "collectd_libvirt_check" module and configures it with the
# qemu:///system URI.
Import "collectd_libvirt_check"
<Module "collectd_libvirt_check">
Uri "qemu:///system"
</Module>

View File

@ -1,17 +0,0 @@
<LoadPlugin "python">
Globals true
</LoadPlugin>
<Plugin "python">
ModulePath "/usr/lib/collectd-plugins/openstack"
Import "openstack_nova"
<Module "openstack_nova">
Username "admin"
Password "123456"
TenantName "openstack"
AuthURL "https://api.example.com:5000/v2.0"
Verbose "False"
</Module>
</Plugin>

View File

@ -0,0 +1,10 @@
# collectd configuration fragment that imports and configures the
# "openstack_nova" module.
# Connection settings are template placeholders ("{{ plugin.* }}") substituted
# at render time; Timeout and MaxRetries are fixed here.
Import "openstack_nova"
<Module "openstack_nova">
KeystoneUrl "{{ plugin.url }}"
Tenant "{{ plugin.project }}"
Username "{{ plugin.user }}"
Password "{{ plugin.password }}"
Timeout "20"
MaxRetries "2"
</Module>

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -1,14 +1,36 @@
{%- if pillar.nova.compute is defined %}
plugin:
local_plugin:
nova_compute_libvirt:
plugin: interface
interval: 60
plugin: libvirt
template: nova/files/collectd_libvirt.conf
nova_libvirt_check:
plugin: python
template: nova/files/collectd_libvirt_check.conf
{%- endif %}
{%- if pillar.nova.controller is defined %}
plugin: {}
# nova_controller_nova:
# plugin: nova
# interval: 60
# template: nova/files/collectd_nova.conf
{%- endif %}
{% from "nova/map.jinja" import controller with context %}
{%- if controller.get('enabled', False) %}
local_plugin:
collectd_check_local_endpoint:
endpoint:
nova-api:
expected_code: 200
url: "http://{{ controller.bind.private_address|replace('0.0.0.0', '127.0.0.1') }}:8774/"
# Plugins run by the remote collector. Both entries use the python plugin type
# and pass identity settings from the `controller` map to their templates.
# Templated string values are quoted so that an empty, numeric-looking or
# special-character value (e.g. containing " #" or ": ") is still parsed as a
# plain string instead of being retyped or breaking the document.
remote_plugin:
nova_resources:
plugin: python
template: nova/files/collectd_openstack_nova.conf
url: "http://{{ controller.identity.host }}:{{ controller.identity.port }}/v{% if controller.identity.get('api_version', 2)|int == 2 %}2.0{% else %}3{% endif %}"
project: "{{ controller.identity.tenant }}"
user: "{{ controller.identity.user }}"
password: "{{ controller.identity.password }}"
nova_hypervisor_stats:
plugin: python
template: nova/files/collectd_hypervisor_stats.conf
url: "http://{{ controller.identity.host }}:{{ controller.identity.port }}/v{% if controller.identity.get('api_version', 2)|int == 2 %}2.0{% else %}3{% endif %}"
project: "{{ controller.identity.tenant }}"
user: "{{ controller.identity.user }}"
password: "{{ controller.identity.password }}"
# Left unquoted on purpose so the value keeps whatever type the map provides.
cpu_allocation_ratio: {{ controller.cpu_allocation_ratio }}
{%- endif %}
{%- endif %}

7
nova/meta/grafana.yml Normal file
View File

@ -0,0 +1,7 @@
# Dashboard definitions: each entry declares its format and the path of the
# template file that provides it.
dashboard:
nova:
format: json
template: nova/files/grafana_dashboards/nova_influxdb.json
hypervisor:
format: json
template: nova/files/grafana_dashboards/hypervisor_influxdb.json

View File

@ -1,54 +1,300 @@
input:
{%- if pillar.nova.compute is defined %}
nova_compute:
engine: logstreamer
log_directory: /var/log/nova
file_match: nova-compute\.log\.?(?P<Index>\d+)?(.gz)?
priority: ["^Index"]
decoder: multidecoder_openstack
libvirtd:
engine: logstreamer
log_directory: /var/log/libvirt
file_match: libvirtd\.log\.?(?P<Index>\d+)?(.gz)?
priority: ['^Index']
decoder: multidecoder_libvirtd
{%- endif %}
{%- if pillar.nova.controller is defined %}
nova_compute:
engine: logstreamer
log_directory: /var/log/nova
file_match: nova-api\.log\.?(?P<Index>\d+)?(.gz)?
priority: ["^Index"]
decoder: multidecoder_openstack
{%- endif %}
decoder:
openstack:
engine: multidecoder
subs: [ 'Payloadregex_OpenStackLog','Payloadregex_OpenStackLogTrace' ]
cascade_strategy: first-wins
log_sub_errors: "false"
OpenStackLog:
engine: payloadregex
match_regex: ^(?P<timestamp>\d{4}-\d{2}-\d{2}\s+\d{2}:\d{2}:\d{2}.\d{3})\s(?P<pid>\d+)\s(?P<severity>\w+)\s(?P<programname>\D+)\s\[(?P<id>.+)\]\s(?P<payload>.*)
timestamp_layout: "2015-10-06 11:34:37.243"
OpenStackLogTrace:
engine: payloadregex
match_regex: ^(?P<timestamp>\d{4}-\d{2}-\d{2}\s+\d{2}:\d{2}:\d{2}.\d{3})\s(?P<pid>\d+)\s(?P<severity>TRACE)\s(?P<programname>.\S+)\s(?P<payload>.*)
timestamp_layout: "2015-10-06 11:34:37.243"
openstacksandbox:
engine: sandbox
file_name: "/etc/heka/plugins.d/decoders/openstack.lua"
module_directory: "/etc/heka/plugins.d/common;/usr/share/heka/lua_modules"
libvirtd:
engine: multidecoder
subs: [ 'Payloadregex_libvirtcommon', 'Payloadregex_libvirtaudit' ]
cascade_strategy: first-wins
log_sub_errors: "false"
libvirtcommon:
engine: payloadregex
match_regex: ^(?P<timestamp>\d{4}-\d{2}-\d{2}\s+\d{2}:\d{2}:\d{2}.\d{3}\+\d{4}):\s+(?P<pid>\d+):\s+(?P<severity>\w+)\s+:\s+(?P<qemutrace>\w+:\d+)\s+:\s+(?P<payload>.*)
timestamp_layout: "2015-09-01 15:56:14.675+0000"
libvirtaudit:
engine: payloadregex
match_regex: ^(?P<timestamp>\d{4}-\d{2}-\d{2}\s+\d{2}:\d{2}:\d{2}.\d{3}\+\d{4}):\s+(?P<pid>\d+):\s+(?P<severity>\w+)\s+:\s+(?P<message>.+):\s(?P<payload>.+)
timestamp_layout: "2015-09-01 15:56:14.675+0000"
# Log collection: decoders, a splitter and logstreamer inputs for Nova logs.
# The libvirt decoder and input are only emitted when pillar.nova.compute is
# defined.
log_collector:
decoder:
nova:
engine: sandbox
module_file: /usr/share/lma_collector/decoders/openstack_log.lua
module_dir: /usr/share/lma_collector/common;/usr/share/heka/lua_modules
adjust_timezone: true
{%- if pillar.nova.compute is defined %}
libvirt:
engine: sandbox
module_file: /usr/share/lma_collector/decoders/libvirt_log.lua
module_dir: /usr/share/lma_collector/common;/usr/share/heka/lua_modules
{%- endif %}
splitter:
nova:
engine: token
delimiter: '\n'
input:
nova_log:
engine: logstreamer
log_directory: "/var/log"
# The named groups captured here (Service, Seq) are referenced by
# `differentiator` and `priority` below.
file_match: 'nova/(?P<Service>.+)\.log\.?(?P<Seq>\d*)$'
differentiator: ['nova', '_', 'Service']
priority: ["^Seq"]
decoder: "nova_decoder"
splitter: "nova_splitter"
{%- if pillar.nova.compute is defined %}
libvirt_log:
engine: logstreamer
log_directory: "/var/log"
file_match: 'libvirt/libvirtd.log'
differentiator: ['libvirt']
decoder: "libvirt_decoder"
splitter: "TokenSplitter"
{%- endif %}
# Metric collector: triggers (threshold rules on metrics) and the alarms that
# reference them by name.
metric_collector:
trigger:
# Always defined; referenced by both the compute and the controller log alarms.
nova_logs_error:
description: 'Too many errors have been detected in Nova logs'
severity: warning
no_data_policy: okay
rules:
- metric: log_messages
field:
service: nova
level: error
relational_operator: '>'
threshold: 0.1
window: 70
periods: 0
function: max
{%- if pillar.nova.controller is defined %}
# Controller only: compares the last openstack_check_local_api value for
# nova-api against 0.
nova_api_local_endpoint:
description: 'Nova API is locally down'
severity: down
rules:
- metric: openstack_check_local_api
field:
service: nova-api
relational_operator: '=='
threshold: 0
window: 60
periods: 0
function: last
{%- endif %}
alarm:
{%- if pillar.nova.compute is defined %}
nova_logs_compute:
alerting: enabled
triggers:
- nova_logs_error
dimension:
service: nova-logs-compute
{%- endif %}
{%- if pillar.nova.controller is defined %}
nova_logs:
alerting: enabled
triggers:
- nova_logs_error
dimension:
service: nova-logs
nova_api_endpoint:
alerting: enabled
triggers:
- nova_api_local_endpoint
dimension:
service: nova-api-endpoint
{%- endif %}
# Remote collector triggers; all of them are emitted only when
# pillar.nova.controller is defined.
remote_collector:
trigger:
{%- if pillar.nova.controller is defined %}
# Compares the last openstack_check_api value for nova-api against 0.
nova_api_check_failed:
description: 'Endpoint check for nova-api is failed'
severity: down
rules:
- metric: openstack_check_api
field:
service: nova-api
relational_operator: '=='
threshold: 0
window: 60
periods: 0
function: last
# Generates three triggers per Nova service:
#   <service>_one_down      (warning)  count of services in state "down" > 0
#   <service>_majority_down (critical) percentage in state "up" <= 50
#   <service>_all_down      (down)     count of services in state "up" == 0
{%- for nova_service in ('cert', 'consoleauth', 'compute', 'conductor', 'scheduler') %}
nova_{{ nova_service }}_one_down:
description: 'At least one Nova {{ nova_service }} is down'
severity: warning
rules:
- metric: openstack_nova_services
field:
service: {{ nova_service }}
state: down
relational_operator: '>'
threshold: 0
window: 60
periods: 0
function: last
nova_{{ nova_service }}_majority_down:
description: 'Majority of Nova {{ nova_service }}s are down'
severity: critical
rules:
- metric: openstack_nova_services_percent
field:
service: {{ nova_service }}
state: up
relational_operator: '<='
threshold: 50
window: 60
periods: 0
function: last
nova_{{ nova_service }}_all_down:
description: 'All Nova {{ nova_service }}s are down'
severity: down
rules:
- metric: openstack_nova_services
field:
service: {{ nova_service }}
state: up
relational_operator: '=='
threshold: 0
window: 60
periods: 0
function: last
{%- endfor %}
# Warns when the free-VCPU metric reaches zero.
nova_total_free_vcpu_warning:
description: 'There is no VCPU available for new instances'
severity: warning
rules:
- metric: openstack_nova_total_free_vcpus
relational_operator: '=='
# Must be 0 to match the description ("no VCPU available") and the sibling
# free-memory trigger; with '==' any other value would fire only when exactly
# that many VCPUs are free and stay silent once they are exhausted.
threshold: 0
window: 60
periods: 0
function: max
# Warns when the free-RAM metric equals zero (max over the window == 0).
nova_total_free_memory_warning:
description: 'There is no memory available for new instances'
severity: warning
rules:
- metric: openstack_nova_total_free_ram
relational_operator: '=='
threshold: 0
window: 60
periods: 0
function: max
{%- endif %}
alarm:
{%- if pillar.nova.controller is defined %}
# Alarms for the API check and for each Nova service.
# `alerting` uses the string mode 'enabled', like every other alarm and cluster
# in this file; a bare `true` would be parsed as a YAML boolean instead of one
# of the alerting mode names (enabled / enabled_with_notification).
nova_api_check:
alerting: enabled
triggers:
- nova_api_check_failed
dimension:
service: nova-api-check
{%- for nova_service in ('cert', 'consoleauth', 'compute', 'conductor', 'scheduler') %}
nova_{{ nova_service }}:
alerting: enabled
triggers:
- nova_{{ nova_service }}_all_down
- nova_{{ nova_service }}_majority_down
- nova_{{ nova_service }}_one_down
dimension:
service: nova-{{ nova_service }}
{%- endfor %}
# Capacity alarms: each wraps a single free-resource warning trigger and tags
# it with its own `service` dimension.
nova_free_vcpu:
alerting: enabled
triggers:
- nova_total_free_vcpu_warning
dimension:
service: nova-free-vcpu
nova_free_memory:
alerting: enabled
triggers:
- nova_total_free_memory_warning
dimension:
service: nova-free-memory
{%- endif %}
# Aggregator: service-level alarm clusters. Each cluster matches alarms by
# their `service` dimension, lists them under `members`, and is itself tagged
# with a `service` dimension (nova-compute or nova-control).
aggregator:
alarm_cluster:
nova_logs_compute:
policy: highest_severity
group_by: hostname
alerting: enabled
match:
service: nova-logs-compute
members:
- nova_logs_compute
dimension:
service: nova-compute
nagios_host: 01-service-clusters
nova_logs:
policy: highest_severity
group_by: hostname
alerting: enabled
match:
service: nova-logs
members:
- nova_logs
dimension:
service: nova-control
nagios_host: 01-service-clusters
# The only cluster in this section using the availability_of_members policy.
nova_api_endpoint:
policy: availability_of_members
group_by: hostname
alerting: enabled
match:
service: nova-api-endpoint
members:
- nova_api_endpoint
dimension:
service: nova-control
nagios_host: 01-service-clusters
nova_api_check:
policy: highest_severity
alerting: enabled
match:
service: nova-api-check
members:
- nova_api_check
dimension:
service: nova-control
nagios_host: 01-service-clusters
# One cluster per Nova service, each tagged service: nova-control, followed by
# the two capacity clusters tagged service: nova-compute.
{%- for nova_service in ('cert', 'consoleauth', 'compute', 'conductor', 'scheduler') %}
nova_{{ nova_service }}:
policy: highest_severity
alerting: enabled
match:
service: nova-{{ nova_service }}
members:
- nova_{{ nova_service }}
dimension:
service: nova-control
nagios_host: 01-service-clusters
{%- endfor %}
nova_free_vcpu:
policy: highest_severity
alerting: enabled
match:
service: nova-free-vcpu
members:
- nova_free_vcpu
dimension:
service: nova-compute
nagios_host: 01-service-clusters
nova_free_memory:
policy: highest_severity
alerting: enabled
match:
service: nova-free-memory
members:
- nova_free_memory
dimension:
service: nova-compute
nagios_host: 01-service-clusters
# Top-level clusters. They match on the `service` dimension set by the
# service-level clusters, list those clusters as members, and are the only
# entries whose alerting mode is enabled_with_notification.
nova_control:
policy: highest_severity
alerting: enabled_with_notification
match:
service: nova-control
members:
- nova_logs
- nova_api_endpoint
- nova_api_check
{%- for nova_service in ('cert', 'consoleauth', 'compute', 'conductor', 'scheduler') %}
- nova_{{ nova_service }}
{%- endfor %}
dimension:
cluster_name: nova-control
nagios_host: 00-top-clusters
nova_compute:
policy: highest_severity
alerting: enabled_with_notification
match:
service: nova-compute
members:
- nova_logs_compute
- nova_free_vcpu
- nova_free_memory
dimension:
cluster_name: nova-compute
nagios_host: 00-top-clusters