[prometheus] Initial implementation of prometheus-alertmanager

This patch extends the prometheus role so that it can
deploy the prometheus-alertmanager[0] container.

The variable enable_prometheus_alertmanager
decides if the container should be deployed and enabled.

If enabled, the following configuration and actions are performed:

- An alerting section is added to the prometheus-server configuration,
pointing at the prometheus-alertmanager host group as targets.

- HAProxy is configured to load-balance over the prometheus-alertmanager
host group (external/internal).

Please note that a default (dummy) configuration is provided that
allows the service to start; the operator should extend it via a node custom config.

[0] https://github.com/openstack/kolla/tree/master/docker/prometheus/prometheus-alertmanager

Change-Id: I3a13342c67744a278cc8d52900a913c3ccc452ae
Closes-Bug: 1774725
Signed-off-by: Jorge Niedbalski <jorge.niedbalski@linaro.org>
This commit is contained in:
Jorge Niedbalski 2018-06-01 17:03:58 -04:00
parent 2f37a2b4af
commit 1596475db6
14 changed files with 160 additions and 0 deletions

View File

@ -278,6 +278,9 @@ prometheus_memcached_exporter_port: "9150"
# Default cadvisor port of 8080 already in use
prometheus_cadvisor_port: "18080"
# Prometheus alertmanager ports: the first is passed to --web.listen-address,
# the second to --mesh.listen-address / --mesh.peer for clustering.
prometheus_alertmanager_port: "9093"
prometheus_alertmanager_cluster_port: "9094"
qdrouterd_port: "31459"
rabbitmq_port: "5672"
@ -834,3 +837,5 @@ enable_prometheus_mysqld_exporter: "{{ enable_mariadb | bool }}"
enable_prometheus_node_exporter: "{{ enable_prometheus | bool }}"
enable_prometheus_memcached_exporter: "{{ enable_memcached | bool }}"
enable_prometheus_cadvisor: "{{ enable_prometheus | bool }}"
# Defaults to the value of enable_prometheus.
enable_prometheus_alertmanager: "{{ enable_prometheus | bool }}"
# User name for the HTTP basic auth that HAProxy enforces in front of alertmanager.
prometheus_alertmanager_user: "admin"

View File

@ -688,3 +688,6 @@ control
compute
network
storage
# Hosts that run the prometheus-alertmanager container.
[prometheus-alertmanager:children]
monitoring

View File

@ -707,3 +707,6 @@ control
compute
network
storage
# Hosts that run the prometheus-alertmanager container.
[prometheus-alertmanager:children]
monitoring

View File

@ -1397,3 +1397,35 @@ listen opendaylight_websocket
{% endfor %}
{% endif %}
{# Alertmanager listeners: HAProxy enforces HTTP basic auth using the user list defined below. #}
{% if enable_prometheus_alertmanager | bool %}
userlist prometheus-alertmanager-user
user {{ prometheus_alertmanager_user }} insecure-password {{ prometheus_alertmanager_password }}
{# Internal VIP listener, with one backend server per host in the prometheus-alertmanager group. #}
listen prometheus_alertmanager
bind {{ kolla_internal_vip_address }}:{{ prometheus_alertmanager_port }}
acl auth_acl http_auth(prometheus-alertmanager-user)
http-request auth realm basicauth unless auth_acl
{% for http_option in haproxy_listen_http_extra %}
{{ http_option }}
{% endfor %}
{% for host in groups['prometheus-alertmanager'] %}
server {{ hostvars[host]['ansible_hostname'] }} {{ hostvars[host]['ansible_' + hostvars[host]['api_interface']]['ipv4']['address'] }}:{{ prometheus_alertmanager_port }} check inter 2000 rise 2 fall 5
{% endfor %}
{# External VIP listener: same auth and backends, bound with tls_bind_info and an X-Forwarded-Proto header rewrite. #}
{% if haproxy_enable_external_vip | bool %}
listen prometheus_alertmanager_external
bind {{ kolla_external_vip_address }}:{{ prometheus_alertmanager_port }} {{ tls_bind_info }}
http-request del-header X-Forwarded-Proto
http-request set-header X-Forwarded-Proto https if { ssl_fc }
acl auth_acl http_auth(prometheus-alertmanager-user)
http-request auth realm basicauth unless auth_acl
{% for http_option in haproxy_listen_http_extra %}
{{ http_option }}
{% endfor %}
{% for host in groups['prometheus-alertmanager'] %}
server {{ hostvars[host]['ansible_hostname'] }} {{ hostvars[host]['ansible_' + hostvars[host]['api_interface']]['ipv4']['address'] }}:{{ prometheus_alertmanager_port }} check inter 2000 rise 2 fall 5
{% endfor %}
{% endif %}
{% endif %}

View File

@ -66,6 +66,16 @@ prometheus_services:
- "/sys:/sys:ro"
# Host /var/lib/docker, mounted read-only at the same path in the container.
- "/var/lib/docker/:/var/lib/docker:ro"
- "/dev/disk/:/dev/disk:ro"
prometheus-alertmanager:
  # Service definition for the alertmanager container; enabled through
  # enable_prometheus_alertmanager and placed on the prometheus-alertmanager group.
  container_name: "prometheus_alertmanager"
  group: "prometheus-alertmanager"
  image: "{{ prometheus_alertmanager_image_full }}"
  enabled: "{{ enable_prometheus_alertmanager | bool }}"
  volumes:
    # Read-only configuration directory for this service.
    - "{{ node_config_directory }}/prometheus-alertmanager/:{{ container_config_directory }}/:ro"
    - "/etc/localtime:/etc/localtime:ro"
    - "kolla_logs:/var/log/kolla/"
    # Named volume mounted at the path given to --storage.path.
    - "prometheus:/var/lib/prometheus"
####################
# Database
@ -101,3 +111,7 @@ prometheus_memcached_exporter_image_full: "{{ prometheus_memcached_exporter_imag
prometheus_cadvisor_image: "{{ docker_registry ~ '/' if docker_registry else '' }}{{ docker_namespace }}/{{ kolla_base_distro }}-{{ kolla_install_type }}-prometheus-cadvisor"
prometheus_cadvisor_tag: "{{ openstack_release }}"
prometheus_cadvisor_image_full: "{{ prometheus_cadvisor_image }}:{{ prometheus_cadvisor_tag }}"
# Image reference for the prometheus-alertmanager container:
# [<docker_registry>/]<docker_namespace>/<base distro>-<install type>-prometheus-alertmanager:<tag>
prometheus_alertmanager_image: "{{ docker_registry ~ '/' if docker_registry else '' }}{{ docker_namespace }}/{{ kolla_base_distro }}-{{ kolla_install_type }}-prometheus-alertmanager"
# The tag defaults to the OpenStack release being deployed.
prometheus_alertmanager_tag: "{{ openstack_release }}"
prometheus_alertmanager_image_full: "{{ prometheus_alertmanager_image }}:{{ prometheus_alertmanager_tag }}"

View File

@ -120,3 +120,22 @@
- service.enabled | bool
- config_json.changed | bool
or prometheus_container.changed | bool
# Recreate or restart the alertmanager container when its config.json, its
# alertmanager configuration file, or its container definition has changed.
- name: Restart prometheus-alertmanager container
  vars:
    service_name: "prometheus-alertmanager"
    service: "{{ prometheus_services[service_name] }}"
    # Pick this service's entry out of the looped results registered by the role.
    config_json: "{{ prometheus_config_jsons.results|selectattr('item.key', 'equalto', service_name)|first }}"
    prometheus_container: "{{ check_prometheus_containers.results|selectattr('item.key', 'equalto', service_name)|first }}"
  kolla_docker:
    action: "recreate_or_restart_container"
    common_options: "{{ docker_common_options }}"
    name: "{{ service.container_name }}"
    image: "{{ service.image }}"
    volumes: "{{ service.volumes }}"
  when:
    - kolla_action != "config"
    - inventory_hostname in groups[service.group]
    - service.enabled | bool
    # prometheus_confs is registered by the task that copies
    # prometheus-alertmanager.yml. Without it, a change to that file alone
    # notifies this handler but the container is never restarted.
    # NOTE(review): assumes no later task re-registers prometheus_confs - confirm.
    - config_json.changed | bool
      or prometheus_confs.changed | bool
      or prometheus_container.changed | bool

View File

@ -41,6 +41,22 @@
notify:
- Restart prometheus-server container
# Build the alertmanager configuration file with merge_yaml from the per-host
# custom config, the global custom config, and the role's template.
- name: Copying over prometheus alertmanager config file
  merge_yaml:
    dest: "{{ node_config_directory }}/prometheus-alertmanager/prometheus-alertmanager.yml"
    sources:
      - "{{ node_custom_config }}/prometheus/{{ inventory_hostname }}/prometheus-alertmanager.yml"
      - "{{ node_custom_config }}/prometheus/prometheus-alertmanager.yml"
      - "{{ role_path }}/templates/prometheus-alertmanager.yml.j2"
  vars:
    service: "{{ prometheus_services['prometheus-alertmanager']}}"
  register: prometheus_confs
  notify:
    - Restart prometheus-alertmanager container
  # Only on hosts in the service's group, and only when the service is enabled.
  when:
    - inventory_hostname in groups[service.group]
    - service.enabled | bool
- name: Copying over my.cnf for mysqld_exporter
vars:
service: "{{ prometheus_services['prometheus-mysqld-exporter']}}"

View File

@ -7,6 +7,7 @@
- prometheus_haproxy_exporter
- prometheus_mysqld_exporter
- prometheus_cadvisor
- prometheus_alertmanager
register: container_facts
- name: Checking free port for Prometheus server
@ -79,3 +80,18 @@
- container_facts['prometheus_cadvisor'] is not defined
- inventory_hostname in groups['prometheus-cadvisor']
- enable_prometheus_cadvisor | bool
# Precheck: the alertmanager web and cluster ports must not already be in use
# on this host's API interface address.
- name: Checking free ports for Prometheus Alertmanager
  with_items:
    - "{{ prometheus_alertmanager_port }}"
    - "{{ prometheus_alertmanager_cluster_port }}"
  wait_for:
    state: stopped
    host: "{{ hostvars[inventory_hostname]['ansible_' + api_interface]['ipv4']['address'] }}"
    port: "{{ item }}"
    timeout: 1
    connect_timeout: 1
  # Skipped when container_facts already has an entry for prometheus_alertmanager.
  when:
    - container_facts['prometheus_alertmanager'] is not defined
    - inventory_hostname in groups['prometheus-alertmanager']
    - enable_prometheus_alertmanager | bool

View File

@ -0,0 +1,23 @@
{
    "command": "/opt/prometheus_alertmanager/alertmanager --config.file=/etc/prometheus/alertmanager.yml --web.listen-address={{ api_interface_address }}:{{ prometheus_alertmanager_port }}{% if groups["prometheus-alertmanager"] | length > 1 %} --mesh.listen-address={{ api_interface_address }}:{{ prometheus_alertmanager_cluster_port }} {% for host in groups["prometheus-alertmanager"] %} --mesh.peer={{ hostvars[host]['ansible_' + hostvars[host]['api_interface']]['ipv4']['address'] }}:{{ hostvars[host]['prometheus_alertmanager_cluster_port'] }}{% endfor %}{% endif %} --storage.path /var/lib/prometheus",
    "config_files": [
        {
            "source": "{{ container_config_directory }}/prometheus-alertmanager.yml",
            "dest": "/etc/prometheus/alertmanager.yml",
            "owner": "prometheus",
            "perm": "0600"
        }
    ],
    "permissions": [
        {
            "path": "/var/lib/prometheus",
            "owner": "prometheus:kolla",
            "recurse": true
        },
        {
            "path": "/var/log/kolla/prometheus",
            "owner": "prometheus:kolla",
            "recurse": true
        }
    ]
}

View File

@ -0,0 +1,11 @@
# Default alertmanager configuration: every alert is routed to a single
# receiver that has no notification integrations configured.
global:
  resolve_timeout: 5m
  smtp_require_tls: true

route:
  receiver: default-receiver
  group_wait: 10s
  group_interval: 5m
  repeat_interval: 3h

receivers:
  - name: default-receiver

templates: []

View File

@ -57,3 +57,13 @@ scrape_configs:
- '{{ hostvars[host]['ansible_' + hostvars[host]['api_interface']]['ipv4']['address'] }}:{{ hostvars[host]['prometheus_cadvisor_port'] }}'
{% endfor %}
{% endif %}
{% if enable_prometheus_alertmanager | bool %}
# Send alerts to every host in the prometheus-alertmanager group, addressed by
# its API interface address and alertmanager port.
alerting:
  alertmanagers:
    - static_configs:
        - targets:
{% for host in groups['prometheus-alertmanager'] %}
            - '{{ hostvars[host]['ansible_' + hostvars[host]['api_interface']]['ipv4']['address'] }}:{{ hostvars[host]['prometheus_alertmanager_port'] }}'
{% endfor %}
{% endif %}

View File

@ -467,3 +467,5 @@ tempest_floating_network_name:
#enable_prometheus_mysqld_exporter: "{{ enable_mariadb | bool }}"
#enable_prometheus_node_exporter: "{{ enable_prometheus | bool }}"
#enable_prometheus_cadvisor: "{{ enable_prometheus | bool }}"
#enable_prometheus_memcached_exporter: "{{ enable_memcached | bool }}"
#enable_prometheus_alertmanager: "{{ enable_prometheus | bool }}"

View File

@ -258,3 +258,4 @@ xenserver_password:
# Prometheus options
####################
prometheus_mysql_exporter_database_password:
# Password for the HTTP basic auth that HAProxy enforces in front of alertmanager.
prometheus_alertmanager_password:

View File

@ -0,0 +1,5 @@
---
features:
- |
Deploy prometheus-alertmanager (https://prometheus.io/docs/alerting/alertmanager/)
as part of the prometheus monitoring stack.