From 7c94deae4337b4981e2d3f3f8dda8f7c508778b1 Mon Sep 17 00:00:00 2001
From: "Xiaoguang(William) Zhang"
Date: Fri, 7 Aug 2020 11:44:54 -0400
Subject: [PATCH] Update alertmanager include snmp_notifier function

Add an optional snmp_notifier Deployment and Service to the
prometheus-alertmanager chart and make the new 'snmp_notifier' webhook
receiver the default Alertmanager route.

The Alertmanager configuration is now rendered through the helm-toolkit
values_template_renderer so that the webhook URL can be resolved from
the chart endpoints. The storage and command_flags values move under an
'alertmanager' key to keep them apart from the snmpnotifier settings.

Change-Id: I5aedbdcdbba397a9fddde19a0898cb91de08553a
---
 .../templates/bin/_alertmanager.sh.tpl        |  4 +-
 .../templates/configmap-etc.yaml              |  3 +-
 .../snmp-notifier/snmp-deployment.yaml        | 74 +++++++++++++++++
 .../templates/snmp-notifier/snmp-service.yaml | 34 ++++++++
 .../templates/statefulset.yaml                |  8 +-
 prometheus-alertmanager/values.yaml           | 83 ++++++++++++++++---
 prometheus/values.yaml                        | 10 +--
 .../deployment/multinode/060-alertmanager.sh  |  3 +-
 8 files changed, 191 insertions(+), 28 deletions(-)
 create mode 100644 prometheus-alertmanager/templates/snmp-notifier/snmp-deployment.yaml
 create mode 100644 prometheus-alertmanager/templates/snmp-notifier/snmp-service.yaml

diff --git a/prometheus-alertmanager/templates/bin/_alertmanager.sh.tpl b/prometheus-alertmanager/templates/bin/_alertmanager.sh.tpl
index a9b4bf398..b211fb0dd 100644
--- a/prometheus-alertmanager/templates/bin/_alertmanager.sh.tpl
+++ b/prometheus-alertmanager/templates/bin/_alertmanager.sh.tpl
@@ -20,8 +20,8 @@ COMMAND="${@:-start}"
 function start () {
   exec /bin/alertmanager \
     --config.file=/etc/alertmanager/config.yml \
-    --storage.path={{ .Values.conf.command_flags.storage.path }} \
-    --cluster.listen-address={{ .Values.conf.command_flags.cluster.listen_address }} \
+    --storage.path={{ .Values.conf.command_flags.alertmanager.storage.path }} \
+    --cluster.listen-address={{ .Values.conf.command_flags.alertmanager.cluster.listen_address }} \
     $(generate_peers)
 }
 
diff --git a/prometheus-alertmanager/templates/configmap-etc.yaml b/prometheus-alertmanager/templates/configmap-etc.yaml
index 1f3c02fc7..e9ff07ab8 100644
--- a/prometheus-alertmanager/templates/configmap-etc.yaml
+++ b/prometheus-alertmanager/templates/configmap-etc.yaml
@@ -20,8 +20,7 @@ kind: ConfigMap
 metadata:
   name: alertmanager-etc
 data:
-  config.yml: |
-{{ toYaml .Values.conf.alertmanager | indent 4 }}
+{{- include "helm-toolkit.snippets.values_template_renderer" (dict "envAll" $envAll "template" .Values.conf.alertmanager "key" "config.yml") | indent 2 }}
   alert-templates.tmpl: |
 {{- if .Values.conf.alert_templates }}
 {{ .Values.conf.alert_templates | indent 4 }}
diff --git a/prometheus-alertmanager/templates/snmp-notifier/snmp-deployment.yaml b/prometheus-alertmanager/templates/snmp-notifier/snmp-deployment.yaml
new file mode 100644
index 000000000..708260337
--- /dev/null
+++ b/prometheus-alertmanager/templates/snmp-notifier/snmp-deployment.yaml
@@ -0,0 +1,74 @@
+{{/*
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/}}
+
+{{- if .Values.manifests.snmpnotifier.deployment }}
+{{- $envAll := . }}
+
+{{- $mounts_snmpnotifier := .Values.pod.mounts.snmpnotifier.snmpnotifier }}
+{{- $mounts_snmpnotifier_init := .Values.pod.mounts.snmpnotifier.init_container }}
+
+{{- $serviceAccountName := "snmpnotifier" }}
+{{ tuple $envAll "snmpnotifier" $serviceAccountName | include "helm-toolkit.snippets.kubernetes_pod_rbac_serviceaccount" }}
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: snmpnotifier
+  annotations:
+    {{ tuple $envAll | include "helm-toolkit.snippets.release_uuid" }}
+  labels:
+{{ tuple $envAll "snmpnotifier" "server" | include "helm-toolkit.snippets.kubernetes_metadata_labels" | indent 4 }}
+spec:
+  replicas: {{ .Values.pod.replicas.snmpnotifier }}
+  selector:
+    matchLabels:
+{{ tuple $envAll "snmpnotifier" "server" | include "helm-toolkit.snippets.kubernetes_metadata_labels" | indent 6 }}
+  template:
+    metadata:
+      labels:
+{{ tuple $envAll "snmpnotifier" "server" | include "helm-toolkit.snippets.kubernetes_metadata_labels" | indent 8 }}
+      annotations:
+{{ dict "envAll" $envAll "podName" "snmpnotifier" "containerNames" (list "snmpnotifier" "init") | include "helm-toolkit.snippets.kubernetes_mandatory_access_control_annotation" | indent 8 }}
+    spec:
+{{ dict "envAll" $envAll "application" "server" | include "helm-toolkit.snippets.kubernetes_pod_security_context" | indent 6 }}
+      serviceAccountName: {{ $serviceAccountName }}
+      affinity:
+{{ tuple $envAll "snmpnotifier" "server" | include "helm-toolkit.snippets.kubernetes_pod_anti_affinity" | indent 8 }}
+      nodeSelector:
+        {{ .Values.labels.snmpnotifier.node_selector_key }}: {{ .Values.labels.snmpnotifier.node_selector_value | quote }}
+      terminationGracePeriodSeconds: {{ .Values.pod.lifecycle.termination_grace_period.snmpnotifier.timeout | default "30" }}
+      containers:
+        - name: snmpnotifier
+{{ tuple $envAll "snmpnotifier" | include "helm-toolkit.snippets.image" | indent 10 }}
+{{ tuple $envAll $envAll.Values.pod.resources.snmpnotifier | include "helm-toolkit.snippets.kubernetes_resources" | indent 10 }}
+{{ dict "envAll" $envAll "application" "server" "container" "snmpnotifier" | include "helm-toolkit.snippets.kubernetes_container_security_context" | indent 10 }}
+          args:
+            - --alert.severity-label={{ .Values.conf.command_flags.snmpnotifier.alert_severity_label}}
+            - --alert.default-severity={{ .Values.conf.command_flags.snmpnotifier.alert_default_severity}}
+            - --snmp.version={{ .Values.conf.command_flags.snmpnotifier.snmp_version}}
+            - --snmp.destination={{ .Values.conf.command_flags.snmpnotifier.snmp_desination}}
+            - --snmp.trap-default-oid={{ .Values.conf.command_flags.snmpnotifier.snmp_trap_default_oid}}
+            - --snmp.trap-description-template={{ .Values.conf.command_flags.snmpnotifier.snmp_trap_description_template}}
+            - --snmp.community={{ .Values.conf.command_flags.snmpnotifier.snmp_community}}
+            - --log.level={{ .Values.conf.command_flags.snmpnotifier.log_level}}
+          ports:
+            - name: snmp-api
+              containerPort: {{ tuple "snmpnotifier" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}
+          readinessProbe:
+            httpGet:
+              path: /health
+              port: {{ tuple "snmpnotifier" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}
+            initialDelaySeconds: 30
+            timeoutSeconds: 30
+{{- end }}
diff --git a/prometheus-alertmanager/templates/snmp-notifier/snmp-service.yaml b/prometheus-alertmanager/templates/snmp-notifier/snmp-service.yaml
new file mode 100644
index 000000000..e07da5f5a
--- /dev/null
+++ b/prometheus-alertmanager/templates/snmp-notifier/snmp-service.yaml
@@ -0,0 +1,34 @@
+{{/*
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/}}
+
+{{- if .Values.manifests.snmpnotifier.service }}
+{{- $envAll := . }}
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: {{ tuple "snmpnotifier" "internal" . | include "helm-toolkit.endpoints.hostname_short_endpoint_lookup" }}
+spec:
+  ports:
+    - name: snmpnotifier-api
+      {{ if .Values.network.snmpnotifier.node_port.enabled }}
+      nodePort: {{ .Values.network.snmpnotifier.node_port.port }}
+      {{ end }}
+      port: {{ tuple "snmpnotifier" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}
+  selector:
+{{ tuple $envAll "snmpnotifier" "server" | include "helm-toolkit.snippets.kubernetes_metadata_labels" | indent 4 }}
+  {{ if .Values.network.snmpnotifier.node_port.enabled }}
+  type: NodePort
+  {{ end }}
+{{- end }}
diff --git a/prometheus-alertmanager/templates/statefulset.yaml b/prometheus-alertmanager/templates/statefulset.yaml
index dfafc1715..ee377db79 100644
--- a/prometheus-alertmanager/templates/statefulset.yaml
+++ b/prometheus-alertmanager/templates/statefulset.yaml
@@ -132,7 +132,7 @@ spec:
             name: alertmanager-bin
             defaultMode: 0555
 {{ if $mounts_alertmanager.volumes }}{{ toYaml $mounts_alertmanager.volumes | indent 8 }}{{ end }}
-{{- if not .Values.storage.enabled }}
+{{- if not .Values.storage.alertmanager.enabled }}
         - name: alertmanager-data
           emptyDir: {}
 {{- else }}
@@ -140,10 +140,10 @@ spec:
     - metadata:
         name: alertmanager-data
       spec:
-        accessModes: {{ .Values.storage.pvc.access_mode }}
+        accessModes: {{ .Values.storage.alertmanager.pvc.access_mode }}
         resources:
           requests:
-            storage: {{ .Values.storage.requests.storage }}
-        storageClassName: {{ .Values.storage.storage_class }}
+            storage: {{ .Values.storage.alertmanager.requests.storage }}
+        storageClassName: {{ .Values.storage.alertmanager.storage_class }}
 {{- end }}
 {{- end }}
diff --git a/prometheus-alertmanager/values.yaml b/prometheus-alertmanager/values.yaml
index 389dae168..84eba3c3a 100644
--- a/prometheus-alertmanager/values.yaml
+++ b/prometheus-alertmanager/values.yaml
@@ -19,6 +19,7 @@
 images:
   tags:
     prometheus-alertmanager: docker.io/prom/alertmanager:v0.20.0
+    snmpnotifier: docker.io/maxwo/snmp-notifier:v1.0.0
     dep_check: quay.io/airshipit/kubernetes-entrypoint:v1.0.0
     image_repo_sync: docker.io/docker:17.07.0
   pull_policy: IfNotPresent
@@ -32,6 +33,9 @@ labels:
   alertmanager:
     node_selector_key: openstack-control-plane
     node_selector_value: enabled
+  snmpnotifier:
+    node_selector_key: openstack-control-plane
+    node_selector_value: enabled
   job:
     node_selector_key: openstack-control-plane
     node_selector_value: enabled
@@ -60,15 +64,23 @@ pod:
     alertmanager:
       alertmanager:
       init_container: null
+    snmpnotifier:
+      snmpnotifier:
+      init_container: null
   replicas:
     alertmanager: 1
+    snmpnotifier: 1
   lifecycle:
     upgrades:
+      deployment:
+        pod_replacement_strategy: RollingUpdate
       statefulsets:
         pod_replacement_strategy: RollingUpdate
     termination_grace_period:
       alertmanager:
         timeout: 30
+      snmpnotifier:
+        timeout: 30
   resources:
     enabled: false
     alertmanager:
@@ -86,6 +98,13 @@ pod:
         limits:
           memory: "1024Mi"
           cpu: "2000m"
+    snmpnotifier:
+      limits:
+        memory: "1024Mi"
+        cpu: "2000m"
+      requests:
+        memory: "128Mi"
+        cpu: "500m"
 
 endpoints:
   cluster_domain_suffix: cluster.local
@@ -127,6 +146,20 @@ endpoints:
         public: 80
       mesh:
         default: 9094
+  snmpnotifier:
+    name: snmpnotifier
+    namespace: null
+    hosts:
+      default: snmp-engine
+    host_fqdn_override:
+      default: null
+    path:
+      default: /alerts
+    scheme:
+      default: 'http'
+    port:
+      api:
+        default: 9464
 
 dependencies:
   dynamic:
@@ -157,6 +190,10 @@ network:
     node_port:
       enabled: false
       port: 30903
+  snmpnotifier:
+    node_port:
+      enabled: false
+      port: 30464
 
 secrets:
   tls:
@@ -165,12 +202,13 @@ secrets:
         public: alerts-tls-public
 
 storage:
-  enabled: true
-  pvc:
-    access_mode: ["ReadWriteOnce"]
-  requests:
-    storage: 5Gi
-  storage_class: general
+  alertmanager:
+    enabled: true
+    pvc:
+      access_mode: ["ReadWriteOnce"]
+    requests:
+      storage: 5Gi
+    storage_class: general
 
 manifests:
   clusterrolebinding: true
@@ -184,6 +222,9 @@ manifests:
   service_discovery: true
   service_ingress: true
   statefulset: true
+  snmpnotifier:
+    service: true
+    deployment: true
 
 network_policy:
   alertmanager:
@@ -194,11 +235,21 @@ network_policy:
 
 conf:
   command_flags:
-    storage:
-      path: /var/lib/alertmanager/data
-    cluster:
-      listen_address: "0.0.0.0:9094"
-  alertmanager:
+    alertmanager:
+      storage:
+        path: /var/lib/alertmanager/data
+      cluster:
+        listen_address: "0.0.0.0:9094"
+    snmpnotifier:
+      alert_severity_label: severity
+      alert_default_severity: critical
+      snmp_version: V2c
+      snmp_desination: 192.168.89.128:162
+      snmp_trap_default_oid: 1.3.6.1.4.1.98789.0.1
+      snmp_trap_description_template: /etc/snmp_notifier/description-template.tpl
+      snmp_community: public
+      log_level: debug
+  alertmanager: |
     global:
       # The smarthost and SMTP sender used for mail notifications.
       smtp_smarthost: 'localhost:25'
@@ -234,7 +285,8 @@ conf:
       # resend them.
       repeat_interval: 3h
       # A default receiver
-      receiver: team-X-mails
+      # receiver: team-X-mails
+      receiver: snmp_notifier
       # All the above attributes are inherited by all child routes and can
       # overwritten on each.
       # The child route trees.
@@ -291,6 +343,11 @@ conf:
           - cluster
           - service
     receivers:
+      - name: 'snmp_notifier'
+        webhook_configs:
+          - send_resolved: true
+            #url: http://snmp-engine.osh-infra.svc.cluster.local:9464/alerts
+            url: {{ tuple "snmpnotifier" "internal" "api" . | include "helm-toolkit.endpoints.keystone_endpoint_uri_lookup" }}
       - name: 'team-X-mails'
         email_configs:
           - to: 'team-X+alerts@example.org'
@@ -313,6 +370,6 @@ conf:
           - auth_token:
             room_id: 85
             message_format: html
-            notify: true
+            notify: false
   alert_templates: null
 ...
diff --git a/prometheus/values.yaml b/prometheus/values.yaml
index b32614eb2..74c5c3beb 100644
--- a/prometheus/values.yaml
+++ b/prometheus/values.yaml
@@ -156,13 +156,13 @@ endpoints:
         default: 9090
       http:
         default: 80
-  alerts:
-    name: alertmanager
+  alertmanager:
+    name: prometheus-alertmanager
     namespace: null
     hosts:
       default: alerts-engine
-      public: alertmanager
-      discovery: alertmanager-discovery
+      public: prometheus-alertmanager
+      discovery: prometheus-alertmanager-discovery
     host_fqdn_override:
       default: null
     path:
@@ -1081,7 +1081,7 @@ conf:
             bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
             relabel_configs:
             - source_labels: [__meta_kubernetes_pod_label_application]
-              regex: alertmanager
+              regex: prometheus-alertmanager
               action: keep
             - source_labels: [__meta_kubernetes_pod_container_port_name]
               regex: alerts-api
diff --git a/tools/deployment/multinode/060-alertmanager.sh b/tools/deployment/multinode/060-alertmanager.sh
index 269eab398..e8434f500 100755
--- a/tools/deployment/multinode/060-alertmanager.sh
+++ b/tools/deployment/multinode/060-alertmanager.sh
@@ -19,8 +19,7 @@ make prometheus-alertmanager
 
 #NOTE: Deploy command
 helm upgrade --install alertmanager ./prometheus-alertmanager \
-    --namespace=osh-infra \
-    --set pod.replicas.alertmanager=3
+    --namespace=osh-infra
 
 #NOTE: Wait for deploy
 ./tools/deployment/common/wait-for-pods.sh osh-infra