diff --git a/.gitignore b/.gitignore index f0eeb7d9a..c42c18d44 100644 --- a/.gitignore +++ b/.gitignore @@ -42,3 +42,7 @@ log/* # Swap files *.sw[po] + +# Containers +browbeat-containers/collectd-openstack/config/* +ansible-errors.json diff --git a/ansible/README.rst b/ansible/README.rst index 67823b56c..7fd3f211c 100644 --- a/ansible/README.rst +++ b/ansible/README.rst @@ -68,7 +68,7 @@ file to ensure the correct parameters are passed. :: - # ansible-playbook -i hosts install/collectd-openstack.yml + # ansible-playbook -i hosts install/collectd.yml To install collectd on everything other than Openstack machines, view the `README for collectd-generic `__. diff --git a/ansible/install/collectd-openstack.yml b/ansible/install/collectd-baremetal.yml similarity index 86% rename from ansible/install/collectd-openstack.yml rename to ansible/install/collectd-baremetal.yml index b254d0cf7..6311b93c8 100644 --- a/ansible/install/collectd-openstack.yml +++ b/ansible/install/collectd-baremetal.yml @@ -22,7 +22,7 @@ config_type: undercloud roles: - { role: common, when: collectd_undercloud|bool } - - { role: epel, when: collectd_undercloud|bool and ansible_distribution_major_version == '7'} + - { role: epel, when: collectd_undercloud|bool } - { role: repo } - { role: collectd-openstack, when: collectd_undercloud|bool } tasks: @@ -41,7 +41,7 @@ roles: - { role: osp_version } - { role: common, when: collectd_controller|bool } - - { role: epel, when: collectd_controller|bool and ansible_distribution_major_version == '7'} + - { role: epel, when: collectd_controller|bool } - { role: repo } - { role: collectd-openstack, when: collectd_controller|bool } tasks: @@ -59,7 +59,7 @@ config_type: networker roles: - { role: common, when: collectd_networker|bool } - - { role: epel, when: collectd_networker|bool and ansible_distribution_major_version == '7'} + - { role: epel, when: collectd_networker|bool } - { role: repo } - { role: collectd-openstack, when: collectd_networker|bool } tasks: 
@@ -77,7 +77,7 @@ config_type: blockstorage roles: - { role: common, when: collectd_blockstorage|bool } - - { role: epel, when: collectd_blockstorage|bool and ansible_distribution_major_version == '7' } + - { role: epel, when: collectd_blockstorage|bool } - { role: repo } - { role: collectd-openstack, when: collectd_blockstorage|bool } tasks: @@ -95,7 +95,7 @@ config_type: objectstorage roles: - { role: common, when: collectd_objectstorage|bool } - - { role: epel, when: collectd_objectstorage and ansible_distribution_major_version == '7' } + - { role: epel, when: collectd_objectstorage|bool } - { role: repo } - { role: collectd-openstack, when: collectd_objectstorage|bool } tasks: @@ -113,7 +113,7 @@ config_type: cephstorage roles: - { role: common, when: collectd_cephstorage|bool } - - { role: epel, when: collectd_cephstorage|bool and ansible_distribution_major_version == '7'} + - { role: epel, when: collectd_cephstorage|bool } - { role: repo } - { role: collectd-openstack, when: collectd_cephstorage|bool } tasks: @@ -132,7 +132,7 @@ roles: - { role: osp_version } - { role: common, when: collectd_compute|bool } - - { role: epel, when: collectd_compute|bool and ansible_distribution_major_version == '7'} + - { role: epel, when: collectd_compute|bool } - { role: repo } - { role: collectd-openstack, when: collectd_compute|bool } tasks: diff --git a/ansible/install/collectd-container.yml b/ansible/install/collectd-container.yml new file mode 100644 index 000000000..76827c7c7 --- /dev/null +++ b/ansible/install/collectd-container.yml @@ -0,0 +1,83 @@ +--- +# +# Generic Playbook to install collectd, use tags to separate machine type: +# +# Examples: +# +# ansible-playbook -i hosts install/collectd.yml --tags="undercloud" +# ansible-playbook -i hosts install/collectd.yml --tags="controller" +# ansible-playbook -i hosts install/collectd.yml --tags="compute" + + +- hosts: undercloud + remote_user: "{{ local_remote_user }}" + vars: + config_type: undercloud + roles: + - { 
role: osp_version } + - { role: common, when: collectd_undercloud|bool } + - { role: collectd, when: collectd_undercloud|bool } + tags: undercloud + environment: "{{proxy_env}}" + +- hosts: controller + remote_user: "{{ host_remote_user }}" + vars: + config_type: controller + roles: + - { role: osp_version } + - { role: common, when: collectd_controller|bool } + - { role: collectd, when: collectd_controller|bool } + tags: controller + +- hosts: compute + remote_user: "{{ host_remote_user }}" + vars: + config_type: compute + roles: + - { role: osp_version } + - { role: common, when: collectd_compute|bool } + - { role: collectd, when: collectd_compute|bool } + tags: compute + +- hosts: networker + remote_user: "{{ host_remote_user }}" + vars: + config_type: networker + roles: + - { role: osp_version } + - { role: common, when: collectd_networker|bool } + - { role: collectd, when: collectd_networker|bool } + tags: networker + +- hosts: blockstorage + remote_user: "{{ host_remote_user }}" + vars: + config_type: blockstorage + roles: + - { role: osp_version } + - { role: common, when: collectd_blockstorage|bool } + - { role: collectd, when: collectd_blockstorage|bool } + tags: blockstorage + +- hosts: objectstorage + remote_user: "{{ host_remote_user }}" + vars: + config_type: objectstorage + roles: + - { role: osp_version } + - { role: common, when: collectd_objectstorage|bool } + - { role: collectd, when: collectd_objectstorage|bool } + tags: objectstorage + +- hosts: cephstorage + remote_user: "{{ host_remote_user }}" + vars: + config_type: cephstorage + roles: + - { role: osp_version } + - { role: common, when: collectd_cephstorage|bool } + - { role: collectd, when: collectd_cephstorage|bool } + tags: cephstorage + + diff --git a/ansible/install/collectd-generic.yml b/ansible/install/collectd-generic.yml index fc62e2fa1..1777e34a4 100644 --- a/ansible/install/collectd-generic.yml +++ b/ansible/install/collectd-generic.yml @@ -15,7 +15,7 @@ vars: config_type: 
baremetal roles: - - {role: epel, when: ansible_distribution_major_version == '7'} + - {role: epel } - collectd-generic tags: baremetal environment: "{{proxy_env}}" @@ -25,7 +25,7 @@ vars: config_type: guest roles: - - {role: epel, when: ansible_distribution_major_version == '7'} + - {role: epel } - collectd-generic tags: guest environment: "{{proxy_env}}" @@ -35,7 +35,7 @@ vars: config_type: graphite roles: - - {role: epel, when: ansible_distribution_major_version == '7'} + - {role: epel } - collectd-generic tags: graphite environment: "{{proxy_env}}" diff --git a/ansible/install/collectd.yml b/ansible/install/collectd.yml new file mode 100644 index 000000000..ac8a3f316 --- /dev/null +++ b/ansible/install/collectd.yml @@ -0,0 +1,9 @@ +--- +- name: Run containerized collectd (Stein and greater recommended) + import_playbook: collectd-container.yml + when: collectd_container + +- name: Run collectd installed through RPMs + import_playbook: collectd-baremetal.yml + when: not collectd_container + diff --git a/ansible/install/group_vars/all.yml b/ansible/install/group_vars/all.yml index 4e4b797e8..3c4567846 100644 --- a/ansible/install/group_vars/all.yml +++ b/ansible/install/group_vars/all.yml @@ -129,10 +129,6 @@ proxy_env: {} # Disables dns lookup by overcloud sshd process disable_ssh_dns: false -# epel7 rpm for collectd packages -epel7_rpm: https://download.fedoraproject.org/pub/epel/epel-release-latest-7.noarch.rpm -epel7_rpmkey: https://download.fedoraproject.org/pub/epel/RPM-GPG-KEY-EPEL-7 - # Extra Repos to add during collectd install repos: {} # repos: @@ -142,6 +138,9 @@ repos: {} ######################################## # Collectd Configuration ######################################## +# Install release specific templates +# Set collectd_container true for OSP >= 15 +collectd_container: true # Install collectd from EPEL collectd_from_epel: true # Interval in seconds @@ -157,12 +156,7 @@ collectd_objectstorage: true collectd_cephstorage: true 
collectd_compute: false -####################################### -# OSP15 Collectd Configuration -###################################### -rhos_release_rpm: -osp_release: 15 - +######################## # Opt-In Collectd plugins configuration: ######################## # Apache plugin @@ -319,15 +313,6 @@ regex_info: false ping_plugin: false ping_interval: 1 -############################ -# OpenDaylight JAVA Plugin -########################### - -# Plugin assumes that JAVA is already installed on the host -opendaylight_java_plugin: false -karaf_user: karaf -karaf_password: karaf - ######################################## # Carbon/Graphite Configuration ######################################## @@ -337,7 +322,7 @@ graphite_host: graphite_port: 80 carbon_cache_port: 2003 # Graphite prefix / Cloud name used both with graphite and grafana dashboards -graphite_prefix: openstack +graphite_prefix: # Graphite username and password for login on the dashboard # credential aren't created when you deploy graphite, use manage.py graphite_username: root diff --git a/ansible/install/group_vars/zuul_all.yml b/ansible/install/group_vars/zuul_all.yml index 5356ea0ce..aac4a7faa 100644 --- a/ansible/install/group_vars/zuul_all.yml +++ b/ansible/install/group_vars/zuul_all.yml @@ -124,10 +124,6 @@ proxy_env: {} # Disables dns lookup by overcloud sshd process disable_ssh_dns: false -# epel7 rpm for collectd packages -epel7_rpm: https://download.fedoraproject.org/pub/epel/epel-release-latest-7.noarch.rpm -epel7_rpmkey: https://download.fedoraproject.org/pub/epel/RPM-GPG-KEY-EPEL-7 - # Extra Repos to add during collectd install repos: {} # repos: @@ -152,12 +148,6 @@ collectd_objectstorage: true collectd_cephstorage: true collectd_compute: false -####################################### -# OSP15 Collectd Configuration -####################################### -rhos_release_rpm: -osp_release: 15 - # Opt-In Collectd plugins configuration: ######################## # Apache plugin @@ -314,15 +304,6 
@@ regex_info: false ping_plugin: false ping_interval: 1 -############################ -# OpenDaylight JAVA Plugin -########################### - -# Plugin assumes that JAVA is already installed on the host -opendaylight_java_plugin: false -karaf_user: karaf -karaf_password: karaf - ######################################## # Carbon/Graphite Configuration ######################################## diff --git a/ansible/install/roles/collectd-openstack/defaults/main.yml b/ansible/install/roles/collectd-openstack/defaults/main.yml index 7157dcc54..394ded062 100644 --- a/ansible/install/roles/collectd-openstack/defaults/main.yml +++ b/ansible/install/roles/collectd-openstack/defaults/main.yml @@ -145,16 +145,6 @@ controller_monitored_ns: swift_stat_controller_collectd_plugin: false swift_stat_controller_collectd_interval: 10 - -############################ -# OpenDaylight JAVA Plugin -########################### - -# Plugin assumes that JAVA is already installed on the host -opendaylight_java_plugin: false -karaf_user: karaf -karaf_password: karaf - ######################## # tail plugin ######################## diff --git a/ansible/install/roles/collectd-openstack/tasks/main.yml b/ansible/install/roles/collectd-openstack/tasks/main.yml index 9f71f0285..633414e85 100644 --- a/ansible/install/roles/collectd-openstack/tasks/main.yml +++ b/ansible/install/roles/collectd-openstack/tasks/main.yml @@ -15,19 +15,12 @@ - collectd-ceph - collectd-mysql - collectd-turbostat - when: collectd_from_epel and ansible_distribution_major_version < '8' + when: collectd_from_epel - name: Clean Non-EPEL collectd configuration shell: "rm -rf /etc/collectd.d/*.conf" become: true - when: collectd_from_epel and ansible_distribution_major_version < '8' - -- name: Enable OSP repos for controller, compute - shell: | - dnf install -y {{ rhos_release_rpm }} - rhos-release {{ osp_release }} - when: ('controller' in group_names or 'compute' in group_names) and ansible_distribution_major_version == '8' 
- become: yes + when: collectd_from_epel # # (akrzos) yum module works at this point due to the fact the EPEL repo now exists. EPEL rpm is @@ -47,66 +40,7 @@ - collectd-mysql - collectd-ping - collectd-turbostat - when: collectd_from_epel and ansible_distribution_major_version < '8' - -# -# (zul) Remove rhelosp15-0-brew when EPEL 8 exists -# -- name: Install collectd rpms for centos - dnf: - name: "{{ item }}" - state: present - enablerepo: "rhelosp-15.0-brew, rhelosp-15.0-trunk-brew" - become: true - with_items: - - collectd - - collectd-apache - - collectd-ceph - - collectd-mysql - - collectd-ping - - collectd-turbostat - - collectd-disk - - collectd-python - when: ansible_distribution_major_version == '8' - -# (sai) Since we moved to containers we don't have java installed on the host -# anymore but it is needed for collectd-java -- name: Add repository - yum_repository: - name: CentOS-7-Base - description: Core CentOS7 Packages - baseurl: http://mirror.centos.org/centos/7/os/$basearch/ - enabled: yes - become: true - register: repo_add - when: ('controller' in group_names and opendaylight_java_plugin and ansible_distribution_major_version < '8') - -- name: Add key - rpm_key: - state: present - key: https://www.centos.org/keys/RPM-GPG-KEY-CentOS-7 - become: true - when: ansible_distribution_major_version < '8' - -# (sai) Separating out collectd java rpms as they have a lot of dependencies and -# are only required for ODL monitoring on controllers only -- name: Install collectd java specific rpms - yum: - name: "{{ item }}" - state: present - become: true - with_items: - - java-1.8.0-openjdk - - collectd-java - - collectd-generic-jmx - when: (repo_add is success and 'controller' in group_names and opendaylight_java_plugin and ansible_distribution_major_version < '8') - -- name: Remove repository - yum_repository: - name: CentOS-7-Base - state: absent - become: true - when: (repo_add is success and 'controller' in group_names and opendaylight_java_plugin and 
ansible_distribution_major_version < '8') + when: collectd_from_epel # Iostat plugin requires sysstat since shelling iostat for stats, Also it is # handy to have sysstat. @@ -398,15 +332,6 @@ mode: 0644 become: true -# OpenDaylight Monitoring -- name: Symlink libjvm - file: - src: /usr/lib/jvm/jre/lib/amd64/server/libjvm.so - dest: /usr/lib64/libjvm.so - state: link - become: true - when: ('controller' in group_names and opendaylight_java_plugin) - # # Configure selinux bits # diff --git a/ansible/install/roles/collectd-openstack/templates/controller.collectd.conf.j2 b/ansible/install/roles/collectd-openstack/templates/controller.collectd.conf.j2 index 290d16779..e315fe8d0 100644 --- a/ansible/install/roles/collectd-openstack/templates/controller.collectd.conf.j2 +++ b/ansible/install/roles/collectd-openstack/templates/controller.collectd.conf.j2 @@ -54,9 +54,6 @@ LoadPlugin uptime {% if ping_plugin %} LoadPlugin ping {% endif %} -{% if opendaylight_java_plugin %} -LoadPlugin java -{% endif %} # Open unix domain socket for collectdctl @@ -476,7 +473,7 @@ PreCacheChain "PreCache" # Tail httpd request time {%if apache_controller_collectd_request_time %} - {%if 'Queens' in osp_version['content'] | b64decode or 'Pike' in osp_version['content'] | b64decode %} + {% if osp_version | version_compare('12.0.0', '>=') and osp_version | version_compare('14.0.0', '<') %} {% else %} @@ -589,7 +586,7 @@ PreCacheChain "PreCache" - {%if 'Queens' in osp_version['content'] | b64decode or 'Pike' in osp_version['content'] | b64decode %} + {% if osp_version | version_compare('12.0.0', '>=') and osp_version | version_compare('14.0.0', '<') %} {% else %} @@ -2039,98 +2036,6 @@ PreCacheChain "PreCache" -{% if opendaylight_java_plugin %} - - -JVMArg "-Djava.class.path=/usr/share/collectd/java/collectd-api.jar:/usr/share/collectd/java/generic-jmx.jar" -LoadPlugin "org.collectd.java.GenericJMX" - - - ObjectName "java.lang:type=GarbageCollector,*" - InstancePrefix "gc-" - InstanceFrom "name" - 
- Type "derive" - Table false - Attribute "CollectionCount" - InstancePrefix "count" - - - - ObjectName "java.lang:type=GarbageCollector,*" - InstancePrefix "gc-" - InstanceFrom "name" - - Type "derive" - Table false - Attribute "CollectionTime" - InstancePrefix "time" - - - - ObjectName "java.lang:type=MemoryPool,*" - InstancePrefix "memory_pool-" - InstanceFrom "name" - - Type "memory" - Table true - Attribute "Usage" - - - - ObjectName "java.lang:type=Memory" - InstancePrefix "memory-heap" - - Type "memory" - Table true - Attribute "HeapMemoryUsage" - - - - ObjectName "java.lang:type=Memory" - InstancePrefix "memory-nonheap" - - Type "memory" - Table true - Attribute "NonHeapMemoryUsage" - - - - ObjectName "java.lang:type=Threading" - InstancePrefix "threading" - - Type "gauge" - Table false - Attribute "ThreadCount" - InstancePrefix "count" - - - - ObjectName "java.lang:type=Threading" - InstancePrefix "threading" - - Type "gauge" - Table false - Attribute "DaemonThreadCount" - InstancePrefix "count-daemon" - - - - ServiceURL "service:jmx:rmi:///jndi/rmi://localhost:1099/karaf-root" - Collect "memory_pool" - Collect "memory-heap" - Collect "memory-nonheap" - Collect "gc-count" - Collect "gc-time" - Collect "thread" - Collect "thread-daemon" - User "{{karaf_user}}" - Password "{{karaf_password}}" - - - -{% endif %} - {% if ovsagent_controller_monitor %} ModulePath "/usr/local/bin/" diff --git a/ansible/install/roles/collectd/tasks/main.yml b/ansible/install/roles/collectd/tasks/main.yml new file mode 100644 index 000000000..f9895497c --- /dev/null +++ b/ansible/install/roles/collectd/tasks/main.yml @@ -0,0 +1,105 @@ +--- +- name: Fetch log file paths + include_vars: + file: "vars/{{ rhosp_major }}.yml" + +- name: (Undercloud) Get ctlplane ip address + shell: ip r | egrep 'br-ctlplane\s*proto kernel' | awk '{print $NF}' + register: undercloud_ctlplane_ip_address + when: "'undercloud' in group_names" + +- name: Configure mysql for collectd.conf + block: + - name: 
(Controller) Get mysql root password + command: hiera -c /etc/puppet/hiera.yaml mysql::server::root_password + become: true + register: mysql_root_password + when: "'controller' in group_names" + + - name: (Undercloud) Get mysql root password + shell: | + grep undercloud_mysql_root_password: undercloud-passwords.conf | sed 's/undercloud_mysql_root_password: //g' + register: undercloud_mysql_password + when: "'undercloud' in group_names" + +- name: Configure rabbitmq monitoring + block: + - name: (Undercloud) Get Rabbitmq username + command: hiera -c /etc/puppet/hiera.yaml rabbitmq::default_user + become: true + register: undercloud_rabbitmq_username + when: "('undercloud' in group_names and rabbitmq_undercloud_collectd_plugin)" + + - name: (Undercloud) Get Rabbitmq password + shell: | + grep undercloud_rabbit_password /home/stack/undercloud-passwords.conf | sed 's/undercloud_rabbit_password: //g' + register: undercloud_rabbitmq_password + when: "('undercloud' in group_names and rabbitmq_undercloud_collectd_plugin)" + + - name: (Controller) Get Rabbitmq username + command: hiera -c /etc/puppet/hiera.yaml rabbitmq::default_user + register: controller0_rabbitmq_username + become: true + when: "'controller' in group_names and rabbitmq_controller_collectd_plugin and inventory_hostname == groups['controller'][0]" + + - name: (Controller) Get Rabbitmq password + command: hiera -c /etc/puppet/hiera.yaml rabbitmq::default_pass + register: controller0_rabbitmq_password + become: true + when: "'controller' in group_names and rabbitmq_controller_collectd_plugin and inventory_hostname == groups['controller'][0]" + +- name: Check if Container Files Directory exists + stat: + path: "{{ ansible_user_dir }}/browbeat/browbeat-containers/collectd-openstack" + register: directory_exists + +- name: Copy browbeat-containers directory if it doesn't exist + copy: + src: "{{ browbeat_path }}/browbeat-containers/collectd-openstack" + dest: "{{ ansible_user_dir 
}}/browbeat/browbeat-containers" + when: not (directory_exists.stat.isdir is defined and directory_exists.stat.isdir) + +- name: Set browbeat_contianers_path + set_fact: + browbeat_containers_path: "{{ ansible_user_dir }}/browbeat/browbeat-containers" + +- name: Create configuration directory + file: + path: "{{ browbeat_containers_path }}/collectd-openstack/config" + state: directory + +- name: Configure collectd.conf + template: + src: "{{ config_type }}.collectd.conf.j2" + dest: "{{ browbeat_containers_path }}/collectd-openstack/config/collectd.conf" + +- name: Build and Run container using Docker (OSP < 15) + block: + - name: Set container_cli (OSP < 15) + set_fact: + container_cli: docker + when: rhosp_version is version('15.0', '<') and osp_version is version('12.0', '>=') + + - name: Set container_cli (OSP > 15) + set_fact: + container_cli: podman + when: rhosp_version is version('15.0', '>=') + + - name: Build collectd-openstack container (Docker) + shell: | + {{ container_cli }} build -t collectd-openstack {{ browbeat_containers_path }}/collectd-openstack/ + become: true + - name: Run collectd-openstack container (Docker) + shell: | + {{ container_cli }} rm -f collectd-{{ config_type }} + {{ container_cli }} run --name collectd-{{ config_type }} \ + --network host --pid host \ + --privileged -d \ + -v /var/log/containers:/var/log/containers \ + -v /dev:/dev \ + {% if config_type == 'controller' %} + -v /var/lib/mysql/mysql.sock:/var/lib/mysql/mysql.sock \ + {% endif %} + collectd-openstack + become: yes + diff --git a/ansible/install/roles/collectd/templates/00-browbeat_mod_status.conf.j2 b/ansible/install/roles/collectd/templates/00-browbeat_mod_status.conf.j2 new file mode 100644 index 000000000..a7f50015d --- /dev/null +++ b/ansible/install/roles/collectd/templates/00-browbeat_mod_status.conf.j2 @@ -0,0 +1,25 @@ +# Installed by Browbeat Ansible Installer + +LoadModule status_module modules/mod_status.so + +{% if 'undercloud' in group_names %} +Listen 
{{apache_undercloud_mod_status_port}} +{% endif %} +{% if 'controller' in group_names %} +Listen {{apache_controller_mod_status_port}} +{% endif %} + +ExtendedStatus on +{% if 'undercloud' in group_names %} + +{% endif %} +{% if 'controller' in group_names %} + +{% endif %} + + SetHandler server-status + Order deny,allow + Deny from all + Allow from 127.0.0.1 + + diff --git a/ansible/install/roles/collectd/templates/baremetal.collectd.conf.j2 b/ansible/install/roles/collectd/templates/baremetal.collectd.conf.j2 new file mode 100644 index 000000000..14a55fb24 --- /dev/null +++ b/ansible/install/roles/collectd/templates/baremetal.collectd.conf.j2 @@ -0,0 +1,73 @@ +# Installed by Browbeat Ansible Installer +# Config type: {{config_type}} + +# Interval default is 10s +Interval {{collectd_interval}} + +# Hostname for this machine, if not defined, use gethostname(2) system call +Hostname "{{inventory_hostname}}" + +LoadPlugin write_graphite +LoadPlugin cpu +LoadPlugin df +LoadPlugin interface +LoadPlugin irq +LoadPlugin load +LoadPlugin memory +LoadPlugin numa +LoadPlugin processes +LoadPlugin swap +LoadPlugin turbostat +LoadPlugin unixsock +LoadPlugin uptime + +LoadPlugin disk + + ChangeRoot "/hostfs" + FSType anon_inodefs + FSType bdev + FSType cgroup + FSType cpuset + FSType debugfs + FSType devpts + FSType devtmpfs + FSType ecryptfs + FSType fuse + FSType fusectl + FSType hugetlbfs + FSType mqueue + FSType nfs + FSType nfs4 + FSType nfsd + FSType pipefs + FSType proc + FSType pstore + FSType ramfs + #FSType rootfs + FSType rpc_pipefs + FSType securityfs + FSType sockfs + FSType sysfs + FSType tmpfs + FSType vboxsf + IgnoreSelected true + ValuesPercentage true + ReportInodes true + + +# Graphite Host Configuration + + + Host "{{graphite_host}}" + Port "{{collectd_write_graphite_port}}" + Prefix "{{graphite_prefix}}." 
+ Protocol "tcp" + LogSendErrors true + StoreRates true + AlwaysAppendDS false + EscapeCharacter "_" + + + +# Include other collectd configuration files +Include "/etc/collectd.d" diff --git a/ansible/install/roles/collectd/templates/blockstorage.collectd.conf.j2 b/ansible/install/roles/collectd/templates/blockstorage.collectd.conf.j2 new file mode 100644 index 000000000..93a88eec0 --- /dev/null +++ b/ansible/install/roles/collectd/templates/blockstorage.collectd.conf.j2 @@ -0,0 +1,195 @@ +# Installed by Browbeat Ansible Installer +# Config type: {{config_type}} + +# Interval default is 10s +Interval {{collectd_interval}} + +# Hostname for this machine, if not defined, use gethostname(2) system call +Hostname "{{inventory_hostname}}" + +# Loaded Plugins: +LoadPlugin "logfile" + + File "/var/log/collectd.log" + LogLevel "info" + PrintSeverity true + Timestamp true + + +LoadPlugin write_graphite +LoadPlugin cpu +LoadPlugin conntrack +LoadPlugin df +LoadPlugin disk +LoadPlugin exec +LoadPlugin interface +LoadPlugin irq +LoadPlugin load +LoadPlugin match_regex +LoadPlugin memory +LoadPlugin numa +LoadPlugin processes +{%if iostat_blockstorage_collectd_plugin %} + + Globals true + +{% endif %} +LoadPlugin swap +LoadPlugin tail +LoadPlugin turbostat +LoadPlugin unixsock +LoadPlugin uptime + +# Open unix domain socket for collectdctl + + SocketFile "/var/run/collectd-unixsock" + SocketGroup "collectd" + SocketPerms "0770" + DeleteSocket true + + +PreCacheChain "PreCache" + + + + Plugin "^interface$" + PluginInstance "^tap*" + + Target "stop" + + + + Plugin "^interface$" + PluginInstance "^q.*" + + Target "stop" + + Target "return" + + +# Graphite Host Configuration + + + Host "{{graphite_host}}" + Port "{{collectd_write_graphite_port}}" + Prefix "{{graphite_prefix}}." 
+ Protocol "tcp" + LogSendErrors true + StoreRates true + AlwaysAppendDS false + EscapeCharacter "_" + + + + + FSType anon_inodefs + FSType bdev + FSType cgroup + FSType cpuset + FSType debugfs + FSType devpts + FSType devtmpfs + FSType ecryptfs + FSType fuse + FSType fusectl + FSType hugetlbfs + FSType mqueue + FSType nfs + FSType nfs4 + FSType nfsd + FSType overlay + FSType pipefs + FSType proc + FSType pstore + FSType ramfs + #FSType rootfs + FSType rpc_pipefs + FSType securityfs + FSType sockfs + FSType sysfs + FSType tmpfs + FSType vboxsf + MountPoint "/^/var/lib/docker/.*/" + IgnoreSelected true + ValuesPercentage true + ReportInodes true + + +{%if disk_blockstorage_collectd_plugin %} + + Disk "/^[hsv]d[a-z]+[0-9]?$/" + Disk "/^nvm/" + IgnoreSelected false + + +{% endif %} +{%if iostat_blockstorage_collectd_plugin %} + + ModulePath "/usr/local/bin/" + Import "collectd_iostat_python" + + + Path "/usr/bin/iostat" + Interval {{iostat_blockstorage_collectd_interval}} + IostatInterval 2 + Count 2 + Verbose false + NiceNames false + PluginName collectd_iostat_python + + + +{% endif %} +# (akrzos) Including the version of OpenStack that the process was verified as running after +# OpenStack Installation with a comment at the end of each Process/ProcessMatch statement. +# A Minus before the version means the process was not found in that version. 
(Ex -9) + + # Cinder (OpenStack Installed) + ProcessMatch "cinder-volume" "python.+cinder-volume" # 10,11 + + # Collectd (Browbeat Installed) + ProcessMatch "collectd" "/usr/sbin/collectd" + + # OVS (OpenStack Installed) + ProcessMatch "ovs-vswitchd" "ovs-vswitchd.+openvswitch" # 10,11 + ProcessMatch "ovsdb-server" "ovsdb-server.+openvswitch" # 10,11 + + + + ReportBytes true + ValuesPercentage true + + +# Tail plugin configuration + + + + Instance "cinder-volume" + + Regex " ERROR " + DSType "CounterInc" + Type "counter" + Instance "error" + +{%if regex_warn %} + + Regex " WARNING " + DSType "CounterInc" + Type "counter" + Instance "warn" + +{% endif %} +{%if regex_info %} + + Regex " INFO " + DSType "CounterInc" + Type "counter" + Instance "info" + +{% endif %} + + + + +# Include other collectd configuration files +Include "/etc/collectd.d" diff --git a/ansible/install/roles/collectd/templates/cephstorage.collectd.conf.j2 b/ansible/install/roles/collectd/templates/cephstorage.collectd.conf.j2 new file mode 100644 index 000000000..beaf822fd --- /dev/null +++ b/ansible/install/roles/collectd/templates/cephstorage.collectd.conf.j2 @@ -0,0 +1,182 @@ +# Installed by Browbeat Ansible Installer +# Config type: {{config_type}} + +# Interval default is 10s +Interval {{collectd_interval}} + +# Hostname for this machine, if not defined, use gethostname(2) system call +Hostname "{{inventory_hostname}}" + +# Loaded Plugins: +LoadPlugin "logfile" + + File "/var/log/collectd.log" + LogLevel "info" + PrintSeverity true + Timestamp true + + +LoadPlugin write_graphite +{% if ceph_storage_collectd_plugin %} +LoadPlugin ceph +{% endif %} +LoadPlugin cpu +LoadPlugin conntrack +LoadPlugin df +LoadPlugin disk +LoadPlugin exec +LoadPlugin interface +LoadPlugin irq +LoadPlugin load +LoadPlugin match_regex +LoadPlugin memory +LoadPlugin numa +LoadPlugin processes +{%if iostat_cephstorage_collectd_plugin %} + + Globals true + +{% endif %} +LoadPlugin swap +LoadPlugin tail +LoadPlugin 
turbostat +LoadPlugin unixsock +LoadPlugin uptime + +# Open unix domain socket for collectdctl + + SocketFile "/var/run/collectd-unixsock" + SocketGroup "collectd" + SocketPerms "0770" + DeleteSocket true + + +PreCacheChain "PreCache" + + + + Plugin "^interface$" + PluginInstance "^tap*" + + Target "stop" + + + + Plugin "^interface$" + PluginInstance "^q.*" + + Target "stop" + + Target "return" + + +# Graphite Host Configuration + + + Host "{{graphite_host}}" + Port "{{collectd_write_graphite_port}}" + Prefix "{{graphite_prefix}}." + Protocol "tcp" + LogSendErrors true + StoreRates true + AlwaysAppendDS false + EscapeCharacter "_" + + + +{% if ceph_storage_collectd_plugin %} + + LongRunAvgLatency false + ConvertSpecialMetricTypes true + + SocketPath "/var/run/ceph/ceph-osd.{{cephstorage_osd_socket.stdout}}.asok" + + + +{% endif %} + + FSType anon_inodefs + FSType bdev + FSType cgroup + FSType cpuset + FSType debugfs + FSType devpts + FSType devtmpfs + FSType ecryptfs + FSType fuse + FSType fusectl + FSType hugetlbfs + FSType mqueue + FSType nfs + FSType nfs4 + FSType nfsd + FSType overlay + FSType pipefs + FSType proc + FSType pstore + FSType ramfs + #FSType rootfs + FSType rpc_pipefs + FSType securityfs + FSType sockfs + FSType sysfs + FSType tmpfs + FSType vboxsf + MountPoint "/^/var/lib/docker/.*/" + IgnoreSelected true + ValuesPercentage true + ReportInodes true + + +{%if disk_cephstorage_collectd_plugin %} + + Disk "/^[hsv]d[a-z]+[0-9]?|nvme.+$/" + Disk "/^nvm/" + IgnoreSelected false + + +{% endif %} +{%if iostat_cephstorage_collectd_plugin %} + + ModulePath "/usr/local/bin/" + Import "collectd_iostat_python" + + + Path "/usr/bin/iostat" + Interval {{iostat_cephstorage_collectd_interval}} + IostatInterval 2 + Count 2 + Verbose false + NiceNames false + PluginName collectd_iostat_python + + + +{% endif %} +# (akrzos) Including the version of OpenStack that the process was verified as running after +# OpenStack Installation with a comment at the end of each 
Process/ProcessMatch statement. +# A Minus before the version means the process was not found in that version. (Ex -9) + + # Ceph (OpenStack Installed) + ProcessMatch "ceph-osd" "^/usr/bin/ceph-osd" # 10,11,12 + + # Collectd (Browbeat Installed) + ProcessMatch "collectd" "/usr/sbin/collectd" + + # OVS (OpenStack Installed) + ProcessMatch "ovs-vswitchd" "ovs-vswitchd.+openvswitch" # 10,11,12 + ProcessMatch "ovsdb-server" "ovsdb-server.+openvswitch" # 10,11,12 + + + + ReportBytes true + ValuesPercentage true + + +# Tail plugin configuration + + {# Add ceph logs to tail #} + + +# Include other collectd configuration files +Include "/etc/collectd.d" diff --git a/ansible/install/roles/collectd/templates/compute.collectd.conf.j2 b/ansible/install/roles/collectd/templates/compute.collectd.conf.j2 new file mode 100644 index 000000000..f4f9ce958 --- /dev/null +++ b/ansible/install/roles/collectd/templates/compute.collectd.conf.j2 @@ -0,0 +1,222 @@ +# Installed by Browbeat Ansible Installer +# Config type: {{config_type}} + +# Interval default is 10s +Interval {{collectd_interval}} + +# Hostname for this machine, if not defined, use gethostname(2) system call +Hostname "{{inventory_hostname}}" + +# Loaded Plugins: +LoadPlugin "logfile" + +File "/var/log/collectd.log" + LogLevel "info" + PrintSeverity true + Timestamp true + + +LoadPlugin write_graphite +LoadPlugin cpu +LoadPlugin conntrack +LoadPlugin df +LoadPlugin disk +LoadPlugin exec +LoadPlugin interface +LoadPlugin irq +LoadPlugin load +LoadPlugin match_regex +LoadPlugin memory +LoadPlugin numa +LoadPlugin processes +{%if iostat_compute_collectd_plugin %} + + Globals true + +{% endif %} +LoadPlugin swap +LoadPlugin tail +LoadPlugin uptime + +PreCacheChain "PreCache" + + + + Plugin "^interface$" + PluginInstance "^tap*" + + Target "stop" + + + + Plugin "^interface$" + PluginInstance "^q.*" + + Target "stop" + + Target "return" + + +# Graphite Host Configuration + + + Host "{{graphite_host}}" + Port 
"{{collectd_write_graphite_port}}" + Prefix "{{graphite_prefix}}." + Protocol "tcp" + LogSendErrors true + StoreRates true + AlwaysAppendDS false + EscapeCharacter "_" + + + + + FSType anon_inodefs + FSType bdev + FSType cgroup + FSType cpuset + FSType debugfs + FSType devpts + FSType devtmpfs + FSType ecryptfs + FSType fuse + FSType fusectl + FSType hugetlbfs + FSType mqueue + FSType nfs + FSType nfs4 + FSType nfsd + FSType overlay + FSType pipefs + FSType proc + FSType pstore + FSType ramfs + #FSType rootfs + FSType rpc_pipefs + FSType securityfs + FSType sockfs + FSType sysfs + FSType tmpfs + FSType vboxsf + MountPoint "/^/var/lib/docker/.*/" + IgnoreSelected true + ValuesPercentage true + ReportInodes true + + +{%if disk_compute_collectd_plugin %} + + Disk "/^[hsv]d[a-z]+[0-9]?$/" + Disk "/^nvm/" + IgnoreSelected false + + +{% endif %} +{%if iostat_compute_collectd_plugin %} + + ModulePath "/usr/local/bin/" + Import "collectd_iostat_python" + + + Path "/usr/bin/iostat" + Interval {{iostat_compute_collectd_interval}} + IostatInterval 2 + Count 2 + Verbose false + NiceNames false + PluginName collectd_iostat_python + + + +{% endif %} +# (akrzos) Including the version of OpenStack that the process was verified as running after +# OpenStack Installation with a comment at the end of each Process/ProcessMatch statement. +# A Minus before the version means the process was not found in that version. 
(Ex -10) + + # Ceilometer (OpenStack Installed) + ProcessMatch "ceilometer-polling" "ceilometer-polling" # 10,11,12,13 + + # Collectd (Browbeat Installed) + ProcessMatch "collectd" "/usr/sbin/collectd" + + # Neutron (OpenStack Installed) + ProcessMatch "neutron-l3-agent" "python.+neutron-l3-agent" # 10 with DVR + ProcessMatch "neutron-ns-metadata-proxy" "python.+neutron-ns-metadata-proxy" # 10 with DVR + ProcessMatch "neutron-metadata-agent" "python.+neutron-metadata-agent" # 10 with DVR + ProcessMatch "neutron-openvswitch-agent" "python.+neutron-openvswitch-agent" # 10,11,12,13 + + # Nova (OpenStack Installed) + ProcessMatch "nova-compute" "python.+nova-compute" # 10,11,12,13 + ProcessMatch "privsep-helper" "python.+/bin/privsep-helper" # 11,12,13 + + # OVS (OpenStack Installed) + ProcessMatch "ovs-vswitchd" "ovs-vswitchd.+openvswitch" # 10,11,12,13 + ProcessMatch "ovsdb-client" "ovsdb-client" # 10,11,12,13 + ProcessMatch "ovsdb-server" "ovsdb-server.+openvswitch" # 10,11,12,13 + ProcessMatch "ovn-controller" "ovn-controller.+openvswitch" # 9,10 + ProcessMatch "ovn-controller-vtep" "ovn-controller-vtep.+openvswitch" # 9,10 + + # QEMU / libvirt (OpenStack Installed) + ProcessMatch "qemu-kvm" "/usr/libexec/qemu-kvm" # 10,11,12,13 + ProcessMatch "libvirtd" "/usr/sbin/libvirtd" # 10,11,12,13 + ProcessMatch "virtlockd" "/usr/sbin/virtlockd" # 10,11,-12,-13 + ProcessMatch "virtlogd" "/usr/sbin/virtlogd" # 10,11,12,13 + + + + ReportBytes true + ValuesPercentage true + + +# Tail plugin configuration + + + + {% for item in collectd_logs[config_type] %} + + Instance "{{ item.instance }}" + + Regex " ERROR " + DSType "CounterInc" + Type "counter" + Instance "error" + +{%if regex_warn %} + + Regex " WARNING " + DSType "CounterInc" + Type "counter" + Instance "warn" + +{% endif %} +{%if regex_info %} + + Regex " INFO " + DSType "CounterInc" + Type "counter" + Instance "info" + +{% endif %} + + {% endfor %} + + + +{% if ovsagent_compute_monitor %} + + ModulePath 
"/usr/local/bin/" + LogTraces true + Interactive false + Import "collectd_ovsagent" + + prefix ovsagent + interval 10 + interfaces {% for int in compute_monitored_ints %} {{int}} {% endfor %} + + + +{% endif %} + +# Include other collectd configuration files +Include "/etc/collectd.d" diff --git a/ansible/install/roles/collectd/templates/controller.collectd.conf.j2 b/ansible/install/roles/collectd/templates/controller.collectd.conf.j2 new file mode 100644 index 000000000..2a3e01284 --- /dev/null +++ b/ansible/install/roles/collectd/templates/controller.collectd.conf.j2 @@ -0,0 +1,598 @@ +# Installed by Browbeat Ansible Installer +# Config type: {{config_type}} + +# Interval default is 10s +Interval {{collectd_interval}} + +# Hostname for this machine, if not defined, use gethostname(2) system call +Hostname "{{inventory_hostname}}" + +# Loaded Plugins: +LoadPlugin "logfile" + + File "/var/log/collectd.log" + LogLevel "info" + PrintSeverity true + Timestamp true + + +LoadPlugin write_graphite +{% if apache_controller_collectd_plugin %} +LoadPlugin apache +{% endif %} +{% if ceph_controller_collectd_plugin %} +{% if inventory_hostname == groups['controller'][0] %} +LoadPlugin ceph +{% endif %} +{% endif %} +LoadPlugin cpu +LoadPlugin conntrack +{% if keystone_overcloud_collectd_plugin %} +{%if inventory_hostname == groups['controller'][0] %} +LoadPlugin dbi +{% endif %} +{% endif %} +LoadPlugin df +LoadPlugin disk +LoadPlugin exec +LoadPlugin interface +LoadPlugin irq +LoadPlugin load +LoadPlugin match_regex +LoadPlugin memory +LoadPlugin mysql +LoadPlugin numa +LoadPlugin processes + + Globals true + +LoadPlugin swap +LoadPlugin tail +LoadPlugin uptime +{% if ping_plugin %} +LoadPlugin ping +{% endif %} + +PreCacheChain "PreCache" + + + + Plugin "^interface$" + PluginInstance "^tap*" + + Target "stop" + + + + Plugin "^interface$" + PluginInstance "^q.*" + + Target "stop" + + Target "return" + + +# Graphite Host Configuration + + + Host "{{graphite_host}}" + Port 
"{{collectd_write_graphite_port}}" + Prefix "{{graphite_prefix}}." + Protocol "tcp" + LogSendErrors true + StoreRates true + AlwaysAppendDS false + EscapeCharacter "_" + + +{% if apache_controller_collectd_plugin %} + + + + URL "http://127.0.0.1:{{apache_controller_mod_status_port}}/mod_status?auto" + + +{% endif %} +{% if ceph_controller_collectd_plugin %} +{% if inventory_hostname == groups['controller'][0] %} + + + LongRunAvgLatency false + ConvertSpecialMetricTypes true + + SocketPath "/var/run/ceph/ceph-mon.{{inventory_hostname}}.asok" + + + +{% endif %} +{% endif %} +{% if ceph_controller_collectd_radosbench_plugin or ceph_controller_collectd_mon_plugin or ceph_controller_collectd_osd_plugin or ceph_controller_collectd_pg_plugin or ceph_controller_collectd_pool_plugin %} +{% if inventory_hostname == groups['controller'][0] %} + + + LogTraces true + Interactive false + ModulePath "/usr/local/bin/" + Import "collectd_ceph_storage" + + CephCluster "ceph" + + CephRadosBench {{ceph_controller_collectd_radosbench_plugin}} + CephRadosBenchInterval {{ceph_controller_collectd_radosbench_interval}} + CephMONStats {{ceph_controller_collectd_mon_plugin}} + CephMONStatsInterval {{ceph_controller_collectd_mon_interval}} + CephOSDStats {{ceph_controller_collectd_osd_plugin}} + CephOSDStatsInterval {{ceph_controller_collectd_osd_interval}} + CephPGStats {{ceph_controller_collectd_pg_plugin}} + CephPGStatsInterval {{ceph_controller_collectd_pg_interval}} + CephPoolStats {{ceph_controller_collectd_pool_plugin}} + CephPoolStatsInterval {{ceph_controller_collectd_pool_interval}} + + +{% endif %} +{% endif %} +{% if keystone_overcloud_collectd_plugin %} +{%if inventory_hostname == groups['controller'][0] %} + + + + Statement "select count(*) as count from token" + + Type gauge + InstancePrefix "token" + ValuesFrom "count" + + + + Driver "mysql" + DriverOption "host" "localhost" + DriverOption "dbname" "keystone" + DriverOption "username" "root" + DriverOption "password" 
"{{mysql_root_password.stdout}}" + DriverOption "mysql_unix_socket" "/var/lib/mysql/mysql.sock" + Query token_count + + +{% else %} +# Token Count plugin installed and enabled on {{groups['controller'][0]}} +{% endif %} +{% endif %} + + + FSType anon_inodefs + FSType bdev + FSType cgroup + FSType cpuset + FSType debugfs + FSType devpts + FSType devtmpfs + FSType ecryptfs + FSType fuse + FSType fusectl + FSType hugetlbfs + FSType mqueue + FSType nfs + FSType nfs4 + FSType nfsd + FSType overlay + FSType pipefs + FSType proc + FSType pstore + FSType ramfs + #FSType rootfs + FSType rpc_pipefs + FSType securityfs + FSType sockfs + FSType sysfs + FSType tmpfs + FSType vboxsf + MountPoint "/^/var/lib/docker/.*/" + IgnoreSelected true + ValuesPercentage true + ReportInodes true + + +{%if disk_controller_collectd_plugin %} + + Disk "/^[hsv]d[a-z]+[0-9]?$/" + Disk "/^nvm/" + IgnoreSelected false + + +{% endif %} +{%if iostat_controller_collectd_plugin %} + + ModulePath "/usr/local/bin/" + Import "collectd_iostat_python" + + + Path "/usr/bin/iostat" + Interval {{iostat_controller_collectd_interval}} + IostatInterval 2 + Count 2 + Verbose false + NiceNames false + PluginName collectd_iostat_python + + + +{% endif %} +{%if gnocchi_status_controller_collectd_plugin %} +{%if inventory_hostname == groups['controller'][0] %} + + ModulePath "/usr/local/bin/" + LogTraces true + Interactive false + Import "collectd_gnocchi_status" + + interval {{gnocchi_status_controller_collectd_interval}} + + + +{% else %} +# Gnocchi status plugin installed and enabled on {{groups['controller'][0]}} + +{% endif %} +{% endif %} + + + Host "localhost" + User "root" + Password "{{mysql_root_password.stdout}}" + Socket "/var/lib/mysql/mysql.sock" + InnodbStats true + + + +# (akrzos) Including the version of OpenStack that the process was verified as running after +# OpenStack Installation with a comment at the end of each Process/ProcessMatch statement. 
+# A Minus before the version means the process was not found in that version. (Ex -10) + + # Aodh (OpenStack Installed) + ProcessMatch "aodh-evaluator" "aodh-evaluator" # 10,11,12,13 + ProcessMatch "aodh-listener" "aodh-listener" # 10,11,12,13 + ProcessMatch "aodh-notifier" "aodh-notifier" # 10,11,12,13 + ProcessMatch "aodh_wsgi" "aodh_wsgi.*-DFOREGROUND" # 11,12,13 + + # Barbican (OpenStack Installed) + ProcessMatch "barbican_wsgi" "barbican_wsgi.*-DFOREGROUND" # 13 + ProcessMatch "barbican-keystone-listener" "python.+barbican-keystone-listener" # 13 + ProcessMatch "barbican-worker" "python.+barbican-worker" # 13 + + # Ceilometer (OpenStack Installed) + ProcessMatch "ceilometer-agent-notification" "ceilometer-agent-notification" # 10,11,12,13 + ProcessMatch "ceilometer-collector" "ceilometer-collector" # 10,11,-12,-13 + ProcessMatch "ceilometer-polling" "ceilometer-polling" # 10,11,12,13 + ProcessMatch "ceilometer_wsgi" "ceilometer_wsgi.*-DFOREGROUND" # 11,-12,-13 + + # Ceph (OpenStack Installed) + # When CephStorage nodes deployed + ProcessMatch "ceph-mon" "^/usr/bin/ceph-mon" # -10,-11,-12,-13 + + # Cinder (OpenStack Installed) + ProcessMatch "cinder-api" "python.+cinder-api" # 10,-11,-12 + ProcessMatch "cinder-scheduler" "python.+cinder-scheduler" # 10,11,12,13 + ProcessMatch "cinder-volume" "python.+cinder-volume" # 10,11,12,13 + ProcessMatch "cinder_wsgi" "cinder_wsgi.*-DFOREGROUND" # 11,12,13 + + # Collectd (Browbeat Installed) + ProcessMatch "collectd" "/usr/sbin/collectd" + + # Docker (OpenStack Installed) + ProcessMatch "dockerd-current" "dockerd-current" # 12,13 + + # Pacemaker / Corosync (OpenStack Installed) + ProcessMatch "attrd" "/usr/libexec/pacemaker/attrd" # 10,11,12,13 + ProcessMatch "cib" "/usr/libexec/pacemaker/cib" # 10,11,12,13 + Process "corosync" # 10,11,12,13 + ProcessMatch "crmd" "/usr/libexec/pacemaker/crmd" # 10,11,12,13 + ProcessMatch "lrmd" "/usr/libexec/pacemaker/lrmd" # 10,11,12,13 + ProcessMatch "pacemakerd" "/usr/sbin/pacemakerd" 
# 10,11,12,13 + ProcessMatch "pcsd" "^/usr/bin/ruby.+/usr/lib/pcsd" # 10,11,12,13 + ProcessMatch "pengine" "/usr/libexec/pacemaker/pengine" # 10,11,12,13 + ProcessMatch "stonithd" "/usr/libexec/pacemaker/stonithd" # 10,11,12,13 + + # Everything Else (OpenStack Installed) + # (Processes displayed under "Everything Else" on Grafana Dashboards) + ProcessMatch "dnsmasq" "^dnsmasq.+" # 10,11,12 + ProcessMatch "haproxy" "/usr/sbin/haproxy.+/etc/haproxy/haproxy.cfg" # 10,11,12,13 + Process "httpd" # 10,11,12,13 + Process "keepalived" # 10,11,12 + Process "memcached" # 10,11,12,13 + Process "mongod" # 10,11,-12,-13 + ProcessMatch "mysqld" "/usr/libexec/mysqld" # 10,11,12,13 + ProcessMatch "rabbitmq" "/usr/lib64/erlang/erts-.+/bin/beam.smp" # 10,11,12,13 + Process "redis-server" # 10,11,12,13 + ProcessMatch "karaf" "java.+karaf" # ODL Specific + + # Glance (OpenStack Installed) + ProcessMatch "glance-api" "python.+glance-api" # 10,11,12,13 + ProcessMatch "glance-registry" "python.+glance-registry" # 10,-11,-12,-13 + + # Gnocchi (OpenStack Installed) + ProcessMatch "gnocchi-metricd-master" "gnocchi-metricd.*master" # 11,12,13 + ProcessMatch "gnocchi-metricd-scheduler" "gnocchi-metricd.*scheduler" # 10,11,-12,-13 + ProcessMatch "gnocchi-metricd-processing" "gnocchi-metricd.*processing" # 10,11,12,13 + ProcessMatch "gnocchi-metricd-reporting" "gnocchi-metricd.*reporting" # 10,11,12,13 + ProcessMatch "gnocchi-metricd-janitor" "gnocchi-metricd.*janitor" # 10,11,12,13 + ProcessMatch "gnocchi-metricd" "gnocchi-metricd " # 10(Old proctitle) + ProcessMatch "gnocchi-statsd" "python.+gnocchi-statsd" # 10,11,12,13 + ProcessMatch "gnocchi_wsgi" "gnocchi_wsgi.*-DFOREGROUND" # 11,12,13 + + # Heat (OpenStack Installed) + ProcessMatch "heat-api" "python.+heat-api --config-file" # 10,11,-12,-13 + ProcessMatch "heat-api-cfn" "python.+heat-api-cfn" # 10,11,-12,-13 + ProcessMatch "heat-api-cloudwatch" "python.+heat-api-cloudwatch" # 10,11,-12,-123 + ProcessMatch "heat_api_cfn" "heat_api_cfn_ws" 
# 12,13 + ProcessMatch "heat_api_cloudwatch" "heat_api_cloudw" # 12,-13 + ProcessMatch "heat_api_wsgi" "heat_api_wsgi" # 12,13 + ProcessMatch "heat-engine" "python.+heat-engine" # 10,11,12,13 + + # Horizon (OpenStack Installed) + ProcessMatch "horizon" "horizon" # 13 + + # Keystone (OpenStack Installed) + ProcessMatch "keystone-admin" "keystone-admin.*-DFOREGROUND" # 10,11,12,13 + ProcessMatch "keystone-main" "keystone-main.*-DFOREGROUND" # 10,11,12,13 + # Starting Pike, fernet tokens are default thus token_flush not needed + ProcessMatch "keystone-token-flush" "keystone-manage.*token_flush" # 10,11,-12,-13 + + # Neutron (OpenStack Installed) + ProcessMatch "neutron-dhcp-agent" "python.+neutron-dhcp-agent" # 10,11,12,13 + ProcessMatch "neutron-l3-agent" "python.+neutron-l3-agent" # 10,11,12,13 + ProcessMatch "neutron-metadata-agent" "python.+neutron-metadata-agent" # 10,11,12,13 + ProcessMatch "neutron-ns-metadata-proxy" "python.+neutron-ns-metadata-proxy" # 10,11 + ProcessMatch "neutron-openvswitch-agent" "python.+neutron-openvswitch-agent" # 10,11,12,13 + ProcessMatch "neutron-rootwrap-daemon" "python.+neutron-rootwrap-daemon" # 10,11,12,13 + ProcessMatch "neutron-server" "python.+neutron-server" # 10,11,12,13 + ProcessMatch "neutron-keepalived-state-change" "python.+neutron-keepalived-state-change" #For HA router + + # Nova (OpenStack Installed) + ProcessMatch "nova-api" "python.+nova-api$" # 10,11,-12,-13 + ProcessMatch "nova-api-metadata" "python.+nova-api-metadata" # 12,13 + ProcessMatch "nova_api_wsgi" "nova_api_wsgi" # 12,13 + ProcessMatch "nova-conductor" "python.+nova-conductor" # 10,11,12,13 + ProcessMatch "nova-consoleauth" "python.+nova-consoleauth" # 10,11,12,13 + ProcessMatch "nova-novncproxy" "python.+nova-novncproxy" # 10,11,12,13 + ProcessMatch "nova-scheduler" "python.+nova-scheduler" # 10,11,12,13 + ProcessMatch "placement_wsgi" "placement_wsgi.*-DFOREGROUND" # 11,12,13 + + ProcessMatch "octavia-housekeeping" "python.+octavia-housekeeping" # 13 
+ ProcessMatch "octavia-health-manager" "python.+octavia-health-manager" # 13 + ProcessMatch "octavia-api" "python.+octavia-api" # 13 + ProcessMatch "octavia-worker" "octavia-worker --config-file" # 13 + + # OVS (OpenStack Installed) + ProcessMatch "ovs-vswitchd" "ovs-vswitchd.+openvswitch" # 10,11,12,13 + ProcessMatch "ovsdb-client" "ovsdb-client" # 10,11,12,13 + ProcessMatch "ovsdb-server" "ovsdb-server.+openvswitch" # 10,11,12,13 + ProcessMatch "ovn-northd" "ovn-northd.+openvswitch" # 9,10 + ProcessMatch "ovn-controller" "ovn-controller.+openvswitch" # 9,10 + ProcessMatch "ovn-controller-vtep" "ovn-controller-vtep.+openvswitch" # 9,10 + + # Panko (OpenStack Installed) + ProcessMatch "panko_wsgi" "panko_wsgi.*-DFOREGROUND" # 11,12,13 + + # Swift (OpenStack Installed) + ProcessMatch "swift-account-auditor" "python.+swift-account-auditor" # 10,11,12,13 + ProcessMatch "swift-account-reaper" "python.+swift-account-reaper" # 10,11,12,13 + ProcessMatch "swift-account-replicator" "python.+swift-account-replicator" # 10,11,12,13 + ProcessMatch "swift-account-server" "python.+swift-account-server" # 10,11,12,13 + ProcessMatch "swift-container-auditor" "python.+swift-container-auditor" # 10,11,12,13 + ProcessMatch "swift-container-replicator" "python.+swift-container-replicator" # 10,11,12,13 + ProcessMatch "swift-container-server" "python.+swift-container-server" # 10,11,12,13 + ProcessMatch "swift-container-updater" "python.+swift-container-updater" # 10,11,12,13 + ProcessMatch "swift-object-auditor" "python.+swift-object-auditor" # 10,11,12,13 + ProcessMatch "swift-object-expirer" "python.+swift-object-expirer" # 11,12,13 + ProcessMatch "swift-object-replicator" "python.+swift-object-replicator" # 10,11,12,13 + ProcessMatch "swift-object-server" "python.+swift-object-server" # 10,11,12,13 + ProcessMatch "swift-object-updater" "python.+swift-object-updater" # 10,11,12,13 + ProcessMatch "swift-proxy-server" "python.+swift-proxy-server" # 10,11,12,13 + + +{%if 
rabbitmq_controller_collectd_plugin %} +{%if inventory_hostname == groups['controller'][0] %} + + ModulePath "/usr/local/bin/" + LogTraces true + Interactive false + Import "collectd_rabbitmq_monitoring" + + interval {{rabbitmq_controller_collectd_interval}} + host "{{inventory_hostname}}.internalapi.localdomain" + port 15672 + username {{controller0_rabbitmq_username.stdout}} + password {{controller0_rabbitmq_password.stdout}} + message_count {% for a_queue in controller_monitored_queues %}"{{a_queue}}" {% endfor %} + + + +{% else %} +# Rabbitmq plugin installed and enabled on {{groups['controller'][0]}} +{% endif %} +{% endif %} +{%if swift_stat_controller_collectd_plugin %} +{%if inventory_hostname == groups['controller'][0] %} + + + ModulePath "/usr/local/bin/" + LogTraces true + Interactive false + Import "collectd_swift_stat" + + Interval {{swift_stat_controller_collectd_interval}} + Prefix "gnocchi" + AuthURL "{{controller0_gnocchi_swift_authurl.stdout}}" + AuthVersion "{{controller0_gnocchi_swift_authversion.stdout}}" + User "{{controller0_gnocchi_swift_user.stdout}}" + Password "{{controller0_gnocchi_swift_auth_key.stdout}}" + Project "service" + + +{% else %} +# swift_stat plugin installed and enabled on {{groups['controller'][0]}} +{% endif %} +{% endif %} + + + ReportBytes true + ValuesPercentage true + + +# ping plugin +{% if ping_plugin %} +{% if groups['controller'] | length > 1 %} + +{% if inventory_hostname == groups['controller'][0] %} + Host "{{groups['controller'][1]}}" + Host "{{groups['controller'][2]}}" +{% elif inventory_hostname == groups['controller'][1] %} + Host "{{groups['controller'][0]}}" + Host "{{groups['controller'][2]}}" +{% elif inventory_hostname == groups['controller'][2] %} + Host "{{groups['controller'][0]}}" + Host "{{groups['controller'][1]}}" +{% endif %} + Interval {{ping_interval}} + +{% endif %} +{% endif %} + +# Tail plugin configuration + +{% if 'rabbitmq-server' in collectd_logs[config_type] | 
items2dict(key_name='instance',value_name='log_path') %} + + Instance "rabbitmq" + + Regex "ERROR REPORT" + DSType "CounterInc" + Type "counter" + Instance "error" + +{%if regex_warn %} + + Regex "WARNING REPORT" + DSType "CounterInc" + Type "counter" + Instance "warn" + +{% endif %} +{%if regex_info %} + + Regex "INFO REPORT" + DSType "CounterInc" + Type "counter" + Instance "info" + +{% endif %} + +{% endif %} + + {% for item in collectd_logs[config_type] | rejectattr('instance', 'match', 'rabbitmq-server') | rejectattr('instance', 'match', 'swift')%} + + Instance "{{ item.instance }}" + + Regex " ERROR " + DSType "CounterInc" + Type "counter" + Instance "error" + +{%if regex_warn %} + + Regex " WARNING " + DSType "CounterInc" + Type "counter" + Instance "warn" + +{% endif %} +{%if regex_info %} + + Regex " INFO " + DSType "CounterInc" + Type "counter" + Instance "info" + +{% endif %} + + {% endfor %} +{% if 'swift' in collectd_logs[config_type] | items2dict(key_name='instance',value_name='log_path') %} + + Instance "swift" + + Regex "account-server: ERROR " + DSType "CounterInc" + Type "counter" + Instance "account-server-error" + + + Regex "container-server: ERROR " + DSType "CounterInc" + Type "counter" + Instance "container-server-error" + + + Regex "object-server: ERROR " + DSType "CounterInc" + Type "counter" + Instance "object-server-error" + +{%if regex_warn %} + + Regex "account-server: WARNING " + DSType "CounterInc" + Type "counter" + Instance "account-server-warn" + + + Regex "container-server: WARNING " + DSType "CounterInc" + Type "counter" + Instance "container-server-warn" + + + Regex "object-server: WARNING " + DSType "CounterInc" + Type "counter" + Instance "object-server-warn" + +{% endif %} +{%if regex_info %} + + Regex "account-server: INFO " + DSType "CounterInc" + Type "counter" + Instance "account-server-info" + + + Regex "container-server-info + + + Regex "object-server: INFO " + DSType "CounterInc" + Type "counter" + Instance 
"object-server-info" + +{% endif %} + +{% endif %} + + +{% if ovsagent_controller_monitor %} + + ModulePath "/usr/local/bin/" + LogTraces true + Interactive false + Import "collectd_ovsagent" + + interval 10 + prefix ovsagent + interfaces {% for int in controller_monitored_ints %} {{int}} {% endfor %} + + namespaces {% for ns in controller_monitored_ns %} {{ns}} {% endfor %} + + + +{% endif %} + +# Include other collectd configuration files +Include "/etc/collectd.d" diff --git a/ansible/install/roles/collectd/templates/guest.collectd.conf.j2 b/ansible/install/roles/collectd/templates/guest.collectd.conf.j2 new file mode 100644 index 000000000..14a55fb24 --- /dev/null +++ b/ansible/install/roles/collectd/templates/guest.collectd.conf.j2 @@ -0,0 +1,73 @@ +# Installed by Browbeat Ansible Installer +# Config type: {{config_type}} + +# Interval default is 10s +Interval {{collectd_interval}} + +# Hostname for this machine, if not defined, use gethostname(2) system call +Hostname "{{inventory_hostname}}" + +LoadPlugin write_graphite +LoadPlugin cpu +LoadPlugin df +LoadPlugin interface +LoadPlugin irq +LoadPlugin load +LoadPlugin memory +LoadPlugin numa +LoadPlugin processes +LoadPlugin swap +LoadPlugin turbostat +LoadPlugin unixsock +LoadPlugin uptime + +LoadPlugin disk + + ChangeRoot "/hostfs" + FSType anon_inodefs + FSType bdev + FSType cgroup + FSType cpuset + FSType debugfs + FSType devpts + FSType devtmpfs + FSType ecryptfs + FSType fuse + FSType fusectl + FSType hugetlbfs + FSType mqueue + FSType nfs + FSType nfs4 + FSType nfsd + FSType pipefs + FSType proc + FSType pstore + FSType ramfs + #FSType rootfs + FSType rpc_pipefs + FSType securityfs + FSType sockfs + FSType sysfs + FSType tmpfs + FSType vboxsf + IgnoreSelected true + ValuesPercentage true + ReportInodes true + + +# Graphite Host Configuration + + + Host "{{graphite_host}}" + Port "{{collectd_write_graphite_port}}" + Prefix "{{graphite_prefix}}." 
+ Protocol "tcp" + LogSendErrors true + StoreRates true + AlwaysAppendDS false + EscapeCharacter "_" + + + +# Include other collectd configuration files +Include "/etc/collectd.d" diff --git a/ansible/install/roles/collectd/templates/networker.collectd.conf.j2 b/ansible/install/roles/collectd/templates/networker.collectd.conf.j2 new file mode 100644 index 000000000..0e6a38bb6 --- /dev/null +++ b/ansible/install/roles/collectd/templates/networker.collectd.conf.j2 @@ -0,0 +1,295 @@ +# Installed by Browbeat Ansible Installer +# Config type: {{config_type}} + +# Interval default is 10s +Interval {{collectd_interval}} + +# Hostname for this machine, if not defined, use gethostname(2) system call +Hostname "{{inventory_hostname}}" + +# Loaded Plugins: +LoadPlugin "logfile" + + File "/var/log/collectd.log" + LogLevel "info" + PrintSeverity true + Timestamp true + + +LoadPlugin write_graphite +LoadPlugin cpu +LoadPlugin conntrack +LoadPlugin df +LoadPlugin disk +LoadPlugin exec +LoadPlugin interface +LoadPlugin irq +LoadPlugin load +LoadPlugin match_regex +LoadPlugin memory +LoadPlugin numa +LoadPlugin processes + + Globals true + +LoadPlugin swap +LoadPlugin tail +LoadPlugin turbostat +LoadPlugin unixsock +LoadPlugin uptime + +# Open unix domain socket for collectdctl + + SocketFile "/var/run/collectd-unixsock" + SocketGroup "collectd" + SocketPerms "0770" + DeleteSocket true + + +PreCacheChain "PreCache" + + + + Plugin "^interface$" + PluginInstance "^tap*" + + Target "stop" + + + + Plugin "^interface$" + PluginInstance "^q.*" + + Target "stop" + + Target "return" + + +# Graphite Host Configuration + + + Host "{{graphite_host}}" + Port "{{collectd_write_graphite_port}}" + Prefix "{{graphite_prefix}}." 
+ Protocol "tcp" + LogSendErrors true + StoreRates true + AlwaysAppendDS false + EscapeCharacter "_" + + + + + FSType anon_inodefs + FSType bdev + FSType cgroup + FSType cpuset + FSType debugfs + FSType devpts + FSType devtmpfs + FSType ecryptfs + FSType fuse + FSType fusectl + FSType hugetlbfs + FSType mqueue + FSType nfs + FSType nfs4 + FSType nfsd + FSType overlay + FSType pipefs + FSType proc + FSType pstore + FSType ramfs + #FSType rootfs + FSType rpc_pipefs + FSType securityfs + FSType sockfs + FSType sysfs + FSType tmpfs + FSType vboxsf + MountPoint "/^/var/lib/docker/.*/" + IgnoreSelected true + ValuesPercentage true + ReportInodes true + + +{%if disk_networker_collectd_plugin %} + + Disk "/^[hsv]d[a-z]+[0-9]?$/" + Disk "/^nvm/" + IgnoreSelected false + + +{% endif %} +{%if iostat_networker_collectd_plugin %} + + ModulePath "/usr/local/bin/" + Import "collectd_iostat_python" + + + Path "/usr/bin/iostat" + Interval {{iostat_networker_collectd_interval}} + IostatInterval 2 + Count 2 + Verbose false + NiceNames false + PluginName collectd_iostat_python + + + +{% endif %} + +# (akrzos) Including the version of OpenStack that the process was verified as running after +# OpenStack Installation with a comment at the end of each Process/ProcessMatch statement. +# A Minus before the version means the process was not found in that version. 
(Ex -10) + + # Collectd (Browbeat Installed) + ProcessMatch "collectd" "/usr/sbin/collectd" + + # Everything Else (OpenStack Installed) + # (Processes displayed under "Everything Else" on Grafana Dashboards) + ProcessMatch "dnsmasq" "^dnsmasq.+" # 12 + ProcessMatch "haproxy" "haproxy.+-f.+/var/lib/neutron/ns-metadata-proxy.*" # 12 + Process "keepalived" # 12 + + # Neutron (OpenStack Installed) + ProcessMatch "neutron-dhcp-agent" "python.+neutron-dhcp-agent" # 12 + ProcessMatch "neutron-l3-agent" "python.+neutron-l3-agent" # 12 + ProcessMatch "neutron-metadata-agent" "python.+neutron-metadata-agent" # 12 + ProcessMatch "neutron-ns-metadata-proxy" "python.+neutron-ns-metadata-proxy" # + ProcessMatch "neutron-openvswitch-agent" "python.+neutron-openvswitch-agent" # 12 + + # OVS (OpenStack Installed) + ProcessMatch "ovs-vswitchd" "ovs-vswitchd.+openvswitch" # 10,11,12 + ProcessMatch "ovsdb-client" "ovsdb-client" # 10,11,12 + ProcessMatch "ovsdb-server" "ovsdb-server.+openvswitch" # 10,11,12 + + + + ReportBytes true + ValuesPercentage true + + +# Tail plugin configuration + + + Instance "neutron-dhcp-agent" + + Regex " ERROR " + DSType "CounterInc" + Type "counter" + Instance "error" + +{%if regex_warn %} + + Regex " WARNING " + DSType "CounterInc" + Type "counter" + Instance "warn" + +{% endif %} +{%if regex_info %} + + Regex " INFO " + DSType "CounterInc" + Type "counter" + Instance "info" + +{% endif %} + + + Instance "neutron-l3-agent" + + Regex " ERROR " + DSType "CounterInc" + Type "counter" + Instance "error" + +{%if regex_warn %} + + Regex " WARNING " + DSType "CounterInc" + Type "counter" + Instance "warn" + +{% endif %} +{%if regex_info %} + + Regex " INFO " + DSType "CounterInc" + Type "counter" + Instance "info" + +{% endif %} + + + Instance "neutron-metadata-agent" + + Regex " ERROR " + DSType "CounterInc" + Type "counter" + Instance "error" + +{%if regex_warn %} + + Regex " WARNING " + DSType "CounterInc" + Type "counter" + Instance "warn" + +{% endif %} 
+{%if regex_info %} + + Regex " INFO " + DSType "CounterInc" + Type "counter" + Instance "info" + +{% endif %} + + + Instance "neutron-openvswitch-agent" + + Regex " ERROR " + DSType "CounterInc" + Type "counter" + Instance "error" + +{%if regex_warn %} + + Regex " WARNING " + DSType "CounterInc" + Type "counter" + Instance "warn" + +{% endif %} +{%if regex_info %} + + Regex " INFO " + DSType "CounterInc" + Type "counter" + Instance "info" + +{% endif %} + + + +{% if ovsagent_networker_monitor %} + + ModulePath "/usr/local/bin/" + LogTraces true + Interactive false + Import "collectd_ovsagent" + + interval 10 + prefix ovsagent + interfaces {% for int in networker_monitored_ints %} {{int}} {% endfor %} + + namespaces {% for ns in networker_monitored_ns %} {{ns}} {% endfor %} + + + +{% endif %} + +# Include other collectd configuration files +Include "/etc/collectd.d" diff --git a/ansible/install/roles/collectd/templates/objectstorage.collectd.conf.j2 b/ansible/install/roles/collectd/templates/objectstorage.collectd.conf.j2 new file mode 100644 index 000000000..edb731e5f --- /dev/null +++ b/ansible/install/roles/collectd/templates/objectstorage.collectd.conf.j2 @@ -0,0 +1,245 @@ +# Installed by Browbeat Ansible Installer +# Config type: {{config_type}} + +# Interval default is 10s +Interval {{collectd_interval}} + +# Hostname for this machine, if not defined, use gethostname(2) system call +Hostname "{{inventory_hostname}}" + +# Loaded Plugins: +LoadPlugin "logfile" + + File "/var/log/collectd.log" + LogLevel "info" + PrintSeverity true + Timestamp true + + +LoadPlugin write_graphite +LoadPlugin cpu +LoadPlugin conntrack +LoadPlugin df +LoadPlugin disk +LoadPlugin exec +LoadPlugin interface +LoadPlugin irq +LoadPlugin load +LoadPlugin match_regex +LoadPlugin memory +LoadPlugin numa +LoadPlugin processes +{%if iostat_objectstorage_collectd_plugin %} + + Globals true + +{% endif %} +LoadPlugin swap +LoadPlugin tail +LoadPlugin turbostat +LoadPlugin unixsock +LoadPlugin 
uptime + +# Open unix domain socket for collectdctl + + SocketFile "/var/run/collectd-unixsock" + SocketGroup "collectd" + SocketPerms "0770" + DeleteSocket true + + +PreCacheChain "PreCache" + + + + Plugin "^interface$" + PluginInstance "^tap*" + + Target "stop" + + + + Plugin "^interface$" + PluginInstance "^q.*" + + Target "stop" + + Target "return" + + +# Graphite Host Configuration + + + Host "{{graphite_host}}" + Port "{{collectd_write_graphite_port}}" + Prefix "{{graphite_prefix}}." + Protocol "tcp" + LogSendErrors true + StoreRates true + AlwaysAppendDS false + EscapeCharacter "_" + + + + + FSType anon_inodefs + FSType bdev + FSType cgroup + FSType cpuset + FSType debugfs + FSType devpts + FSType devtmpfs + FSType ecryptfs + FSType fuse + FSType fusectl + FSType hugetlbfs + FSType mqueue + FSType nfs + FSType nfs4 + FSType nfsd + FSType overlay + FSType pipefs + FSType proc + FSType pstore + FSType ramfs + #FSType rootfs + FSType rpc_pipefs + FSType securityfs + FSType sockfs + FSType sysfs + FSType tmpfs + FSType vboxsf + MountPoint "/^/var/lib/docker/.*/" + IgnoreSelected true + ValuesPercentage true + ReportInodes true + + +{%if disk_objectstorage_collectd_plugin %} + + Disk "/^[hsv]d[a-z]+[0-9]?$/" + Disk "/^nvm/" + IgnoreSelected false + + +{% endif %} +{%if iostat_objectstorage_collectd_plugin %} + + ModulePath "/usr/local/bin/" + Import "collectd_iostat_python" + + + Path "/usr/bin/iostat" + Interval {{iostat_objectstorage_collectd_interval}} + IostatInterval 2 + Count 2 + Verbose false + NiceNames false + PluginName collectd_iostat_python + + + +{% endif %} +# (akrzos) Including the version of OpenStack that the process was verified as running after +# OpenStack Installation with a comment at the end of each Process/ProcessMatch statement. +# A Minus before the version means the process was not found in that version. 
(Ex -9) + + # Collectd (Browbeat Installed) + ProcessMatch "collectd" "/usr/sbin/collectd" + + # OVS (OpenStack Installed) + ProcessMatch "ovs-vswitchd" "ovs-vswitchd.+openvswitch" # 10,11,12,13 + ProcessMatch "ovsdb-server" "ovsdb-server.+openvswitch" # 10,11,12,13 + + # Swift (OpenStack Installed) + ProcessMatch "swift-account-auditor" "python.+swift-account-auditor" # 10,11,12,13 + ProcessMatch "swift-account-reaper" "python.+swift-account-reaper" # 10,11,12,13 + ProcessMatch "swift-account-replicator" "python.+swift-account-replicator" # 10,11,12,13 + ProcessMatch "swift-account-server" "python.+swift-account-server" # 10,11,12,13 + ProcessMatch "swift-container-auditor" "python.+swift-container-auditor" # 10,11,12,13 + ProcessMatch "swift-container-replicator" "python.+swift-container-replicator" # 10,11,12,13 + ProcessMatch "swift-container-server" "python.+swift-container-server" # 10,11,12,13 + ProcessMatch "swift-container-updater" "python.+swift-container-updater" # 10,11,12,13 + ProcessMatch "swift-object-auditor" "python.+swift-object-auditor" # 10,11,12,13 + ProcessMatch "swift-object-expirer" "python.+swift-object-expirer" # 13 + ProcessMatch "swift-object-replicator" "python.+swift-object-replicator" # 10,11,12,13 + ProcessMatch "swift-object-server" "python.+swift-object-server" # 10,11,12,13 + ProcessMatch "swift-object-updater" "python.+swift-object-updater" # 10,11,12,13 + Process "rsync" # 13 + + + + ReportBytes true + ValuesPercentage true + + +# Tail plugin configuration + + + # Swift logs all into the same file + + Instance "swift" + + Regex "account-server: ERROR " + DSType "CounterInc" + Type "counter" + Instance "account-server-error" + + + Regex "container-server: ERROR " + DSType "CounterInc" + Type "counter" + Instance "container-server-error" + + + Regex "object-server: ERROR " + DSType "CounterInc" + Type "counter" + Instance "object-server-error" + +{%if regex_warn %} + + Regex "account-server: WARNING " + DSType "CounterInc" + Type 
"counter" + Instance "account-server-warn" + + + Regex "container-server: WARNING " + DSType "CounterInc" + Type "counter" + Instance "container-server-warn" + + + Regex "object-server: WARNING " + DSType "CounterInc" + Type "counter" + Instance "object-server-warn" + +{% endif %} +{%if regex_info %} + + Regex "account-server: INFO " + DSType "CounterInc" + Type "counter" + Instance "account-server-info" + + + Regex "container-server: INFO " + DSType "CounterInc" + Type "counter" + Instance "container-server-info" + + + Regex "object-server: INFO " + DSType "CounterInc" + Type "counter" + Instance "object-server-info" + +{% endif %} + + + + +# Include other collectd configuration files +Include "/etc/collectd.d" diff --git a/ansible/install/roles/collectd/templates/undercloud.collectd.conf.j2 b/ansible/install/roles/collectd/templates/undercloud.collectd.conf.j2 new file mode 100644 index 000000000..2c4f2cf60 --- /dev/null +++ b/ansible/install/roles/collectd/templates/undercloud.collectd.conf.j2 @@ -0,0 +1,470 @@ +# Installed by Browbeat Ansible Installer +# Config type: {{config_type}} + +# Interval default is 10s +Interval {{collectd_interval}} + +# Hostname for this machine, if not defined, use gethostname(2) system call +Hostname "{{inventory_hostname}}" + +# Loaded Plugins: +LoadPlugin "logfile" + + File "/var/log/collectd.log" + LogLevel "info" + PrintSeverity true + Timestamp true + + +LoadPlugin write_graphite +{% if apache_undercloud_collectd_plugin %} +LoadPlugin apache +{% endif %} +LoadPlugin cpu +LoadPlugin conntrack +{% if keystone_undercloud_collectd_plugin %} +LoadPlugin dbi +{% endif %} +LoadPlugin df +LoadPlugin disk +LoadPlugin exec +LoadPlugin interface +LoadPlugin irq +LoadPlugin load +LoadPlugin match_regex +LoadPlugin memory +LoadPlugin mysql +LoadPlugin numa +LoadPlugin processes +{%if gnocchi_status_undercloud_collectd_plugin or iostat_undercloud_collectd_plugin or rabbitmq_undercloud_collectd_plugin %} + + Globals true + +{% endif %} 
+LoadPlugin swap +LoadPlugin tail +LoadPlugin uptime + +PreCacheChain "PreCache" + + + + Plugin "^interface$" + PluginInstance "^tap*" + + Target "stop" + + + + Plugin "^interface$" + PluginInstance "^q.*" + + Target "stop" + + {%if iostat_undercloud_collectd_plugin %} + + + Plugin "^collectd_iostat_python$" + PluginInstance "^docker.*$" + + Target "stop" + + {% endif %} + Target "return" + + +# Graphite Host Configuration + + + Host "{{graphite_host}}" + Port "{{collectd_write_graphite_port}}" + Prefix "{{graphite_prefix}}." + Protocol "tcp" + LogSendErrors true + StoreRates true + AlwaysAppendDS false + EscapeCharacter "_" + + +{% if apache_undercloud_collectd_plugin %} + + + + URL "http://127.0.0.1:{{apache_undercloud_mod_status_port}}/mod_status?auto" + + +{% endif %} +{% if keystone_undercloud_collectd_plugin %} + + + + Statement "select count(*) as count from token" + + Type gauge + InstancePrefix "token" + ValuesFrom "count" + + + + Driver "mysql" + DriverOption "host" "{{undercloud_ctlplane_ip_address.stdout}}" + DriverOption "user" "root" + DriverOption "password" "{{undercloud_mysql_password.stdout}}" + DriverOption "dbname" "keystone" + Query token_count + + +{% endif %} + + + FSType anon_inodefs + FSType bdev + FSType cgroup + FSType cpuset + FSType debugfs + FSType devpts + FSType devtmpfs + FSType ecryptfs + FSType fuse + FSType fusectl + FSType hugetlbfs + FSType mqueue + FSType nfs + FSType nfs4 + FSType nfsd + FSType overlay + FSType pipefs + FSType proc + FSType pstore + FSType ramfs + #FSType rootfs + FSType rpc_pipefs + FSType securityfs + FSType sockfs + FSType sysfs + FSType tmpfs + FSType vboxsf + MountPoint "/^/tmp/.*/" + MountPoint "/^/var/lib/docker/.*/" + IgnoreSelected true + ValuesPercentage true + ReportInodes true + + +{%if disk_undercloud_collectd_plugin %} + + Disk "/^[hsv]d[a-z]+[0-9]?$/" + Disk "/^nvm/" + IgnoreSelected false + + +{% endif %} +{%if iostat_undercloud_collectd_plugin %} + + ModulePath "/usr/local/bin/" + Import 
"collectd_iostat_python" + + + Path "/usr/bin/iostat" + Interval {{iostat_undercloud_collectd_interval}} + IostatInterval 2 + Count 2 + Verbose false + NiceNames false + PluginName collectd_iostat_python + + + +{% endif %} +{%if gnocchi_status_undercloud_collectd_plugin %} + + ModulePath "/usr/local/bin/" + LogTraces true + Interactive false + Import "collectd_gnocchi_status" + + interval {{gnocchi_status_undercloud_collectd_interval}} + + + +{% endif %} + + + Host "{{undercloud_ctlplane_ip_address.stdout}}" + User "root" + Password "{{undercloud_mysql_password.stdout}}" + InnodbStats true + + + +# (akrzos) Including the version of OpenStack that the process was verified as running after +# OpenStack Installation with a comment at the end of each Process/ProcessMatch statement. +# A Minus before the version means the process was not found in that version. (Ex -10) + + # Ansible + ProcessMatch "ansible-playbook" "ansible-playbook" # 12,13 + + # Aodh (OpenStack Installed) + ProcessMatch "aodh-evaluator" "aodh-evaluator" # -10,11 + ProcessMatch "aodh-listener" "aodh-listener" # -10,11 + ProcessMatch "aodh-notifier" "aodh-notifier" # -10,11 + ProcessMatch "aodh_wsgi" "aodh_wsgi.*-DFOREGROUND" # 11 + + # Ceilometer (OpenStack Installed) + ProcessMatch "ceilometer-agent-notification" "ceilometer-agent-notification" # 10,11 + ProcessMatch "ceilometer-api" "python.+ceilometer-api" # -10(httpd),-11(httpd) + ProcessMatch "ceilometer-collector" "ceilometer-collector" # 10,11 + ProcessMatch "ceilometer-polling" "ceilometer-polling" # 10,11 + ProcessMatch "ceilometer_wsgi" "ceilometer_wsgi.*-DFOREGROUND" # 11 + + # Collectd (Browbeat Installed) + ProcessMatch "collectd" "/usr/sbin/collectd" + + # Docker (OpenStack Installed) + ProcessMatch "docker-registry" "registry.+serve.+/etc/docker-distribution" # 11,12,13 + ProcessMatch "dockerd-current" "dockerd-current" # 11,12,13 + ProcessMatch "docker-containerd-current" "docker-containerd-current" # 11,12,13 + + # Everything Else 
(OpenStack Installed) + # (Processes displayed under "Everything Else" on Grafana Dashboards) + Process "httpd" # 10,11,12,13 + Process "iscsid" # 10,11,12,13 + Process "memcached" # 10,11,12,13 + Process "mongod" # 10,11,-12,-13 + ProcessMatch "mysqld" "/usr/libexec/mysqld" # 10,11,12,13 + ProcessMatch "rabbitmq" "/usr/lib64/erlang/erts-.+/bin/beam.smp" # 10,11,12,13 + + # Glance (OpenStack Installed) + ProcessMatch "glance-api" "python.+glance-api" # 10,11,12,13 + ProcessMatch "glance-registry" "python.+glance-registry" # 10,-11,-12,-13 + + # Gnocchi (OpenStack Installed) + ProcessMatch "gnocchi-metricd-master" "gnocchi-metricd:.*master" # 11 + ProcessMatch "gnocchi-metricd-scheduler" "gnocchi-metricd:.*scheduler" # 11 + ProcessMatch "gnocchi-metricd-processing" "gnocchi-metricd:.*processing" # 11 + ProcessMatch "gnocchi-metricd-reporting" "gnocchi-metricd:.*reporting" # 11 + ProcessMatch "gnocchi-metricd-janitor" "gnocchi-metricd:.*janitor" # 11 + ProcessMatch "gnocchi-statsd" "python.+gnocchi-statsd" # 11 + ProcessMatch "gnocchi_wsgi" "gnocchi_wsgi.*-DFOREGROUND" # 11 + + # Heat (OpenStack Installed) + ProcessMatch "heat-api" "python.+heat-api" # 10,11,-12,-13 + ProcessMatch "heat-api-cfn" "python.+heat-api-cfn" # 10,11,-12,-13 + ProcessMatch "heat-engine" "python.+heat-engine" # 10,11,12,13 + ProcessMatch "heat_api_wsgi" "heat_api_wsgi.*-DFOREGROUND" # 12,13 + ProcessMatch "heat_api_cfn_ws" "heat_api_cfn_ws.*-DFOREGROUND" # 12,13 + + # Ironic (OpenStack Installed) + ProcessMatch "ironic-api" "python.+ironic-api" # 10,11,-12,-13 + ProcessMatch "ironic-conductor" "python.+ironic-conductor" # 10,11,12,13 + ProcessMatch "ironic-inspector" "python.+ironic-inspector" # 10,11,12,13 + ProcessMatch "dnsmasq-ironic" "dnsmasq.+/etc/dnsmasq-ironic.conf" # 10,11,12,13 + ProcessMatch "dnsmasq-ironicinspector" "/sbin/dnsmasq.+/etc/ironic-inspector/" # 10,11,12,13 + ProcessMatch "ironic_wsgi" "ironic_wsgi.*-DFOREGROUND" # 12,13 + + # Keystone (OpenStack Installed) + 
ProcessMatch "keystone-admin" "keystone-admin.*-DFOREGROUND" # 10,11,12,13 + ProcessMatch "keystone-main" "keystone-main.*-DFOREGROUND" # 10,11,12,13 + # Starting Ocata, fernet tokens are default thus token_flush not needed + ProcessMatch "keystone-token-flush" "keystone-manage.*token_flush" # 10,11,-12,-13 + + # Mistral (Openstack Installed) + ProcessMatch "mistral-server-api" "python.+mistral-server.+api" # 10,11,12,13 + ProcessMatch "mistral-server-engine" "python.+mistral-server.+engine" # 10,11,12,13 + ProcessMatch "mistral-server-executor" "python.+mistral-server.+executor" # 10,11,12,13 + + # Neutron (OpenStack Installed) + ProcessMatch "neutron-dhcp-agent" "python.+neutron-dhcp-agent" # 10,11,12,13 + ProcessMatch "neutron-openvswitch-agent" "python.+neutron-openvswitch-agent" # 10,11,12,13 + ProcessMatch "neutron-rootwrap-daemon" "python.+neutron-rootwrap-daemon" # 10,11,12,13 + ProcessMatch "neutron-server" "python.+neutron-server" # 10,11,12,13 + ProcessMatch "neutron-rootwrap-daemon" "python.+neutron-rootwrap-daemon" # 10,11,12,13 + ProcessMatch "neutron-l3-agent" "python.+neutron-l3-agent" # 13 + + # Nova (OpenStack Installed) + ProcessMatch "nova-api" "python.+nova-api" # 10,11,12,13 + ProcessMatch "nova-cert" "python.+nova-cert" # 10,11,-12,-13 + ProcessMatch "nova-compute" "python.+nova-compute" # 10,11,12,13 + ProcessMatch "nova-conductor" "python.+nova-conductor" # 10,11,12,13 + ProcessMatch "nova-scheduler" "python.+nova-scheduler" # 10,11,12,13 + ProcessMatch "placement_wsgi" "placement_wsgi.*-DFOREGROUND" # 11,12,13 + ProcessMatch "nova_api_wsgi" "nova_api_wsgi.*-DFOREGROUND" # 13 + ProcessMatch "novajoin-server" "python.+novajoin-server" # TLS + + + # OVS (OpenStack Installed) + ProcessMatch "ovs-vswitchd" "ovs-vswitchd.+openvswitch" # 10,11,12,13 + ProcessMatch "ovsdb-client" "ovsdb-client" # 10,11,12,13 + ProcessMatch "ovsdb-server" "ovsdb-server.+openvswitch" # 10,11,12,13 + + # Panko (OpenStack Installed) + ProcessMatch "panko_wsgi" 
"panko_wsgi.*-DFOREGROUND" # 11 + + # Swift (OpenStack Installed) + ProcessMatch "swift-account-auditor" "python.+swift-account-auditor" # 10,-11,-12,-13 + ProcessMatch "swift-account-reaper" "python.+swift-account-reaper" # 10,11,12,13 + ProcessMatch "swift-account-replicator" "python.+swift-account-replicator" # 10,-11,-12,-13 + ProcessMatch "swift-account-server" "python.+swift-account-server" # 10,11,12,13 + ProcessMatch "swift-container-auditor" "python.+swift-container-auditor" # 10,-11,-12,-13 + ProcessMatch "swift-container-replicator" "python.+swift-container-replicator" # 10,-11,-12,-13 + ProcessMatch "swift-container-server" "python.+swift-container-server" # 10,11,12,13 + ProcessMatch "swift-container-sync" "python.+swift-container-sync" # 12,13 + ProcessMatch "swift-container-updater" "python.+swift-container-updater" # 10,11,12,13 + ProcessMatch "swift-object-auditor" "python.+swift-object-auditor" # 10,-11,-12,-13 + ProcessMatch "swift-object-expirer" "python.+swift-object-expirer" # 13 + ProcessMatch "swift-object-reconstructor" "python.+swift-object-reconstructor" # 12,13 + ProcessMatch "swift-object-replicator" "python.+swift-object-replicator" # 10,-11,-12,-13 + ProcessMatch "swift-object-server" "python.+swift-object-server" # 10,11,12,13 + ProcessMatch "swift-object-updater" "python.+swift-object-updater" # 10,11,12,13 + ProcessMatch "swift-proxy-server" "python.+swift-proxy-server" # 10,11,12,13 + + # Zaqar (Openstack Installed) + ProcessMatch "zaqar-server" "python.+zaqar-server" # 10,11,12,13 + ProcessMatch "zaqar_wsgi" "zaqar_wsgi.*-DFOREGROUND" # 12,13 + + +{%if rabbitmq_undercloud_collectd_plugin %} + + ModulePath "/usr/local/bin/" + LogTraces true + Interactive false + Import "collectd_rabbitmq_monitoring" + + interval {{rabbitmq_undercloud_collectd_interval}} + host "{{undercloud_ctlplane_ip_address.stdout}}" + port 15672 + username {{undercloud_rabbitmq_username.stdout}} + password {{undercloud_rabbitmq_password.stdout}} + 
message_count {% for a_queue in undercloud_monitored_queues %}"{{a_queue}}" {% endfor %} + + + + +{% endif %} + + ReportBytes true + ValuesPercentage true + + +# Tail plugin configuration + + {% for item in collectd_logs[config_type] %} + + Instance "{{ item.instance }}" + + Regex " ERROR " + DSType "CounterInc" + Type "counter" + Instance "error" + +{%if regex_warn %} + + Regex " WARNING " + DSType "CounterInc" + Type "counter" + Instance "warn" + +{% endif %} +{%if regex_info %} + + Regex " INFO " + DSType "CounterInc" + Type "counter" + Instance "info" + +{% endif %} + + {% endfor %} + {% for item in collectd_logs[config_type] | rejectattr('instance', 'match', 'rabbitmq-server') | rejectattr('instance', 'match', 'swift')%} + + Instance "{{ item.instance }}" + + Regex " ERROR " + DSType "CounterInc" + Type "counter" + Instance "error" + +{%if regex_warn %} + + Regex " WARNING " + DSType "CounterInc" + Type "counter" + Instance "warn" + +{% endif %} +{%if regex_info %} + + Regex " INFO " + DSType "CounterInc" + Type "counter" + Instance "info" + +{% endif %} + + {% endfor %} +{% if 'swift' in collectd_logs[config_type] | items2dict(key_name='instance',value_name='log_path') %} + + Instance "swift" + + Regex "account-server: ERROR " + DSType "CounterInc" + Type "counter" + Instance "account-server-error" + + + Regex "container-server: ERROR " + DSType "CounterInc" + Type "counter" + Instance "container-server-error" + + + Regex "object-server: ERROR " + DSType "CounterInc" + Type "counter" + Instance "object-server-error" + +{%if regex_warn %} + + Regex "account-server: WARNING " + DSType "CounterInc" + Type "counter" + Instance "account-server-warn" + + + Regex "container-server: WARNING " + DSType "CounterInc" + Type "counter" + Instance "container-server-warn" + + + Regex "object-server: WARNING " + DSType "CounterInc" + Type "counter" + Instance "object-server-warn" + +{% endif %} +{%if regex_info %} + + Regex "account-server: INFO " + DSType "CounterInc" + 
Type "counter" + Instance "account-server-info" + + + Regex "container-server: INFO " + DSType "CounterInc" + Type "counter" + Instance "container-server-info" + + + Regex "object-server: INFO " + DSType "CounterInc" + Type "counter" + Instance "object-server-info" + +{% endif %} + +{% endif %} + + + + +# Include other collectd configuration files +Include "/etc/collectd.d" diff --git a/ansible/install/roles/collectd/vars/13.yml b/ansible/install/roles/collectd/vars/13.yml new file mode 100644 index 000000000..37d26ac67 --- /dev/null +++ b/ansible/install/roles/collectd/vars/13.yml @@ -0,0 +1,171 @@ +--- +collectd_logs: + undercloud: + - log_path: /var/log/nova/nova-api.log + instance: nova-api + - log_path: /var/log/nova/nova-compute.log + instance: nova-compute + - log_path: /var/log/nova/nova-conductor.log + instance: nova-conductor + - log_path: /var/log/nova/nova-scheduler.log + instance: nova-scheduler + - log_path: /var/log/glance/api.log + instance: glance-api + - log_path: /var/log/heat/heat-engine.log + instance: heat-engine + - log_path: /var/log/heat/heat_api.log + instance: heat-api + - log_path: /var/log/ironic/ironic-conductor.log + instance: ironic-conductor + - log_path: /var/log/ironic-inspector/ironic-inspector.log + instance: ironic-inspector + - log_path: /var/log/keystone/keystone.log + instance: keystone + - log_path: /var/log/mistral/api.log + instance: mistral-api + - log_path: /var/log/mistral/engine.log + instance: mistral-engine + - log_path: /var/log/mistral/event-engine.log + instance: mistral-event-engine + - log_path: /var/log/mistral/executor.log + instance: mistral-executor + - log_path: /var/log/mariadb/mariadb.log + instance: mysql + - log_path: /var/log/containers/neutron/dhcp-agent.log + instance: neutron-dhcp-agent + - log_path: /var/log/neutron/ironic-neutron-agent.log + instance: neutron-ironic-agent + - log_path: /var/log/neutron/l3-agent.log + instance: neutron-l3-agent + - log_path: /var/log/neutron/openvswitch-agent.log + instance: neutron-openvswitch-agent + - log_path: 
/var/log/neutron/server.log + instance: neutron-server + - log_path: /var/log/nova/nova-placement-api.log + instance: nova-placement + - log_path: /var/log/zaqar/zaqar-server.log + instance: zaqar-server + - log_path: /var/log/httpd/heat_api_wsgi_error.log + instance: heat-api-wsgi + - log_path: /var/log/httpd/ironic_wsgi_error.log + instance: ironic-api-wsgi + - log_path: /var/log/httpd/ipxe_vhost_error.log + instance: ironic-pxe-wsgi + - log_path: /var/log/httpd/keystone_wsgi_error.log + instance: keystone-wsgi + - log_path: /var/log/httpd/nova_api_wsgi_error.log + instance: nova-api-wsgi + - log_path: /var/log/httpd/placement_wsgi_error.log + instance: nova-placement-wsgi + - log_path: /var/log/httpd/zaqar_wsgi_error.log + instance: zaqar-api-wsgi + controller: + - log_path: /var/log/containers/aodh/aodh-evaluator.log + instance: aodh-evaluator + - log_path: /var/log/containers/aodh/aodh-listener.log + instance: aodh-listener + - log_path: /var/log/containers/aodh/aodh-notifier.log + instance: aodh-notifier + - log_path: /var/log/containers/ceilometer/agent-notification.log + instance: ceilometer-agent-notification + - log_path: /var/log/containers/ceilometer/central.log + instance: ceilometer-central + - log_path: /var/log/containers/cinder/cinder-api.log + instance: cinder-api + - log_path: /var/log/containers/cinder/cinder-scheduler.log + instance: cinder-scheduler + - log_path: /var/log/containers/cinder/cinder-volume.log + instance: cinder-volume + - log_path: /var/log/containers/glance/api.log + instance: glance-api + - log_path: /var/log/containers/gnocchi/gnocchi-metricd.log + instance: gnocchi-metricd + - log_path: /var/log/containers/gnocchi/gnocchi-statsd.log + instance: gnocchi-statsd + - log_path: /var/log/containers/heat/heat_api_cfn.log + instance: heat-api-cfn + - log_path: /var/log/containers/heat/heat_api.log + instance: heat-api + - log_path: /var/log/containers/heat/heat-engine.log + instance: heat-engine + - log_path: 
/var/log/containers/keystone/keystone.log + instance: keystone + - log_path: /var/log/containers/mysql/mariadb.log + instance: mysql + - log_path: /var/log/containers/neutron/server.log + instance: neutron-server + - log_path: /var/log/containers/neutron/l3-agent.log + instance: neutron-l3-agent + - log_path: /var/log/containers/neutron/dhcp-agent.log + instance: neutron-dhcp-agent + - log_path: /var/log/containers/neutron/metadata-agent.log + instance: neutron-metadata-agent + - log_path: /var/log/containers/neutron/openvswitch-agent.log + instance: neutron-openvswitch-agent + - log_path: /var/log/containers/nova/nova-api.log + instance: nova-api + - log_path: /var/log/containers/nova/nova-conductor.log + instance: nova-conductor + - log_path: /var/log/containers/nova/nova-scheduler.log + instance: nova-scheduler + - log_path: /var/log/containers/nova/nova-metadata-api.log + instance: nova-metadata-api + - log_path: /var/log/containers/nova/nova-novncproxy.log + instance: nova-novncproxy + - log_path: /var/log/containers/openvswitch/ovn-controller.log + instance: ovn-controller + - log_path: /var/log/containers/openvswitch/ovn-northd.log + instance: ovn-northd + - log_path: /var/log/containers/openvswitch/ovsdb-server-nb.log + instance: ovsdb-server-nb + - log_path: /var/log/containers/openvswitch/ovsdb-server-sb.log + instance: ovsdb-server-sb + - log_path: /var/log/containers/nova/nova-placement-api.log + instance: nova-placement + - log_path: /var/log/containers/rabbitmq/rabbit@{{ inventory_hostname }}.log + instance: rabbitmq-server + - log_path: /var/log/containers/redis/redis.log + instance: redis-server + - log_path: /var/log/containers/httpd/heat-api/heat_api_wsgi_error.log + instance: heat-api-wsgi + - log_path: /var/log/containers/httpd/ironic-api/ironic_wsgi_error.log + instance: ironic-api-wsgi + - log_path: /var/log/containers/httpd/ironic-pxe/ipxe_vhost_error.log + instance: ironic-pxe-wsgi + - log_path: 
/var/log/containers/httpd/keystone/keystone_wsgi_error.log + instance: keystone-wsgi + - log_path: /var/log/containers/httpd/nova-api/nova_api_wsgi_error.log + instance: nova-api-wsgi + - log_path: /var/log/containers/httpd/nova-placement/placement_wsgi_error.log + instance: nova-placement-wsgi + - log_path: /var/log/containers/httpd/zaqar/zaqar_wsgi_error.log + instance: zaqar-api-wsgi + - log_path: /var/log/containers/httpd/aodh-api/aodh_wsgi_error.log + instance: aodh-api-wsgi + - log_path: /var/log/containers/httpd/cinder-api/cinder_wsgi_error.log + instance: cinder-api-wsgi + - log_path: /var/log/containers/httpd/gnocchi-api/gnocchi_wsgi_error.log + instance: gnocchi-api-wsgi + - log_path: /var/log/containers/httpd/heat-api-cfn/heat_api_cfn_wsgi_error.log + instance: heat-api-cfn-wsgi + - log_path: /var/log/containers/httpd/nova-metadata/nova_metadata_wsgi_error.log + instance: nova-metadata-wsgi + - log_path: /var/log/containers/httpd/octavia-api/octavia_wsgi_error.log + instance: octavia-api + - log_path: /var/log/containers/swift/swift.log + instance: swift + compute: + - log_path: /var/log/containers/libvirt/libvirtd.log + instance: libvirtd + - log_path: /var/log/containers/neutron/l3-agent.log + instance: neutron-l3-agent + - log_path: /var/log/containers/neutron/metadata-agent.log + instance: neutron-metadata-agent + - log_path: /var/log/containers/neutron/openvswitch-agent.log + instance: neutron-openvswitch-agent + - log_path: /var/log/containers/neutron/ovn-metadata-agent.log + instance: ovn-metadata-agent + - log_path: /var/log/containers/nova/nova-compute.log + instance: nova-compute + - log_path: /var/log/containers/openvswitch/ovn-controller.log + instance: ovn-controller diff --git a/ansible/install/roles/collectd/vars/16.yml b/ansible/install/roles/collectd/vars/16.yml new file mode 100644 index 000000000..6249f408d --- /dev/null +++ b/ansible/install/roles/collectd/vars/16.yml @@ -0,0 +1,163 @@ +--- +collectd_logs: + undercloud: + - log_path: 
/var/log/containers/nova/nova-api.log + instance: nova-api + - log_path: /var/log/containers/nova/nova-compute.log + instance: nova-compute + - log_path: /var/log/containers/nova/nova-conductor.log + instance: nova-conductor + - log_path: /var/log/containers/nova/nova-scheduler.log + instance: nova-scheduler + - log_path: /var/log/containers/glance/api.log + instance: glance-api + - log_path: /var/log/containers/heat/heat-engine.log + instance: heat-engine + - log_path: /var/log/containers/heat/heat_api.log + instance: heat-api + - log_path: /var/log/containers/ironic/ironic-conductor.log + instance: ironic-conductor + - log_path: /var/log/containers/ironic-inspector/ironic-inspector.log + instance: ironic-inspector + - log_path: /var/log/containers/keystone/keystone.log + instance: keystone + - log_path: /var/log/containers/mistral/api.log + instance: mistral-api + - log_path: /var/log/containers/mistral/engine.log + instance: mistral-engine + - log_path: /var/log/containers/mistral/event-engine.log + instance: mistral-event-engine + - log_path: /var/log/containers/mistral/executor.log + instance: mistral-executor + - log_path: /var/log/containers/mysql/mariadb.log + instance: mysql + - log_path: /var/log/containers/neutron/dhcp-agent.log + instance: neutron-dhcp-agent + - log_path: /var/log/containers/neutron/ironic-neutron-agent.log + instance: neutron-ironic-agent + - log_path: /var/log/containers/neutron/l3-agent.log + instance: neutron-l3-agent + - log_path: /var/log/containers/neutron/openvswitch-agent.log + instance: neutron-openvswitch-agent + - log_path: /var/log/containers/neutron/server.log + instance: neutron-server + - log_path: /var/log/containers/placement/placement.log + instance: nova-placement + - log_path: /var/log/containers/zaqar/zaqar-server.log + instance: zaqar-server + - log_path: /var/log/containers/httpd/heat-api/heat_api_wsgi_error.log + instance: heat-api-wsgi + - log_path: /var/log/containers/httpd/ironic-api/ironic_wsgi_error.log + 
instance: ironic-api-wsgi + - log_path: /var/log/containers/httpd/ironic-pxe/ipxe_vhost_error.log + instance: ironic-pxe-wsgi + - log_path: /var/log/containers/httpd/keystone/keystone_wsgi_error.log + instance: keystone-wsgi + - log_path: /var/log/containers/httpd/nova-api/nova_api_wsgi_error.log + instance: nova-api-wsgi + - log_path: /var/log/containers/httpd/placement/placement_wsgi_error.log + instance: nova-placement-wsgi + - log_path: /var/log/containers/httpd/zaqar/zaqar_wsgi_error.log + instance: zaqar-api-wsgi + controller: + - log_path: /var/log/containers/aodh/aodh-evaluator.log + instance: aodh-evaluator + - log_path: /var/log/containers/aodh/aodh-listener.log + instance: aodh-listener + - log_path: /var/log/containers/aodh/aodh-notifier.log + instance: aodh-notifier + - log_path: /var/log/containers/ceilometer/agent-notification.log + instance: ceilometer-agent-notification + - log_path: /var/log/containers/ceilometer/central.log + instance: ceilometer-central + - log_path: /var/log/containers/cinder/cinder-api.log + instance: cinder-api + - log_path: /var/log/containers/cinder/cinder-scheduler.log + instance: cinder-scheduler + - log_path: /var/log/containers/cinder/cinder-volume.log + instance: cinder-volume + - log_path: /var/log/containers/glance/api.log + instance: glance-api + - log_path: /var/log/containers/gnocchi/gnocchi-metricd.log + instance: gnocchi-metricd + - log_path: /var/log/containers/gnocchi/gnocchi-statsd.log + instance: gnocchi-statsd + - log_path: /var/log/containers/heat/heat_api_cfn.log + instance: heat-api-cfn + - log_path: /var/log/containers/heat/heat_api.log + instance: heat-api + - log_path: /var/log/containers/heat/heat-engine.log + instance: heat-engine + - log_path: /var/log/containers/keystone/keystone.log + instance: keystone + - log_path: /var/log/containers/mysql/mariadb.log + instance: mysql + - log_path: /var/log/containers/neutron/server.log + instance: neutron-server + - log_path: 
/var/log/containers/neutron/l3-agent.log + instance: neutron-l3-agent + - log_path: /var/log/containers/neutron/dhcp-agent.log + instance: neutron-dhcp-agent + - log_path: /var/log/containers/neutron/metadata-agent.log + instance: neutron-metadata-agent + - log_path: /var/log/containers/neutron/openvswitch-agent.log + instance: neutron-openvswitch-agent + - log_path: /var/log/containers/nova/nova-api.log + instance: nova-api + - log_path: /var/log/containers/nova/nova-conductor.log + instance: nova-conductor + - log_path: /var/log/containers/nova/nova-scheduler.log + instance: nova-scheduler + - log_path: /var/log/containers/nova/nova-metadata-api.log + instance: nova-metadata-api + - log_path: /var/log/containers/nova/nova-novncproxy.log + instance: nova-novncproxy + - log_path: /var/log/containers/openvswitch/ovn-controller.log + instance: ovn-controller + - log_path: /var/log/containers/openvswitch/ovn-northd.log + instance: ovn-northd + - log_path: /var/log/containers/openvswitch/ovsdb-server-nb.log + instance: ovsdb-server-nb + - log_path: /var/log/containers/openvswitch/ovsdb-server-sb.log + instance: ovsdb-server-sb + - log_path: /var/log/containers/placement/placement.log + instance: nova-placement + - log_path: /var/log/containers/rabbitmq/rabbit@{{ inventory_hostname }}.log + instance: rabbitmq-server + - log_path: /var/log/containers/redis/redis.log + instance: redis-server + - log_path: /var/log/containers/httpd/heat-api/heat_api_wsgi_error.log + instance: heat-api-wsgi + - log_path: /var/log/containers/httpd/ironic-api/ironic_wsgi_error.log + instance: ironic-api-wsgi + - log_path: /var/log/containers/httpd/ironic-pxe/ipxe_vhost_error.log + instance: ironic-pxe-wsgi + - log_path: /var/log/containers/httpd/keystone/keystone_wsgi_error.log + instance: keystone-wsgi + - log_path: /var/log/containers/httpd/nova-api/nova_api_wsgi_error.log + instance: nova-api-wsgi + - log_path: /var/log/containers/httpd/placement/placement_wsgi_error.log + instance: 
nova-placement-wsgi + - log_path: /var/log/containers/httpd/zaqar/zaqar_wsgi_error.log + instance: zaqar-api-wsgi + - log_path: /var/log/containers/httpd/aodh-api/aodh_wsgi_error.log + instance: aodh-api-wsgi + - log_path: /var/log/containers/httpd/cinder-api/cinder_wsgi_error.log + instance: cinder-api-wsgi + - log_path: /var/log/containers/httpd/gnocchi-api/gnocchi_wsgi_error.log + instance: gnocchi-api-wsgi + - log_path: /var/log/containers/httpd/heat-api-cfn/heat_api_cfn_wsgi_error.log + instance: heat-api-cfn-wsgi + - log_path: /var/log/containers/httpd/nova-metadata/nova_metadata_wsgi_error.log + instance: nova-metadata-wsgi + - log_path: /var/log/containers/swift/swift.log + instance: swift + compute: + - log_path: /var/log/containers/libvirt/libvirtd.log + instance: libvirtd + - log_path: /var/log/containers/neutron/ovn-metadata-agent.log + instance: ovn-metadata-agent + - log_path: /var/log/containers/nova/nova-compute.log + instance: nova-compute + - log_path: /var/log/containers/openvswitch/ovn-controller.log + instance: ovn-controller diff --git a/ansible/install/roles/epel/defaults/main.yml b/ansible/install/roles/epel/defaults/main.yml index e0a40271c..7efb508c8 100644 --- a/ansible/install/roles/epel/defaults/main.yml +++ b/ansible/install/roles/epel/defaults/main.yml @@ -1,3 +1,3 @@ -# epel7 rpm for collectd packages -epel7_rpm: https://download.fedoraproject.org/pub/epel/epel-release-latest-7.noarch.rpm -epel7_rpmkey: https://download.fedoraproject.org/pub/epel/RPM-GPG-KEY-EPEL-7 +# epel rpm for collectd packages +epel_rpm: https://download.fedoraproject.org/pub/epel/epel-release-latest-{{ ansible_distribution_major_version }}.noarch.rpm +epel_rpmkey: https://download.fedoraproject.org/pub/epel/RPM-GPG-KEY-EPEL-{{ ansible_distribution_major_version }} diff --git a/ansible/install/roles/epel/tasks/main.yml b/ansible/install/roles/epel/tasks/main.yml index 5a182eed6..c1df83512 100644 --- a/ansible/install/roles/epel/tasks/main.yml +++ 
b/ansible/install/roles/epel/tasks/main.yml @@ -14,7 +14,7 @@ - name: Import EPEL GPG Key rpm_key: state: present - key: "{{ epel7_rpmkey }}" + key: "{{ epel_rpmkey }}" become: true register: import_result until: import_result is success @@ -24,7 +24,7 @@ # Same as above but with the Centos CDN - name: Check for EPEL repo package: - name: "{{ epel7_rpm }}" + name: "{{ epel_rpm }}" state: present become: true register: install_result diff --git a/ansible/install/roles/grafana-dashboards/templates/openstack_general_system_performance.yaml.j2 b/ansible/install/roles/grafana-dashboards/templates/openstack_general_system_performance.yaml.j2 index 51fa9762d..1e6e87df7 100644 --- a/ansible/install/roles/grafana-dashboards/templates/openstack_general_system_performance.yaml.j2 +++ b/ansible/install/roles/grafana-dashboards/templates/openstack_general_system_performance.yaml.j2 @@ -150,10 +150,6 @@ dashboard: {% include 'partials/ironic_metrics.yaml' %} {% endif %} -{% if item.template_node_type in odl_groups %} - {% include 'partials/opendaylight_metrics.yaml' %} -{% endif %} - {% if item.template_node_type in ovsagent_groups %} {% include 'partials/neutron_resources.yaml' %} {% endif %} diff --git a/ansible/install/roles/grafana-dashboards/templates/partials/opendaylight_metrics.yaml b/ansible/install/roles/grafana-dashboards/templates/partials/opendaylight_metrics.yaml deleted file mode 100644 index 16bcdcdcc..000000000 --- a/ansible/install/roles/grafana-dashboards/templates/partials/opendaylight_metrics.yaml +++ /dev/null @@ -1,41 +0,0 @@ - - title: OpenDaylight - collapse: true - height: 200px - showTitle: true - panels: - - title: ODL Java Heap Memory - type: graph - legend: - alignAsTable: true - avg: true - current: true - max: true - min: true - rightSide: true - show: true - total: false - values: true - nullPointMode: 'null' - targets: - - target: aliasByNode($Cloud.$Node.GenericJMX-memory-heap.*, 3) - yaxes: - - format: bits - - format: short - - title: ODL 
Java Non-Heap Memory - type: graph - legend: - alignAsTable: true - avg: true - current: true - max: true - min: true - rightSide: true - show: true - total: false - values: true - nullPointMode: 'null' - targets: - - target: aliasByNode($Cloud.$Node.GenericJMX-memory-nonheap.*, 3) - yaxes: - - format: bits - - format: short diff --git a/ansible/install/roles/osp_version/tasks/main.yml b/ansible/install/roles/osp_version/tasks/main.yml index 6e13dc4c7..287875750 100644 --- a/ansible/install/roles/osp_version/tasks/main.yml +++ b/ansible/install/roles/osp_version/tasks/main.yml @@ -23,3 +23,15 @@ osp_version: content: "{{'Pike' | b64encode}}" when: not rhosp_release_stat.stat.exists + +- name: set numeric version for release + set_fact: + version: "{{ osp_version.content | b64decode }}" + +- name: set rhosp version (downstream) + set_fact: + rhosp_version: "{{ version.split()[5] }}" + +- name: set rhosp major version (downstream) + set_fact: + rhosp_major: "{{ osp_version.content | b64decode | regex_replace('^Red Hat OpenStack Platform release ([0-9]+)\\.\\d+.*\n', '\\1') }}" diff --git a/ansible/oooq/roles/collectd-undercloud/tasks/main.yml b/ansible/oooq/roles/collectd-undercloud/tasks/main.yml index 35f217ec9..d77ea8fdd 100644 --- a/ansible/oooq/roles/collectd-undercloud/tasks/main.yml +++ b/ansible/oooq/roles/collectd-undercloud/tasks/main.yml @@ -14,7 +14,7 @@ ansible-playbook -i hosts -c local \ --extra-vars graphite_host={{ graphite_host_template }} \ --extra-vars graphite_prefix={{ graphite_prefix_template }} \ - install/collectd-openstack.yml \ + install/collectd.yml \ > {{ ansible_env.HOME }}/browbeat/results/collecd_install.log" register: collectd_install until: collectd_install.rc == 0 diff --git a/ansible/oooq/roles/collectd/tasks/main.yml b/ansible/oooq/roles/collectd/tasks/main.yml index ac71518f8..a860a2bdb 100644 --- a/ansible/oooq/roles/collectd/tasks/main.yml +++ b/ansible/oooq/roles/collectd/tasks/main.yml @@ -9,7 +9,7 @@ --extra-vars 
graphite_host={{ graphite_host_template }} \ --extra-vars graphite_prefix={{ graphite_prefix_template }} \ --extra-vars dns_server={{ dns_server }} \ - install/collectd-openstack.yml \ + install/collectd.yml \ > {{ ansible_env.HOME }}/browbeat/results/collecd_install.log" register: collectd_install until: collectd_install.rc == 0 diff --git a/browbeat-containers/collectd-baremetal/Dockerfile b/browbeat-containers/collectd-baremetal/Dockerfile new file mode 100644 index 000000000..efd25ff08 --- /dev/null +++ b/browbeat-containers/collectd-baremetal/Dockerfile @@ -0,0 +1,11 @@ +FROM centos:7 + +RUN yum update -y && \ + yum clean all && \ + yum install -y https://dl.fedoraproject.org/pub/epel/epel-release-latest-7.noarch.rpm && \ + yum install -y centos-release-opstools && \ + yum install -y collectd collectd-turbostat collectd-disk + +ADD config/collectd.conf /etc/collectd.conf + +CMD ["collectd", "-f"] diff --git a/browbeat-containers/collectd-guest/Dockerfile b/browbeat-containers/collectd-guest/Dockerfile new file mode 100644 index 000000000..1deca0ac7 --- /dev/null +++ b/browbeat-containers/collectd-guest/Dockerfile @@ -0,0 +1,11 @@ +FROM centos:7 + +RUN yum update -y && \ + yum clean all && \ + yum install -y https://dl.fedoraproject.org/pub/epel/epel-release-latest-7.noarch.rpm && \ + yum install -y centos-release-opstools && \ + yum install -y collectd + +ADD config/collectd.conf /etc/collectd.conf + +CMD ["collectd", "-f"] diff --git a/browbeat-containers/collectd-openstack/Dockerfile b/browbeat-containers/collectd-openstack/Dockerfile new file mode 100644 index 000000000..06a8ca94e --- /dev/null +++ b/browbeat-containers/collectd-openstack/Dockerfile @@ -0,0 +1,22 @@ +FROM centos:7 + +RUN yum update -y && \ + yum clean all && \ + yum install -y https://dl.fedoraproject.org/pub/epel/epel-release-latest-7.noarch.rpm && \ + yum install -y centos-release-opstools && \ + yum install -y collectd collectd-turbostat collectd-disk
collectd-apache collectd-ceph \ + collectd-mysql collectd-python collectd-ping && \ + yum install -y sysstat && \ + yum install -y python2-pip python2-devel && \ + pip install pyrabbit && \ + yum install -y libdbi-dbd-mysql collectd-dbi + + +ADD files/collectd_ceph_storage.py /usr/local/bin/collectd_ceph_storage.py +ADD files/collectd_gnocchi_status.py /usr/local/bin/collectd_gnocchi_status.py +ADD files/collectd_rabbitmq_monitoring.py /usr/local/bin/collectd_rabbitmq_monitoring.py +ADD files/collectd_swift_stat.py /usr/local/bin/collectd_swift_stat.py + +ADD config/collectd.conf /etc/collectd.conf + +CMD ["collectd", "-f"] diff --git a/browbeat-containers/collectd-openstack/files/collectd_ceph_storage.py b/browbeat-containers/collectd-openstack/files/collectd_ceph_storage.py new file mode 100644 index 000000000..5cba7ec2a --- /dev/null +++ b/browbeat-containers/collectd-openstack/files/collectd_ceph_storage.py @@ -0,0 +1,334 @@ +#!/usr/bin/env python +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +"""Collectd python plugin to read ceph storage stats from ceph command line for +an OpenStack Cloud. 
+""" + +import collectd +import json +import os +import subprocess +import time +import traceback + + +class CollectdCephStorage(object): + def __init__(self): + self.ceph_cluster = None + self.ceph_rados_bench = False + self.ceph_rados_bench_interval = 60 + self.ceph_mon_stats = False + self.ceph_mon_stats_interval = 10 + self.ceph_osd_stats = False + self.ceph_osd_stats_interval = 10 + self.ceph_pg_stats = False + self.ceph_pg_stats_interval = 10 + self.ceph_pool_stats = False + self.ceph_pool_stats_interval = 10 + + def configure_callback(self, config): + for node in config.children: + val = str(node.values[0]) + if node.key == 'CephRadosBench': + self.ceph_rados_bench = val in ['True', 'true'] + elif node.key == 'CephMONStats': + self.ceph_mon_stats = val in ['True', 'true'] + elif node.key == 'CephOSDStats': + self.ceph_osd_stats = val in ['True', 'true'] + elif node.key == 'CephPGStats': + self.ceph_pg_stats = val in ['True', 'true'] + elif node.key == 'CephPoolStats': + self.ceph_pool_stats = val in ['True', 'true'] + elif node.key == 'CephCluster': + self.ceph_cluster = val + elif node.key == 'CephRadosBenchInterval': + self.ceph_rados_bench_interval = int(float(val)) + elif node.key == 'CephMONStatsInterval': + self.ceph_mon_stats_interval = int(float(val)) + elif node.key == 'CephOSDStatsInterval': + self.ceph_osd_stats_interval = int(float(val)) + elif node.key == 'CephPGStatsInterval': + self.ceph_pg_stats_interval = int(float(val)) + elif node.key == 'CephPoolStatsInterval': + self.ceph_pool_stats_interval = int(float(val)) + else: + collectd.warning( + 'collectd-ceph-storage: Unknown config key: {}' + .format(node.key)) + + if not self.ceph_cluster: + collectd.warning('collectd-ceph-storage: CephCluster Undefined') + + if self.ceph_rados_bench: + collectd.info('Registered Ceph Rados Bench') + collectd.register_read( + self.read_ceph_rados_bench, + self.ceph_rados_bench_interval, name='ceph-rados-bench') + if self.ceph_mon_stats: + 
collectd.info('Registered Ceph Mon') + collectd.register_read( + self.read_ceph_mon, self.ceph_mon_stats_interval, + name='ceph-monitor') + if self.ceph_osd_stats: + collectd.info('Registered Ceph OSD') + collectd.register_read( + self.read_ceph_osd, self.ceph_osd_stats_interval, + name='ceph-osd') + if self.ceph_pg_stats: + collectd.info('Registered Ceph PG') + collectd.register_read( + self.read_ceph_pg, self.ceph_pg_stats_interval, name='ceph-pg') + if self.ceph_pool_stats: + collectd.info('Registered Ceph Pool') + collectd.register_read( + self.read_ceph_pool, self.ceph_pool_stats_interval, + name='ceph-pool') + + def dispatch_value(self, plugin_instance, type_instance, value, interval): + metric = collectd.Values() + metric.plugin = 'collectd-ceph-storage' + metric.interval = interval + metric.type = 'gauge' + metric.plugin_instance = plugin_instance + metric.type_instance = type_instance + metric.values = [value] + metric.dispatch() + + def read_ceph_rados_bench(self): + """Runs "rados bench" and collects latencies reported.""" + rados_bench_ran, output = self.run_command( + ['timeout', '30s', 'rados', '-p', 'rbd', 'bench', '10', + 'write', '-t', '1', '-b', '65536', '2>/dev/null', '|', + 'grep', '-i', 'latency', '|', 'awk', + '\'{print 1000*$3}\''], False) + + if rados_bench_ran: + results = output.split('\n') + + self.dispatch_value( + 'cluster', 'avg_latency', results[0], + self.ceph_rados_bench_interval) + self.dispatch_value( + 'cluster', 'stddev_latency', results[1], + self.ceph_rados_bench_interval) + self.dispatch_value( + 'cluster', 'max_latency', results[2], + self.ceph_rados_bench_interval) + self.dispatch_value( + 'cluster', 'min_latency', results[3], + self.ceph_rados_bench_interval) + + def read_ceph_mon(self): + """Reads stats from "ceph mon dump" command.""" + mon_dump_ran, output = self.run_command( + ['ceph', 'mon', 'dump', '-f', 'json', '--cluster', + self.ceph_cluster]) + + if mon_dump_ran: + json_data = json.loads(output) + + 
self.dispatch_value( + 'mon', 'number', len(json_data['mons']), + self.ceph_mon_stats_interval) + self.dispatch_value( + 'mon', 'quorum', len(json_data['quorum']), + self.ceph_mon_stats_interval) + + def read_ceph_osd(self): + """Reads stats from "ceph osd dump" command.""" + osd_dump_ran, output = self.run_command( + ['ceph', 'osd', 'dump', '-f', 'json', '--cluster', + self.ceph_cluster]) + + if osd_dump_ran: + json_data = json.loads(output) + + self.dispatch_value( + 'pool', 'number', len(json_data['pools']), + self.ceph_osd_stats_interval) + + for pool in json_data['pools']: + pool_name = 'pool-{}'.format(pool['pool_name']) + self.dispatch_value( + pool_name, 'size', pool['size'], + self.ceph_osd_stats_interval) + self.dispatch_value( + pool_name, 'pg_num', pool['pg_num'], + self.ceph_osd_stats_interval) + self.dispatch_value( + pool_name, 'pgp_num', pool['pg_placement_num'], + self.ceph_osd_stats_interval) + + osds_up = 0 + osds_down = 0 + osds_in = 0 + osds_out = 0 + for osd in json_data['osds']: + if osd['up'] == 1: + osds_up += 1 + else: + osds_down += 1 + if osd['in'] == 1: + osds_in += 1 + else: + osds_out += 1 + + self.dispatch_value( + 'osd', 'up', osds_up, self.ceph_osd_stats_interval) + self.dispatch_value( + 'osd', 'down', osds_down, self.ceph_osd_stats_interval) + self.dispatch_value( + 'osd', 'in', osds_in, self.ceph_osd_stats_interval) + self.dispatch_value( + 'osd', 'out', osds_out, self.ceph_osd_stats_interval) + + def read_ceph_pg(self): + """Reads stats from "ceph pg dump" command.""" + pg_dump_ran, output = self.run_command( + ['ceph', 'pg', 'dump', '-f', 'json', '--cluster', + self.ceph_cluster]) + + if pg_dump_ran: + json_data = json.loads(output) + + pgs = {} + for pg in json_data['pg_stats']: + for state in pg['state'].split('+'): + if state not in pgs: + pgs[state] = 0 + pgs[state] += 1 + + for state in pgs: + self.dispatch_value( + 'pg', state, pgs[state], self.ceph_pg_stats_interval) + + for osd in json_data['osd_stats']: + osd_id = 
'osd-{}'.format(osd['osd']) + self.dispatch_value( + osd_id, 'kb_used', osd['kb_used'], + self.ceph_pg_stats_interval) + self.dispatch_value( + osd_id, 'kb_total', osd['kb'], self.ceph_pg_stats_interval) + self.dispatch_value( + osd_id, 'snap_trim_queue_len', osd['snap_trim_queue_len'], + self.ceph_pg_stats_interval) + self.dispatch_value( + osd_id, 'num_snap_trimming', osd['num_snap_trimming'], + self.ceph_pg_stats_interval) + if 'fs_perf_stat' in osd: + self.dispatch_value( + osd_id, 'apply_latency_ms', + osd['fs_perf_stat']['apply_latency_ms'], + self.ceph_pg_stats_interval) + self.dispatch_value( + osd_id, 'commit_latency_ms', + osd['fs_perf_stat']['commit_latency_ms'], + self.ceph_pg_stats_interval) + elif 'perf_stat' in osd: + self.dispatch_value( + osd_id, 'apply_latency_ms', + osd['perf_stat']['apply_latency_ms'], + self.ceph_pg_stats_interval) + self.dispatch_value( + osd_id, 'commit_latency_ms', + osd['perf_stat']['commit_latency_ms'], + self.ceph_pg_stats_interval) + + def read_ceph_pool(self): + """Reads stats from "ceph osd pool" and "ceph df" commands.""" + stats_ran, stats_output = self.run_command( + ['ceph', 'osd', 'pool', 'stats', '-f', 'json']) + df_ran, df_output = self.run_command(['ceph', 'df', '-f', 'json']) + + if stats_ran: + json_stats_data = json.loads(stats_output) + + for pool in json_stats_data: + pool_key = 'pool-{}'.format(pool['pool_name']) + for stat in ( + 'read_bytes_sec', 'write_bytes_sec', 'read_op_per_sec', + 'write_op_per_sec'): + value = 0 + if stat in pool['client_io_rate']: + value = pool['client_io_rate'][stat] + self.dispatch_value( + pool_key, stat, value, self.ceph_pool_stats_interval) + + if df_ran: + json_df_data = json.loads(df_output) + + for pool in json_df_data['pools']: + pool_key = 'pool-{}'.format(pool['name']) + for stat in ('bytes_used', 'kb_used', 'objects'): + value = pool['stats'][stat] if stat in pool['stats'] else 0 + self.dispatch_value( + pool_key, stat, value, self.ceph_pool_stats_interval) + + if 
'total_bytes' in json_df_data['stats']: + # ceph 0.84+ + self.dispatch_value( + 'cluster', 'total_space', + int(json_df_data['stats']['total_bytes']), + self.ceph_pool_stats_interval) + self.dispatch_value( + 'cluster', 'total_used', + int(json_df_data['stats']['total_used_bytes']), + self.ceph_pool_stats_interval) + self.dispatch_value( + 'cluster', 'total_avail', + int(json_df_data['stats']['total_avail_bytes']), + self.ceph_pool_stats_interval) + else: + # ceph < 0.84 + self.dispatch_value( + 'cluster', 'total_space', + int(json_df_data['stats']['total_space']) * 1024.0, + self.ceph_pool_stats_interval) + self.dispatch_value( + 'cluster', 'total_used', + int(json_df_data['stats']['total_used']) * 1024.0, + self.ceph_pool_stats_interval) + self.dispatch_value( + 'cluster', 'total_avail', + int(json_df_data['stats']['total_avail']) * 1024.0, + self.ceph_pool_stats_interval) + + def run_command(self, command, check_output=True): + """Run a command for this collectd plugin. Returns a tuple with command + success and output or False and None for output. 
+ """ + output = None + try: + if check_output: + output = subprocess.check_output(command) + else: + stdin, stdout, stderr = os.popen3(' '.join(command)) + output = stdout.read() + except Exception as exc: + collectd.error( + 'collectd-ceph-storage: {} exception: {}'.format(command, exc)) + collectd.error( + 'collectd-ceph-storage: {} traceback: {}' + .format(command, traceback.format_exc())) + return False, None + + if output is None: + collectd.error( + 'collectd-ceph-storage: failed to {}: output is None' + .format(command)) + return False, None + return True, output + +collectd_ceph_storage = CollectdCephStorage() +collectd.register_config(collectd_ceph_storage.configure_callback) diff --git a/browbeat-containers/collectd-openstack/files/collectd_gnocchi_status.py b/browbeat-containers/collectd-openstack/files/collectd_gnocchi_status.py new file mode 100644 index 000000000..42460b26c --- /dev/null +++ b/browbeat-containers/collectd-openstack/files/collectd_gnocchi_status.py @@ -0,0 +1,96 @@ +#!/usr/bin/env python +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Collectd python plugin to read gnocchi status on an OpenStack Controller.""" +from gnocchiclient.v1 import client +from keystoneauth1 import session +import collectd +import os +import time + + +def configure(configobj): + global INTERVAL + + config = {c.key: c.values for c in configobj.children} + INTERVAL = 10 + if 'interval' in config: + INTERVAL = config['interval'][0] + collectd.info('gnocchi_status: Interval: {}'.format(INTERVAL)) + collectd.register_read(read, INTERVAL) + + +def read(data=None): + starttime = time.time() + + gnocchi = client.Client(session=keystone_session) + try: + status = gnocchi.status.get() + metric = collectd.Values() + metric.plugin = 'gnocchi_status' + metric.interval = INTERVAL + metric.type = 'gauge' + metric.type_instance = 'measures' + metric.values = [status['storage']['summary']['measures']] + metric.dispatch() + + metric = collectd.Values() + metric.plugin = 'gnocchi_status' + metric.interval = INTERVAL + metric.type = 'gauge' + metric.type_instance = 'metrics' + metric.values = [status['storage']['summary']['metrics']] + metric.dispatch() + except Exception as err: + collectd.error( + 'gnocchi_status: Exception getting status: {}' + .format(err)) + + timediff = time.time() - starttime + if timediff > INTERVAL: + collectd.warning( + 'gnocchi_status: Took: {} > {}' + .format(round(timediff, 2), INTERVAL)) + + +def create_keystone_session(): + if int(os_identity_api_version) == 3: + from keystoneauth1.identity import v3 + auth = v3.Password( + username=os_username, password=os_password, project_name=os_tenant, + user_domain_name=os_user_domain_name, project_domain_name=os_project_domain_name, + auth_url=os_auth_url) + else: + from keystoneauth1.identity import v2 + auth = v2.Password( + username=os_username, password=os_password, tenant_name=os_tenant, + auth_url=os_auth_url) + return session.Session(auth=auth) + +os_identity_api_version = os.environ.get('OS_IDENTITY_API_VERSION') +if os_identity_api_version is None: + 
os_identity_api_version = 2 +os_username = os.environ.get('OS_USERNAME') +os_password = os.environ.get('OS_PASSWORD') +os_tenant = os.environ.get('OS_TENANT_NAME') +if os_tenant is None: + os_tenant = os.environ.get('OS_PROJECT_NAME') +os_auth_url = os.environ.get('OS_AUTH_URL') +os_project_domain_name = os.environ.get('OS_PROJECT_DOMAIN_NAME') +os_user_domain_name = os.environ.get('OS_USER_DOMAIN_NAME') + +collectd.info( + 'gnocchi_status: Keystone API: {} Connecting with user={}, password=***, tenant/project={}, ' + 'auth_url={}'.format(os_identity_api_version, os_username, os_tenant, os_auth_url)) + +keystone_session = create_keystone_session() +collectd.register_config(configure) diff --git a/browbeat-containers/collectd-openstack/files/collectd_iostat_python.py b/browbeat-containers/collectd-openstack/files/collectd_iostat_python.py new file mode 100644 index 000000000..fff8f49f7 --- /dev/null +++ b/browbeat-containers/collectd-openstack/files/collectd_iostat_python.py @@ -0,0 +1,403 @@ +#!/usr/bin/env python +# coding=utf-8 +# The MIT License (MIT) +# +# Copyright (c) 2014-2016 Denis Zhdanov +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +# +# collectd-iostat-python +# ====================== +# +# Collectd-iostat-python is an iostat plugin for collectd that allows you to +# graph Linux iostat metrics in Graphite or other output formats that are +# supported by collectd. +# +# https://github.com/powdahound/redis-collectd-plugin +# - was used as template +# https://github.com/keirans/collectd-iostat/ +# - was used as inspiration and contains some code from +# https://bitbucket.org/jakamkon/python-iostat +# - by Kuba Kończyk +# + +import signal +import string +import subprocess +import sys +import re +try: + import pyudev + pyudev_available = True +except ImportError: + pyudev_available = False + +# Original Version/Author +__version__ = '0.0.5' +__author__ = 'denis.zhdanov@gmail.com' + + +class IOStatError(Exception): + pass + + +class CmdError(IOStatError): + pass + + +class ParseError(IOStatError): + pass + + +class IOStat(object): + def __init__(self, path='/usr/bin/iostat', interval=2, count=2, disks=[], no_dm_name=False): + self.path = path + self.interval = interval + self.count = count + self.disks = disks + self.no_dm_name = no_dm_name + + def parse_diskstats(self, input): + """ + Parse iostat -d and -dx output.If there are more + than one series of statistics, get the last one. + By default parse statistics for all available block devices. + + @type input: C{string} + @param input: iostat output + + @type disks: list of C{string}s + @param input: lists of block devices that + statistics are taken for. + + @return: C{dictionary} contains per block device statistics. + Statistics are in form of C{dictonary}. 
+ Main statistics: + tps Blk_read/s Blk_wrtn/s Blk_read Blk_wrtn + Extended staistics (available with post 2.5 kernels): + rrqm/s wrqm/s r/s w/s rsec/s wsec/s rkB/s wkB/s avgrq-sz \ + avgqu-sz await svctm %util + See I{man iostat} for more details. + """ + dstats = {} + dsi = input.rfind('Device:') + if dsi == -1: + raise ParseError('Unknown input format: %r' % input) + + ds = input[dsi:].splitlines() + hdr = ds.pop(0).split()[1:] + + for d in ds: + if d: + d = d.split() + d = [re.sub(r',','.',element) for element in d] + dev = d.pop(0) + if (dev in self.disks) or not self.disks: + dstats[dev] = dict([(k, float(v)) for k, v in zip(hdr, d)]) + + return dstats + + def sum_dstats(self, stats, smetrics): + """ + Compute the summary statistics for chosen metrics. + """ + avg = {} + + for disk, metrics in stats.iteritems(): + for mname, metric in metrics.iteritems(): + if mname not in smetrics: + continue + if mname in avg: + avg[mname] += metric + else: + avg[mname] = metric + + return avg + + def _run(self, options=None): + """ + Run iostat command. + """ + close_fds = 'posix' in sys.builtin_module_names + args = '%s %s %s %s %s' % ( + self.path, + ''.join(options), + self.interval, + self.count, + ' '.join(self.disks)) + + return subprocess.Popen( + args, + bufsize=1, + shell=True, + stdout=subprocess.PIPE, + close_fds=close_fds) + + @staticmethod + def _get_childs_data(child): + """ + Return child's data when available. + """ + (stdout, stderr) = child.communicate() + ecode = child.poll() + + if ecode != 0: + raise CmdError('Command %r returned %d' % (child.cmd, ecode)) + + return stdout + + def get_diskstats(self): + """ + Get all available disks statistics that we can get. 
+ iostat -kNd + tps kB_read/s kB_wrtn/s kB_read kB_wrtn + iostat -kNdx + rrqm/s wrqm/s r/s w/s rkB/s wkB/s avgrq-sz + avgqu-sz await r_await w_await svctm %util + """ + options=['-','k','N','d'] + extdoptions=['-','k','N','d','x'] + if self.no_dm_name: + options.remove('N') + extdoptions.remove('N') + dstats = self._run(options) + extdstats = self._run(extdoptions) + dsd = self._get_childs_data(dstats) + edd = self._get_childs_data(extdstats) + ds = self.parse_diskstats(dsd) + eds = self.parse_diskstats(edd) + + for dk, dv in ds.iteritems(): + if dk in eds: + ds[dk].update(eds[dk]) + + return ds + + +class IOMon(object): + def __init__(self): + self.plugin_name = 'collectd-iostat-python' + self.iostat_path = '/usr/bin/iostat' + self.interval = 60.0 + self.iostat_interval = 2 + self.iostat_count = 2 + self.iostat_disks = [] + self.iostat_nice_names = False + self.iostat_disks_regex = '' + self.iostat_udevnameattr = '' + self.skip_multipath = False + self.verbose_logging = False + self.iostat_no_dm_name = False + self.names = { + 'tps': {'t': 'transfers_per_second'}, + 'Blk_read/s': {'t': 'blocks_per_second', 'ti': 'read'}, + 'kB_read/s': {'t': 'bytes_per_second', 'ti': 'read', 'm': 1024}, + 'MB_read/s': {'t': 'bytes_per_second', 'ti': 'read', 'm': 1048576}, + 'Blk_wrtn/s': {'t': 'blocks_per_second', 'ti': 'write'}, + 'kB_wrtn/s': {'t': 'bytes_per_second', 'ti': 'write', 'm': 1024}, + 'MB_wrtn/s': {'t': 'bytes_per_second', 'ti': 'write', 'm': 1048576}, + 'Blk_read': {'t': 'blocks', 'ti': 'read'}, + 'kB_read': {'t': 'bytes', 'ti': 'read', 'm': 1024}, + 'MB_read': {'t': 'bytes', 'ti': 'read', 'm': 1048576}, + 'Blk_wrtn': {'t': 'blocks', 'ti': 'write'}, + 'kB_wrtn': {'t': 'bytes', 'ti': 'write', 'm': 1024}, + 'MB_wrtn': {'t': 'bytes', 'ti': 'write', 'm': 1048576}, + 'rrqm/s': {'t': 'requests_merged_per_second', 'ti': 'read'}, + 'wrqm/s': {'t': 'requests_merged_per_second', 'ti': 'write'}, + 'r/s': {'t': 'per_second', 'ti': 'read'}, + 'w/s': {'t': 'per_second', 'ti': 
'write'}, + 'rsec/s': {'t': 'sectors_per_second', 'ti': 'read'}, + 'rkB/s': {'t': 'bytes_per_second', 'ti': 'read', 'm': 1024}, + 'rMB/s': {'t': 'bytes_per_second', 'ti': 'read', 'm': 1048576}, + 'wsec/s': {'t': 'sectors_per_second', 'ti': 'write'}, + 'wkB/s': {'t': 'bytes_per_second', 'ti': 'write', 'm': 1024}, + 'wMB/s': {'t': 'bytes_per_second', 'ti': 'write', 'm': 1048576}, + 'avgrq-sz': {'t': 'avg_request_size'}, + 'avgqu-sz': {'t': 'avg_request_queue'}, + 'await': {'t': 'avg_wait_time'}, + 'r_await': {'t': 'avg_wait_time', 'ti': 'read'}, + 'w_await': {'t': 'avg_wait_time', 'ti': 'write'}, + 'svctm': {'t': 'avg_service_time'}, + '%util': {'t': 'percent', 'ti': 'util'} + } + + def log_verbose(self, msg): + if not self.verbose_logging: + return + collectd.info('%s plugin [verbose]: %s' % (self.plugin_name, msg)) + + def configure_callback(self, conf): + """ + Receive configuration block + """ + for node in conf.children: + val = str(node.values[0]) + + if node.key == 'Path': + self.iostat_path = val + elif node.key == 'Interval': + self.interval = float(val) + elif node.key == 'IostatInterval': + self.iostat_interval = int(float(val)) + elif node.key == 'Count': + self.iostat_count = int(float(val)) + elif node.key == 'Disks': + self.iostat_disks = val.split(',') + elif node.key == 'NiceNames': + self.iostat_nice_names = val in ['True', 'true'] + elif node.key == 'DisksRegex': + self.iostat_disks_regex = val + elif node.key == 'UdevNameAttr': + self.iostat_udevnameattr = val + elif node.key == 'PluginName': + self.plugin_name = val + elif node.key == 'Verbose': + self.verbose_logging = val in ['True', 'true'] + elif node.key == 'SkipPhysicalMultipath': + self.skip_multipath = val in [ 'True', 'true' ] + elif node.key == 'NoDisplayDMName': + self.iostat_no_dm_name = val in [ 'True', 'true' ] + else: + collectd.warning( + '%s plugin: Unknown config key: %s.' 
% ( + self.plugin_name, + node.key)) + + self.log_verbose( + 'Configured with iostat=%s, interval=%s, count=%s, disks=%s, ' + 'disks_regex=%s udevnameattr=%s skip_multipath=%s no_dm_name=%s' % ( + self.iostat_path, + self.iostat_interval, + self.iostat_count, + self.iostat_disks, + self.iostat_disks_regex, + self.iostat_udevnameattr, + self.skip_multipath, + self.iostat_no_dm_name)) + + collectd.register_read(self.read_callback, self.interval) + + def dispatch_value(self, plugin_instance, val_type, type_instance, value): + """ + Dispatch a value to collectd + """ + self.log_verbose( + 'Sending value: %s-%s.%s=%s' % ( + self.plugin_name, + plugin_instance, + '-'.join([val_type, type_instance]), + value)) + + val = collectd.Values() + val.plugin = self.plugin_name + val.plugin_instance = plugin_instance + val.type = val_type + if len(type_instance): + val.type_instance = type_instance + val.values = [value, ] + val.meta={'0': True} + val.dispatch() + + def read_callback(self): + """ + Collectd read callback + """ + self.log_verbose('Read callback called') + iostat = IOStat( + path=self.iostat_path, + interval=self.iostat_interval, + count=self.iostat_count, + disks=self.iostat_disks, + no_dm_name=self.iostat_no_dm_name) + ds = iostat.get_diskstats() + + if not ds: + self.log_verbose('%s plugin: No info received.' 
% self.plugin_name) + return + + if self.iostat_udevnameattr and pyudev_available: + context = pyudev.Context() + + for disk in ds: + if not re.match(self.iostat_disks_regex, disk): + continue + if self.iostat_udevnameattr and pyudev_available: + device = pyudev.Device.from_device_file(context, "/dev/" + disk) + if self.skip_multipath: + mp_managed = device.get('DM_MULTIPATH_DEVICE_PATH') + if mp_managed and mp_managed == '1': + self.log_verbose('Skipping physical multipath disk %s' % disk) + continue + if self.iostat_udevnameattr: + persistent_name = device.get(self.iostat_udevnameattr) + if not persistent_name: + self.log_verbose('Unable to determine disk name based on UdevNameAttr: %s' % self.iostat_udevnameattr) + persistent_name = disk + else: + persistent_name = disk + + for name in ds[disk]: + if self.iostat_nice_names and name in self.names: + val_type = self.names[name]['t'] + + if 'ti' in self.names[name]: + type_instance = self.names[name]['ti'] + else: + type_instance = '' + + value = ds[disk][name] + if 'm' in self.names[name]: + value *= self.names[name]['m'] + else: + val_type = 'gauge' + tbl = string.maketrans('/-%', '___') + type_instance = name.translate(tbl) + value = ds[disk][name] + self.dispatch_value( + persistent_name, val_type, type_instance, value) + +def restore_sigchld(): + """ + Restore SIGCHLD handler for python <= v2.6 + It will BREAK exec plugin!!! 
+ See https://github.com/deniszh/collectd-iostat-python/issues/2 for details + """ + if sys.version_info[0] == 2 and sys.version_info[1] <= 6: + signal.signal(signal.SIGCHLD, signal.SIG_DFL) + + +if __name__ == '__main__': + iostat = IOStat() + ds = iostat.get_diskstats() + + for disk in ds: + for metric in ds[disk]: + tbl = string.maketrans('/-%', '___') + metric_name = metric.translate(tbl) + print("%s.%s:%s" % (disk, metric_name, ds[disk][metric])) + + sys.exit(0) +else: + import collectd + + iomon = IOMon() + + # Register callbacks + collectd.register_init(restore_sigchld) + collectd.register_config(iomon.configure_callback) diff --git a/browbeat-containers/collectd-openstack/files/collectd_ovsagent.py b/browbeat-containers/collectd-openstack/files/collectd_ovsagent.py new file mode 100644 index 000000000..398461ac0 --- /dev/null +++ b/browbeat-containers/collectd-openstack/files/collectd_ovsagent.py @@ -0,0 +1,82 @@ +#!/usr/bin/env python +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +""" +""" + +import collectd +import os +import subprocess +import time + +def configure(cfg): + global INTERVAL + global interfaces + global namespaces + interfaces = [] + namespaces = [] + config = {c.key: c.values for c in cfg.children} + INTERVAL = config['interval'][0] + collectd.register_read(read, INTERVAL) + if 'interfaces' in config: + interfaces = config['interfaces'] + if 'namespaces' in config : + namespaces = config['namespaces'] + +def run_command(command): + output = subprocess.Popen(command,shell=True,stdout=subprocess.PIPE) + return output.communicate() + +def read(data=None): + starttime = time.time() + ifs = [] + ns = [] + if len(interfaces) > 0 : + collectd.debug("Interfaces : {}".format(interfaces)) + for interface in interfaces : + ifs.append({interface: run_command("ovs-vsctl show | grep 'Port \\\"{}' | wc -l".format(interface))[0].replace("\n","")}) + if len(namespaces) > 0 : + collectd.debug("Namespaces : {}".format(namespaces)) + for namespace in namespaces : + ns.append({namespace: run_command("sudo ip netns | grep {} | wc -l".format(namespace))[0].replace("\n","")}) + if len(ifs) > 0 : + for i in ifs : + for value in i: + metric = collectd.Values() + metric.plugin = 'ovsagent_monitoring' + metric.interval = INTERVAL + metric.type = 'gauge' + metric.type_instance = "{}_interface_total-count".format(value) + metric.values = [i[value]] + metric.dispatch() + + if len(ns) > 0 : + for n in ns : + for value in n: + metric = collectd.Values() + metric.plugin = 'ovsagent_monitoring' + metric.interval = INTERVAL + metric.type = 'gauge' + metric.type_instance = "{}_ns_total-count".format(value) + metric.values = [n[value]] + metric.dispatch() + + timediff = time.time() - starttime + if timediff > INTERVAL: + collectd.warning( + 'ovsagent_monitoring: Took: {} > {}'.format( + round(timediff, 2), + INTERVAL) + ) + +collectd.register_config(configure) diff --git a/browbeat-containers/collectd-openstack/files/collectd_rabbitmq_monitoring.py 
b/browbeat-containers/collectd-openstack/files/collectd_rabbitmq_monitoring.py new file mode 100644 index 000000000..8d9a73a2f --- /dev/null +++ b/browbeat-containers/collectd-openstack/files/collectd_rabbitmq_monitoring.py @@ -0,0 +1,147 @@ +#!/usr/bin/env python +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Collectd python plugin to read rabbitmq metrics from rabbitmq management +plugin. +""" +from pyrabbit.api import Client +from pyrabbit.http import HTTPError +import collectd +import os +import time + + +def configure(configobj): + global INTERVAL + global cl + global queues_to_count + + config = {c.key: c.values for c in configobj.children} + INTERVAL = config['interval'][0] + host = config['host'][0] + port = int(config['port'][0]) + username = config['username'][0] + password = config['password'][0] + queues_to_count = [] + if 'message_count' in config: + queues_to_count = config['message_count'] + collectd.info('rabbitmq_monitoring: Interval: {}'.format(INTERVAL)) + cl = Client('{}:{}'.format(host, port), username, password) + collectd.info( + 'rabbitmq_monitoring: Connecting to: {}:{} as user:{} password:{}' + .format(host, port, username, password)) + collectd.info( + 'rabbitmq_monitoring: Counting messages on: {}' + .format(queues_to_count)) + collectd.register_read(read, INTERVAL) + + +def read(data=None): + starttime = time.time() + + overview = cl.get_overview() + + # Object counts + for m_instance in \ + ['channels', 'connections', 'consumers', 
'exchanges', 'queues']: + if m_instance in overview['object_totals']: + metric = collectd.Values() + metric.plugin = 'rabbitmq_monitoring' + metric.interval = INTERVAL + metric.type = 'gauge' + metric.type_instance = m_instance + metric.values = [overview['object_totals'][m_instance]] + metric.dispatch() + + # Aggregated Queue message stats + for m_instance in \ + ['messages', 'messages_ready', 'messages_unacknowledged']: + if m_instance in overview['queue_totals']: + metric = collectd.Values() + metric.plugin = 'rabbitmq_monitoring' + metric.interval = INTERVAL + metric.type = 'gauge' + metric.type_instance = 'queue_total-{}-count'.format(m_instance) + metric.values = [overview['queue_totals'][m_instance]] + metric.dispatch() + + metric = collectd.Values() + metric.plugin = 'rabbitmq_monitoring' + metric.interval = INTERVAL + metric.type = 'gauge' + metric.type_instance = 'queue_total-{}-rate'.format( + m_instance) + metric.values = \ + [ + overview['queue_totals']['{}_details'.format(m_instance)] + ['rate'] + ] + metric.dispatch() + + # Aggregated Message Stats + for m_instance in \ + [ + 'ack', 'confirm', 'deliver', 'deliver_get', 'deliver_no_ack', + 'get', 'get_no_ack', 'publish', 'publish_in', 'publish_out', + 'redeliver', 'return_unroutable' + ]: + if m_instance in overview['message_stats']: + metric = collectd.Values() + metric.plugin = 'rabbitmq_monitoring' + metric.interval = INTERVAL + metric.type = 'gauge' + metric.type_instance = 'message_total-{}-count'.format(m_instance) + metric.values = [overview['message_stats'][m_instance]] + metric.dispatch() + + metric = collectd.Values() + metric.plugin = 'rabbitmq_monitoring' + metric.interval = INTERVAL + metric.type = 'gauge' + metric.type_instance = 'message_total-{}-rate'.format(m_instance) + metric.values = \ + [ + overview['message_stats']['{}_details'.format(m_instance)] + ['rate'] + ] + metric.dispatch() + + # Configurable per-queue message counts + for queue_name in queues_to_count: + messages_detail 
= None + try: + messages_detail = cl.get_messages('/', queue_name) + except HTTPError as err: + collectd.error( + 'Error Opening Queue [{}] details: {}' + .format(queue_name, err)) + if messages_detail is None: + count = 0 + else: + count = messages_detail[0]['message_count'] + metric = collectd.Values() + metric.plugin = 'rabbitmq_monitoring' + metric.interval = INTERVAL + metric.type = 'gauge' + metric.type_instance = 'msg_count-{}'.format(queue_name) + metric.values = [count] + metric.dispatch() + + timediff = time.time() - starttime + if timediff > INTERVAL: + collectd.warning( + 'rabbitmq_monitoring: Took: {} > {}'.format( + round(timediff, 2), + INTERVAL) + ) + +collectd.register_config(configure) diff --git a/browbeat-containers/collectd-openstack/files/collectd_swift_stat.py b/browbeat-containers/collectd-openstack/files/collectd_swift_stat.py new file mode 100644 index 000000000..aaaacb1ea --- /dev/null +++ b/browbeat-containers/collectd-openstack/files/collectd_swift_stat.py @@ -0,0 +1,121 @@ +#!/usr/bin/env python +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +"""Collectd python plugin to read swift stat on an OpenStack Controller.""" +from swiftclient.client import Connection +import collectd +import os +import time + + +class CollectdSwiftStat(object): + SWIFT_STATS = { + 'x-account-object-count': 'objects', + 'x-account-container-count': 'containers', + 'x-account-bytes-used': 'bytes'} + + def __init__(self): + self.interval = 10 + self.prefix = None + self.user = None + self.password = None + self.authurl = None + self.authversion = None + self.project = None + self.swift_conn = None + + def configure_callback(self, configobj): + for node in configobj.children: + val = str(node.values[0]) + if node.key == 'Interval': + self.interval = int(float(val)) + elif node.key == 'Prefix': + self.prefix = val + elif node.key == 'User': + self.user = val + elif node.key == 'Password': + self.password = val + elif node.key == 'AuthURL': + self.authurl = val + elif node.key == 'AuthVersion': + self.authversion = val + elif node.key == 'Project': + self.project = val + else: + collectd.warning( + 'collectd-swift-stat: Unknown config key: {}' + .format(node.key)) + + read_plugin = True + if not self.prefix: + collectd.error('collectd-swift-stat: Prefix Undefined') + read_plugin = False + if not self.user: + collectd.error('collectd-swift-stat: User Undefined') + read_plugin = False + if not self.password: + collectd.error('collectd-swift-stat: Password Undefined') + read_plugin = False + if not self.authurl: + collectd.error('collectd-swift-stat: AuthURL Undefined') + read_plugin = False + if not self.authversion: + collectd.error('collectd-swift-stat: AuthVersion Undefined') + read_plugin = False + if not self.project: + collectd.error('collectd-swift-stat: Project Undefined') + read_plugin = False + + if read_plugin: + collectd.info( + 'swift_stat: Connecting with user={}, password={}, tenant={}, auth_url={},' + ' auth_version={}'.format( + self.user, self.password, self.project, self.authurl, self.authversion)) + + 
self.swift_conn = self.create_swift_session() + collectd.register_read(self.read_swift_stat, self.interval) + else: + collectd.error('collectd_swift_stat: Invalid configuration') + + def read_swift_stat(self, data=None): + starttime = time.time() + + stats = self.swift_conn.head_account() + + for m_instance, name in CollectdSwiftStat.SWIFT_STATS.iteritems(): + if m_instance in stats: + metric = collectd.Values() + metric.plugin = 'swift_stat' + metric.interval = self.interval + metric.type = 'gauge' + metric.type_instance = '{}-{}'.format(self.prefix, name) + metric.values = [stats[m_instance]] + metric.dispatch() + else: + collectd.error( + 'swift_stat: Can not find: {}'.format(m_instance)) + + timediff = time.time() - starttime + if timediff > self.interval: + collectd.warning( + 'swift_stat: Took: {} > {}' + .format(round(timediff, 2), self.interval)) + + def create_swift_session(self): + return Connection( + authurl=self.authurl, user=self.user, key=self.password, + tenant_name=self.project, auth_version=self.authversion) + + +collectd_swift_stat = CollectdSwiftStat() +collectd.register_config(collectd_swift_stat.configure_callback) diff --git a/doc/source/installation.rst b/doc/source/installation.rst index ec17e7092..cc4b81fe2 100644 --- a/doc/source/installation.rst +++ b/doc/source/installation.rst @@ -76,16 +76,18 @@ has been installed. To skip directly to this task execute: :: $ ansible-playbook -i hosts install/browbeat.yml --start-at-task "Check browbeat_network" - ... (Optional) Install Collectd ~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Set ``collectd_container`` to true in ``ansible/install/group_vars/all.yml`` if running on OpenStack version Stein or later. The containerized collectd work +can also work with Queens release but it is not recommended. 
+ :: - [stack@ospd ansible]$ ansible-playbook -i hosts install/collectd-openstack.yml + [stack@ospd ansible]$ ansible-playbook -i hosts install/collectd.yml (Optional) Install Rsyslogd logging with aggregation ~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/doc/source/usage.rst b/doc/source/usage.rst index 11eaadbfd..1e80aabc6 100644 --- a/doc/source/usage.rst +++ b/doc/source/usage.rst @@ -117,81 +117,6 @@ using some simple searches such as: shaker_uuid: 97092334-34e8-446c-87d6-6a0f361b9aa8 AND record.concurrency: 1 AND result.result_type: bandwidth shaker_uuid: c918a263-3b0b-409b-8cf8-22dfaeeaf33e AND record.concurrency:1 AND record.test:Bi-Directional -Running YODA ------------- - -YODA (Yet Openstack Deployment tool, Another) is a workload integrated into -Browbeat for benchmarking TripleO deployment. This includes importing baremetal -nodes, running introspections and overcloud deployements of various kinds. Note -that YODA assumes it is on the undercloud of a TripleO instance post undercloud -installation and introspection. - -Configuration -~~~~~~~~~~~~~ - -For examples of the configuration see `browbeat-complete.yaml` in the repo root directory. -Additional configuration documentation can be found below for each subworkload of YODA. - -Overcloud -~~~~~~~~~ - -For overcloud workloads, note that the nodes dictionary is dynamic, so you don't -have to define types you aren't using, this is done in the demonstration -configurations for the sake of completeness. Furthermore the node name is taken -from the name of the field, meaning custom role names should work fine there. - -The step parameter decides how many nodes can be distributed between the various -types to get from start scale to end scale, if these are the same it won't -matter. But if they are different up to that many nodes will be distributed to -the different node types (in no particular order) before the next deploy is -performed. 
The step rule is violated if and only if it is required to keep the -deployment viable, for example if the step dictates that 2 control nodes be -deployed it will skip to 3 even if it violates step. - -YODA has basic support for custom templates and more advanced roles, configure the -`templates:` paramater in the overcloud benchmark section with a string for -template paths. - - templates: "-e /usr/share/openstack-tripleo-heat-templates/environments/network-isolation.yaml" - -Note that `--templates` is passed to the `overcloud deploy` command before this, -then nodes sizes, ntp server and timeout are passed after, so your templates -will override the defaults, but not scale, timeout, or ntp settings from the -YODA config. If you want to use scheduling hints for your overcloud deploy you -will need to pip install [ostag](https://github.com/jkilpatr/ostag) and set -`node_pinning: True` in your config file. Ostag will be used before every deploy -to clean all tags and tag the appropriate nodes. If you set `node_pinning: False` -tags will be cleaned before the deploy. If you need more advanced features view -the ostag readme for how to tag based on node properties. If you don't want YODA -to edit your node properties, don't define `node_pinning` in your configuration. - -Introspection -~~~~~~~~~~~~~ - -Introspection workloads have two modes, batch and individual, the batch workload -follows the documentation exactly, nodes are imported, then bulk introspection -is run. Individual introspection has it's own custom batch size and handles -failures more gracefully (individual instead of group retries). Both have a -timeout configured in seconds and record the amount of time required for each -node to pxe and the number of failures. - -`timeout` is how long we wait for the node to come back from introspection this is -hardware variable. Although the default 900 seconds has been shown to be the 99th -percentile for success across at least two stes of hardware. 
Adjust as required. - -Note that `batch_size` can not produce a batch of unintrospected ndoes if none exist -so the last batch may be below the maximum size. When nodes in a batch fail the `failure_count` -is incremented and the nodes are returned to the pool. So it's possible that same node will -fail again in another batch. There is a safety mechanism that will kill Yoda if a node exceeds -10 retries as that's pretty much garunteed to be misconfigured. For bulk introspection all nodes -are tried once and what you get is what you get. - -If you wish to change the introspection workload failure threshold of 10% you can -set `max_fail_amnt` to any floating point value you desire. - -I would suggest bulk introspection for testing documented TripleO workflows and -individual introspection to test the performance of introspection itself. - Interpreting Browbeat Results ----------------------------- diff --git a/tox.ini b/tox.ini index 058aa4a9e..f8f7a893f 100644 --- a/tox.ini +++ b/tox.ini @@ -84,4 +84,4 @@ show-source = True ignore = E123,E125,E226,E302,E41,E231,E203,H233,H306,H238,H236,H404,H405,W504 max-line-length = 100 builtins = _ -exclude=.venv,.git,.tox,dist,doc,*lib/python*,*egg,build,ansible/*,.browbeat-venv,.perfkit-venv,.rally-venv,.shaker-venv +exclude=.venv,.git,.tox,dist,doc,*lib/python*,*egg,build,ansible/*,.browbeat-venv,.perfkit-venv,.rally-venv,.shaker-venv,browbeat-containers/*