diff --git a/conf.d/lxc.yaml.example b/conf.d/lxc.yaml.example new file mode 100644 index 00000000..0e1ae68c --- /dev/null +++ b/conf.d/lxc.yaml.example @@ -0,0 +1,20 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +init_config: + +instances: + - container: all + cpu: True + mem: True + blkio: True + net: True diff --git a/docs/Plugins.md b/docs/Plugins.md index ec928305..00c2f414 100644 --- a/docs/Plugins.md +++ b/docs/Plugins.md @@ -65,6 +65,7 @@ - [Libvirt VM Monitoring](#libvirt-vm-monitoring) - [Open vSwitch Neutron Router Monitoring](#open-vswitch-neutron-router-monitoring) - [Lighttpd](#lighttpd) + - [LXC](#lxc) - [Mcache](#mcache) - [MK Livestatus](#mk-livestatus) - [Mongo](#mongo) @@ -1717,6 +1718,68 @@ Complete documentation of the Open vSwitch Neutron Router monitoring plugin can ## Lighttpd See [the example configuration](https://github.com/openstack/monasca-agent/blob/master/conf.d/lighttpd.yaml.example) for how to configure the Lighttpd plugin. +## LXC +An agent that provides LXC cgroup data. This agent does not require sudo. 
+ +Requirements: + * lxc + +Sample config: + +``` +init_config: + +instances: + - container: all + cpu: True + mem: True + blkio: True + net: True +``` + +The LXC checks return the following metrics: + +| Metric Name | Dimensions | Semantics | +| ----------- | ---------- | --------- | +| cpuacct.usage | hostname, container_name, service=lxc | reports the total CPU time (in nanoseconds) consumed | +| cpuacct.usage_percpu.cpu{X} | hostname, container_name, service=lxc | reports the total CPU time (in nanoseconds) consumed by cpu X | +| cpuacct.user | hostname, container_name, service=lxc| CPU time consumed by tasks in user mode. Unit defined by the USER_HZ variable | +| cpuacct.system | hostname, container_name, service=lxc| CPU time consumed by tasks in kernel mode. Unit defined by the USER_HZ variable | +| memory.cache | hostname, container_name, service=lxc | page cache, including *tmpfs* (shmem), in bytes | +| memory.rss | hostname, container_name, service=lxc | anonymous and swap cache, not including tmpfs (shmem), in bytes | +| memory.mapped_file| hostname, container_name, service=lxc | size of memory-mapped files, including tmpfs (shmem), in bytes | +| memory.pgpgin | hostname, container_name, service=lxc | number of pages paged into memory | +| memory.pgpgout | hostname, container_name, service=lxc | number of pages paged out of memory | +| memory.swap | hostname, container_name, service=lxc | swap usage in bytes | +| memory.active_anon | hostname, container_name, service=lxc | anonymous and swap cache on active LRU list, in bytes | +| memory.inactive_anon | hostname, container_name, service=lxc | anonymous and swap cache on inactive LRU list, in bytes | +| memory.active_file | hostname, container_name, service=lxc | file-backed memory on active LRU list, in bytes | +| memory.inactive_file | hostname, container_name, service=lxc | file-backed memory on inactive LRU list, in bytes | +| memory.unevictable | hostname, container_name, service=lxc | memory that 
cannot be reclaimed, in bytes | +| memory.hierarchical_memory_limit | hostname, container_name, service=lxc | memory limit for the hierarchy that contains the memory cgroup, in bytes | +| memory.hierarchical_memsw_limit | hostname, container_name, service=lxc | memory plus swap limit for the hierarchy that contains the memory cgroup, in bytes | +| net.rx.bytes | hostname, container_name, service=lxc, iface | number of received bytes | +| net.rx.packets | hostname, container_name, service=lxc, iface | number of received packets | +| net.rx.errs | hostname, container_name, service=lxc, iface | number of received error packets | +| net.rx.drop | hostname, container_name, service=lxc, iface | number of received dropped packets | +| net.rx.fifo | hostname, container_name, service=lxc, iface | number of received fifo packets | +| net.rx.frame | hostname, container_name, service=lxc, iface | number of received frame packets | +| net.rx.compressed | hostname, container_name, service=lxc, iface| number of received compressed bytes | +| net.rx.multicast | hostname, container_name, service=lxc, iface | number of received multicast packets | +| net.tx.bytes | hostname, container_name, service=lxc, iface| number of transferred bytes | +| net.tx.packets | hostname, container_name, service=lxc, iface | number of transferred packets | +| net.tx.errs | hostname, container_name, service=lxc, iface | number of transferred error packets | +| net.tx.drop | hostname, container_name, service=lxc, iface | number of transferred dropped packets | +| net.tx.fifo | hostname, container_name, service=lxc, iface | number of transferred fifo packets | +| net.tx.frame | hostname, container_name, service=lxc, iface | number of transferred frame packets | +| net.tx.compressed | hostname, container_name, service=lxc, iface| number of transferred compressed bytes | +| net.tx.multicast | hostname, container_name, service=lxc, iface | number of transferred multicast packets | +| blkio.read | hostname, 
container_name, service=lxc | number of bytes read from the disk to the cgroup(container) |
+| blkio.write | hostname, container_name, service=lxc | number of bytes written from the cgroup(container) to the disk |
+| blkio.async | hostname, container_name, service=lxc | number of asynchronous bytes |
+| blkio.sync | hostname, container_name, service=lxc | number of synchronous bytes |
+| blkio.total | hostname, container_name, service=lxc | total number of bytes |
+
 ## Mcache
 See [the example configuration](https://github.com/openstack/monasca-agent/blob/master/conf.d/mcache.yaml.example) for how to configure the Mcache plugin.
 
diff --git a/monasca_agent/collector/checks_d/lxc.py b/monasca_agent/collector/checks_d/lxc.py
new file mode 100644
index 00000000..2686eb79
--- /dev/null
+++ b/monasca_agent/collector/checks_d/lxc.py
@@ -0,0 +1,246 @@
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import os
+import re
+
+import monasca_agent.collector.checks as checks
+
+_LXC_CGROUP_PWD = '/sys/fs/cgroup'
+_LXC_CGROUP_CPU_PWD = '{0}/cpu/lxc'.format(_LXC_CGROUP_PWD)
+_LXC_CGROUP_CPUSET_PWD = '{0}/cpuset/lxc'.format(_LXC_CGROUP_PWD)
+_LXC_CGROUP_MEM_PWD = '{0}/memory/lxc'.format(_LXC_CGROUP_PWD)
+_LXC_CGROUP_DISK_PWD = '{0}/blkio/lxc'.format(_LXC_CGROUP_PWD)
+
+# A data line of /proc/<pid>/net/dev looks like '<iface>: <counters...>'.
+# The name is matched as "anything but whitespace or ':'" because \w+ cut
+# names such as 'veth-abc' or 'eth0.100' down to their last word.
+_LXC_NET_REGEX = re.compile(r'([^\s:]+):(.+)')
+_LXC_DISK_REGEX = re.compile(r'(\w+)\s(\d+)')
+
+# Metric name reported for each counter column of /proc/<pid>/net/dev.
+# NOTE(review): the kernel labels the last three transmit columns 'colls',
+# 'carrier' and 'compressed'; the tx.frame/tx.compressed/tx.multicast names
+# are kept because they are the documented metric names - confirm.
+_LXC_NET_FIELDS = (
+    'net.rx.bytes', 'net.rx.packets', 'net.rx.errs', 'net.rx.drop',
+    'net.rx.fifo', 'net.rx.frame', 'net.rx.compressed', 'net.rx.multicast',
+    'net.tx.bytes', 'net.tx.packets', 'net.tx.errs', 'net.tx.drop',
+    'net.tx.fifo', 'net.tx.frame', 'net.tx.compressed', 'net.tx.multicast',
+)
+
+
+class LXC(checks.AgentCheck):
+    """Agent to collect LXC cgroup information
+
+    The information is mostly based on cgroup files of each container
+    """
+
+    def check(self, instance):
+        """Collect every enabled metric group for the selected containers.
+
+        :param instance: instance configuration; 'container' is either
+                         'all' or a container name, and the 'cpu', 'mem',
+                         'net' and 'blkio' flags (default True) switch the
+                         corresponding metric groups on or off
+        """
+        self.instance = instance
+        self.containers = self._containers_name()
+        for container_name in self.containers:
+            self._collect_cpu_metrics(container_name)
+            self._collect_mem_metrics(container_name)
+            self._collect_net_metrics(container_name)
+            self._collect_disk_metrics(container_name)
+
+    def _containers_name(self):
+        """Return the names of the containers to collect metrics for.
+
+        :returns: all directories under the lxc cpu cgroup when the
+                  instance asks for 'all', a single-element list for a
+                  named container that exists, otherwise an empty list
+                  (never None, so check() can always iterate the result)
+        """
+        container_name = self.instance.get('container')
+        if container_name == 'all':
+            if not os.path.isdir(_LXC_CGROUP_CPU_PWD):
+                self.log.error('\tCgroup directory {0} was not found'
+                               .format(_LXC_CGROUP_CPU_PWD))
+                return []
+            # Join with a separator: plain concatenation produced
+            # '.../cpu/lxc<name>', which is never a directory.
+            return [name for name in os.listdir(_LXC_CGROUP_CPU_PWD)
+                    if os.path.isdir(os.path.join(_LXC_CGROUP_CPU_PWD,
+                                                  name))]
+
+        if container_name and os.path.isdir(
+                '{0}/{1}'.format(_LXC_CGROUP_CPU_PWD, container_name)):
+            self.log.info('\tContainer name: {0}'.format(container_name))
+            return [container_name]
+
+        self.log.error('\tContainer {0} was not found'
+                       .format(container_name))
+        return []
+
+    def _collect_cpu_metrics(self, container_name):
+        """Report the cpuacct metrics unless 'cpu' is disabled."""
+        if not self.instance.get('cpu', True):
+            return
+        metrics = self._get_cpu_metrics(container_name)
+        cpu_dimensions = self._get_dimensions(container_name)
+        for metric, value in metrics.items():
+            self.gauge(metric, value, dimensions=cpu_dimensions)
+
+    def _collect_mem_metrics(self, container_name):
+        """Report the memory.stat metrics unless 'mem' is disabled."""
+        if not self.instance.get('mem', True):
+            return
+        metrics = self._get_mem_metrics(container_name)
+        mem_dimensions = self._get_dimensions(container_name)
+        for metric, value in metrics.items():
+            self.gauge(metric, value, dimensions=mem_dimensions)
+
+    def _collect_net_metrics(self, container_name):
+        """Report per-interface counters unless 'net' is disabled."""
+        if not self.instance.get('net', True):
+            return
+        metrics = self._get_net_metrics(container_name)
+        for iface_name, iface_metrics in metrics.items():
+            net_dimensions = self._get_dimensions(container_name,
+                                                  {'iface': iface_name})
+            for metric, value in iface_metrics.items():
+                self.gauge(metric, value, dimensions=net_dimensions)
+
+    def _collect_disk_metrics(self, container_name):
+        """Report the blkio metrics unless 'blkio' is disabled."""
+        if not self.instance.get('blkio', True):
+            return
+        metrics = self._get_disk_metrics(container_name)
+        disk_dimensions = self._get_dimensions(container_name)
+        for metric, value in metrics.items():
+            self.gauge(metric, value, dimensions=disk_dimensions)
+
+    def _get_cpu_metrics(self, container_name):
+        """Get metrics from the cpuacct cgroup files
+
+        Reads cpuacct.usage, cpuacct.usage_percpu and cpuacct.stat.
+
+        :return: a dictionary containing cpu metrics defined on container
+                 cgroup
+        """
+        metrics = {}
+        cpu_cgroup = '{0}/{1}/'.format(_LXC_CGROUP_CPU_PWD, container_name)
+        # 'with' closes each cgroup file; int() ignores the trailing '\n'.
+        with open(cpu_cgroup + 'cpuacct.usage', 'r') as usage_file:
+            metrics['cpuacct.usage'] = int(usage_file.readline())
+        with open(cpu_cgroup + 'cpuacct.usage_percpu', 'r') as percpu_file:
+            usage_percpu = percpu_file.readline().split()
+        for cpu, usage in enumerate(usage_percpu):
+            metrics['cpuacct.usage_percpu.cpu{0}'.format(cpu)] = int(usage)
+        metrics_stat = self._get_metrics_by_file(cpu_cgroup + 'cpuacct.stat',
+                                                 'cpuacct')
+        metrics.update(metrics_stat)
+        return metrics
+
+    def _get_mem_metrics(self, container_name):
+        """Get metrics from memory.stat cgroup file
+
+        :returns: a dictionary containing memory metrics defined on
+                  container cgroup
+        """
+        mem_cgroup = '{0}/{1}/'.format(_LXC_CGROUP_MEM_PWD, container_name)
+        return self._get_metrics_by_file(mem_cgroup + 'memory.stat',
+                                         'memory')
+
+    def _get_net_metrics(self, container_name):
+        """Get metrics for each net interface found
+
+        :returns: a dictionary containing metrics regarding each
+                  net interface found, in the format:
+                  { 'lo': { 'net.rx.bytes': 1234 }, ...}
+                  (empty when the container has no task to read from)
+        """
+        metrics = {}
+        pid = self._get_pid_container(container_name)
+        if not pid:
+            # An empty tasks file leaves no /proc/<pid>/net/dev to read.
+            self.log.warning('\tNo task found for container {0}'
+                             .format(container_name))
+            return metrics
+        net_cgroup = '/proc/{0}/net/'.format(pid)
+        with open(net_cgroup + 'dev', 'r') as dev_file:
+            for line in dev_file:
+                iface = _LXC_NET_REGEX.search(line)
+                if iface:
+                    iface_name = iface.group(1)
+                    iface_info = iface.group(2).split()
+                    metrics[iface_name] = dict(
+                        (field, int(iface_info[index]))
+                        for index, field in enumerate(_LXC_NET_FIELDS))
+        return metrics
+
+    def _get_disk_metrics(self, container_name):
+        """Get metrics blkio.throttle.io_service_bytes from cgroup file
+
+        :return: a dictionary containing blkio metrics used to verify disk
+                 cgroup usage
+        """
+        metrics = {}
+        disk_cgroup = '{0}/{1}/blkio.throttle.io_service_bytes'.format(
+            _LXC_CGROUP_DISK_PWD, container_name)
+        with open(disk_cgroup, 'r') as disk_file:
+            for line in disk_file:
+                disk = _LXC_DISK_REGEX.search(line)
+                if disk:
+                    disk_key = 'blkio.{0}'.format(disk.group(1)).lower()
+                    disk_value = disk.group(2)
+                    metrics[disk_key] = int(disk_value)
+        return metrics
+
+    def _get_metrics_by_file(self, filename, pre_key):
+        """Some cgroup files have a pattern 'key value' that can be easily
+        handled to a dictionary
+
+        :param filename: path of the cgroup file to parse
+        :param pre_key: prefix joined to every key with a '.'
+        :returns: a dictionary mapping '<pre_key>.<key>' to its int value
+        """
+        metrics = {}
+        with open(filename, 'r') as cgroup_file:
+            for line in cgroup_file:
+                if not line.strip():
+                    continue
+                resource_post_key, resource_value = line.split()
+                resource_key = '{0}.{1}'.format(pre_key, resource_post_key)
+                metrics[resource_key] = int(resource_value)
+        return metrics
+
+    def _get_dimensions(self, container_name, options=None):
+        """Build the dimensions attached to a container's metrics.
+
+        :param options: optional extra dimensions (e.g. {'iface': 'lo'})
+        """
+        dimensions = {'container_name': container_name,
+                      'service': 'lxc'}
+        # options defaults to None and dict.update(None) raises TypeError.
+        if options:
+            dimensions.update(options)
+        return self._set_dimensions(dimensions, self.instance)
+
+    def _get_pid_container(self, container_name):
+        """Return the first pid listed in the container's cpu tasks file.
+
+        :returns: the pid as a string, '' when the tasks file is empty
+        """
+        cpu_tasks = '{0}/{1}/tasks'.format(_LXC_CGROUP_CPU_PWD,
+                                           container_name)
+        with open(cpu_tasks, 'r') as tasks_file:
+            return tasks_file.readline().rstrip('\n')
diff --git a/monasca_setup/detection/plugins/lxc.py b/monasca_setup/detection/plugins/lxc.py
new file mode 100644
index 00000000..098027e3
--- /dev/null
+++ b/monasca_setup/detection/plugins/lxc.py
@@ -0,0 +1,67 @@
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import logging
+import os
+
+import monasca_setup.agent_config
+import monasca_setup.detection
+
+_LXC_PWD = '/var/lib/lxc'
+log = logging.getLogger(__name__)
+
+
+class LXC(monasca_setup.detection.Plugin):
+    """Detect if LXC is present on the host.
+
+    LXC uses cgroup and namespaces to create a controlled and isolated
+    environment. One can easily detect if lxc is installed on a machine by
+    searching for /var/lib/lxc. Note that uninstalling lxc does not
+    necessarily remove this directory. THIS CAN NOT VERIFY ALL CONTAINERS
+    (RUNNING AND STOPPED) WITHOUT ROOT ACCESS TO MONASCA-AGENT USER. Only
+    running containers will be detected.
+
+    To detect if any container is running, you can check whether there are
+    any folders in /sys/fs/cgroup/cpu/lxc/. Folders' names are the same as
+    the running containers' names.
+    """
+
+    def __init__(self, template_dir, overwrite=True, args=None):
+        self.service_name = 'lxc'
+        super(LXC, self).__init__(template_dir, overwrite, args)
+
+    def _detect(self):
+        """Mark the plugin as available when /var/lib/lxc exists."""
+        if os.path.exists(_LXC_PWD):
+            self.available = True
+
+    def build_config(self):
+        """Build a config with one instance watching all containers."""
+        config = monasca_setup.agent_config.Plugins()
+        # NOTE(review): the key is presumably the name of the generated
+        # conf.d file; 'default' looks like it should be 'lxc' - confirm.
+        # NOTE(review): 'state' is never read by checks_d/lxc.py - confirm
+        # whether it is still needed.
+        config['default'] = {'init_config': None,
+                             'instances': [
+                                 {'container': 'all',
+                                  'state': True,
+                                  'cpu': True,
+                                  'mem': True,
+                                  'blkio': True,
+                                  'net': True
+                                  }]}
+        return config
+
+    def dependencies_installed(self):
+        """Always True: the check needs no package beyond the agent."""
+        return True