Add atop module to collect system metrics

This commit is contained in:
Ilya Shakhat 2016-02-26 16:33:40 +03:00
parent 92d86d73ad
commit 9a605c2a68
6 changed files with 399 additions and 1 deletions

View File

@ -118,6 +118,7 @@ def _run(play_source, host_list):
if tqm is not None:
tqm.cleanup()
LOG.debug('Execution result: %s', storage)
return storage

View File

@ -75,7 +75,8 @@ def play_execution(execution_playbook):
if isinstance(v, list) or isinstance(v, dict):
del record[k]
del record['stdout']
if 'stdout' in record:
del record['stdout']
LOG.debug('Record: %s', record)
records.append(record)

240
performa/modules/atop.py Normal file
View File

@ -0,0 +1,240 @@
#!/usr/bin/python
import re
# File the background atop process writes its raw samples to ("atop -w") and
# that is later read back ("atop -r")
ATOP_FILE_NAME = '/tmp/performa.atop'
# Name under which the atop process is registered with daemon(1), so that it
# can be stopped by name afterwards
UNIQUE_NAME = 'performa_atop'

# All regular expressions below are raw strings: sequences such as "\S" or
# "\(" are regex syntax, not Python string escapes, and non-raw literals
# trigger invalid-escape warnings on modern interpreters.

# Fields that follow the label on every parseable atop line:
# host, epoch timestamp, date, time and the sample interval
PREFIX_PATTERN = (
    r'(?P<host>\S+)\s+'
    r'(?P<timestamp>\d+)\s+'
    r'(?P<date>\S+)\s+'
    r'(?P<time>\S+)\s+'
    r'(?P<interval>\w+)\s+'
)

# "CPU" line: counters summed over all processors
CPU_TOTAL_PATTERN = re.compile(
    r'(?P<label>CPU)\s+' +
    PREFIX_PATTERN +
    r'(?P<ticks_per_second>\d+)\s+'
    r'(?P<cpu_count>\d+)\s+'
    r'(?P<sys_ticks>\d+)\s+'
    r'(?P<user_ticks>\d+)\s+'
    r'(?P<nice_ticks>\d+)\s+'
    r'(?P<idle_ticks>\d+)\s+'
    r'(?P<wait_ticks>\d+)\s+'
    r'(?P<irq_ticks>\d+)\s+'
    r'(?P<softirq_ticks>\d+)\s+'
    r'(?P<steal_ticks>\d+)\s+'
    r'(?P<guest_ticks>\d+)'
)

# "cpu" line (lower case): counters of a single processor
CPU_PATTERN = re.compile(
    r'(?P<label>cpu)\s+' +
    PREFIX_PATTERN +
    r'(?P<ticks_per_second>\d+)\s+'
    r'(?P<cpu_id>\d+)\s+'
    r'(?P<sys_ticks>\d+)\s+'
    r'(?P<user_ticks>\d+)\s+'
    r'(?P<nice_ticks>\d+)\s+'
    r'(?P<idle_ticks>\d+)\s+'
    r'(?P<wait_ticks>\d+)\s+'
    r'(?P<irq_ticks>\d+)\s+'
    r'(?P<softirq_ticks>\d+)\s+'
    r'(?P<steal_ticks>\d+)\s+'
    r'(?P<guest_ticks>\d+)'
)

# "MEM" line: memory counters expressed in pages of page_size bytes
MEM_PATTERN = re.compile(
    r'(?P<label>MEM)\s+' +
    PREFIX_PATTERN +
    r'(?P<page_size>\d+)\s+'
    r'(?P<phys_pages>\d+)\s+'
    r'(?P<free_pages>\d+)\s+'
    r'(?P<cache_pages>\d+)\s+'
    r'(?P<buffer_pages>\d+)\s+'
    r'(?P<slab_pages>\d+)\s+'
    r'(?P<dirty_pages>\d+)'
)

# "NET ... upper" line: TCP/UDP/IP layer counters. It has to be tried before
# NET_PATTERN, which would otherwise accept "upper" as an interface name.
NET_UPPER_PATTERN = re.compile(
    r'(?P<label>NET)\s+' +
    PREFIX_PATTERN +
    r'upper\s+'
    r'(?P<tcp_rx>\d+)\s+'
    r'(?P<tcp_tx>\d+)\s+'
    r'(?P<udp_rx>\d+)\s+'
    r'(?P<udp_tx>\d+)\s+'
    r'(?P<ip_rx>\d+)\s+'
    r'(?P<ip_tx>\d+)\s+'
    r'(?P<ip_dx>\d+)\s+'
    r'(?P<ip_fx>\d+)'
)

# "NET <interface>" line: counters of one network interface
NET_PATTERN = re.compile(
    r'(?P<label>NET)\s+' +
    PREFIX_PATTERN +
    r'(?P<interface>\S+)\s+'
    r'(?P<rx_pkt>\d+)\s+'
    r'(?P<tx_pkt>\d+)\s+'
    r'(?P<rx_bytes>\d+)\s+'
    r'(?P<tx_bytes>\d+)\s+'
    r'(?P<speed>\d+)\s+'
    r'(?P<duplex_command>\d+)'
)

# "PRC" line: per-process CPU counters
PRC_PATTERN = re.compile(
    r'(?P<label>PRC)\s+' +
    PREFIX_PATTERN +
    r'(?P<pid>\d+)\s+'
    r'\((?P<name>.+)\)\s+'
    r'(?P<state>\S+)\s+'
    r'(?P<ticks_per_second>\d+)\s+'
    r'(?P<user_ticks>\d+)\s+'
    r'(?P<sys_ticks>\d+)\s+'
    # the nice value may be negative (e.g. "-10"); a digits-only group would
    # make the whole line fail to match and the record be silently dropped
    r'(?P<nice>-?\d+)\s+'
    r'(?P<priority>\d+)\s+'
    r'(?P<realtime_priority>\d+)\s+'
    r'(?P<scheduling_policy>\d+)\s+'
    r'(?P<current_cpu>\d+)\s+'
    r'(?P<sleep_avg>\d+)'
)

# "PRM" line: per-process memory counters
PRM_PATTERN = re.compile(
    r'(?P<label>PRM)\s+' +
    PREFIX_PATTERN +
    r'(?P<pid>\d+)\s+'
    r'\((?P<name>.+)\)\s+'
    r'(?P<state>\S+)\s+'
    r'(?P<page_size>\d+)\s+'
    r'(?P<virtual_kb>\d+)\s+'
    r'(?P<resident_kb>\d+)\s+'
    r'(?P<shared_kb>\d+)\s+'
    r'(?P<virtual_growth_kb>\d+)\s+'
    r'(?P<resident_growth_kb>\d+)\s+'
    r'(?P<minor_page_faults>\d+)\s+'
    r'(?P<major_page_faults>\d+)'
)

# Patterns in the order they are tried against a line; the first match wins
PATTERNS = [CPU_TOTAL_PATTERN, CPU_PATTERN, MEM_PATTERN,
            NET_UPPER_PATTERN, NET_PATTERN, PRC_PATTERN, PRM_PATTERN]

# Labels that are collected when the caller does not ask for specific ones
ALL_LABELS = ['CPU', 'cpu', 'MEM', 'NET', 'PRC', 'PRM']
def normalize_point(point):
    """Convert the raw string fields of one parsed record in place.

    Values made of decimal digits, optionally preceded by a minus sign, are
    turned into integers. Fields are then converted and renamed by suffix:

    * ``<name>_pages`` becomes ``<name>``: value * ``page_size``
    * ``<name>_ticks`` becomes ``<name>``: value / ``ticks_per_second``
    * ``<name>_kb`` becomes ``<name>``: value * 1024

    :param point: dict of field name -> string value, as produced by
        ``groupdict()`` of a matched pattern; it is modified in place
    :return: the same dict object
    """
    # interpret strings into numbers; a leading minus sign is accepted so
    # that negative values do not stay behind as text
    for key, value in list(point.items()):
        digits = value[1:] if value.startswith('-') else value
        if digits.isdigit():
            point[key] = int(value)

    # convert measurement units; iterate over a snapshot because keys are
    # added and removed along the way, which is not allowed while iterating
    # the live dict view on Python 3
    for key, value in list(point.items()):
        if key.endswith('_pages'):
            point[key[:-6]] = value * point['page_size']
            del point[key]
        elif key.endswith('_ticks'):
            point[key[:-6]] = float(value) / point['ticks_per_second']
            del point[key]
        elif key.endswith('_kb'):
            point[key[:-3]] = value * 1024
            del point[key]

    return point
def parse_output(raw, filter_labels):
    """Turn parseable atop text into a list of normalized records.

    Everything before the first ``SEP`` line is ignored. Each later line is
    tried against ``PATTERNS`` in order and the first pattern that matches
    decides how the line is read; lines matching no pattern are skipped.

    :param raw: newline-separated text to parse
    :param filter_labels: iterable of labels (e.g. ``'CPU'``) to keep
    :return: list of dicts, each processed by ``normalize_point``
    """
    wanted = set(filter_labels)
    points = []
    seen_separator = False

    for row in raw.split('\n'):
        if row == 'SEP':
            seen_separator = True
        elif seen_separator:
            candidates = (regex.match(row) for regex in PATTERNS)
            found = next((hit for hit in candidates if hit), None)
            if found is not None:
                record = found.groupdict()
                # the first matching pattern settles the line even when its
                # label is filtered out
                if record['label'] in wanted:
                    points.append(normalize_point(record))

    return points
def start(module):
    """Start atop in the background and report the outcome to Ansible.

    Removes the previous sample file, launches atop through ``daemon`` so
    that it writes to ``ATOP_FILE_NAME`` with the interval taken from the
    module parameters, then waits for the file to show up.

    :param module: the AnsibleModule instance; ``fail_json`` is called when
        the daemon command returns a non-zero code, ``exit_json`` otherwise
    """
    # imported locally: at file level these names are only available, if at
    # all, as a side effect of the wildcard import of the Ansible helpers
    import os
    import time

    # clear the file
    module.run_command('rm %s' % ATOP_FILE_NAME)

    # start atop as daemon
    cmd = ('daemon -n %(name)s -- atop -w %(file)s %(interval)s' %
           dict(name=UNIQUE_NAME, file=ATOP_FILE_NAME,
                interval=module.params['interval']))
    rc, stdout, stderr = module.run_command(cmd)
    result = dict(changed=True, rc=rc, stdout=stdout, stderr=stderr, cmd=cmd)

    if rc:
        module.fail_json(msg='Failed to start atop', **result)
        return

    # sleep until file is created; give up after ten attempts and report
    # success anyway (best effort, as before)
    for _ in range(10):
        if os.path.exists(ATOP_FILE_NAME):
            break
        # wait in-process instead of spawning a "sleep 1" subprocess
        time.sleep(1)

    module.exit_json(**result)
def stop(module):
    """Stop the background atop process and return the collected series.

    The daemon registered under ``UNIQUE_NAME`` is stopped, then the sample
    file is read back with ``atop -r ... -P <labels>`` and its output parsed.

    :param module: the AnsibleModule instance; the ``labels`` parameter
        selects which records to return, ``ALL_LABELS`` when it is empty
    """
    # stop atop
    stop_cmd = 'daemon -n %(name)s --stop' % {'name': UNIQUE_NAME}
    rc, stdout, stderr = module.run_command(stop_cmd)
    if rc:
        module.fail_json(msg='Failed to stop atop', rc=rc, stderr=stderr)

    # grab data
    labels = module.params['labels'] or ALL_LABELS
    read_args = {'file': ATOP_FILE_NAME, 'labels': ','.join(labels)}
    read_cmd = 'atop -r %(file)s -P %(labels)s' % read_args
    rc, stdout, stderr = module.run_command(read_cmd)

    try:
        module.exit_json(series=parse_output(stdout, labels))
    except Exception as e:
        # any failure while parsing or reporting is passed on to Ansible
        # together with what the atop command itself returned
        module.fail_json(msg=str(e), stderr=stderr, rc=rc)
def main():
    """Entry point of the module: read the arguments and run the command.

    Accepted arguments are ``command`` (required, ``start`` or ``stop``),
    ``interval`` (int, default 1) and ``labels`` (list).
    """
    spec = {
        'command': {'required': True, 'choices': ['start', 'stop']},
        'interval': {'type': 'int', 'default': 1},
        'labels': {'type': 'list'},
    }
    module = AnsibleModule(argument_spec=spec)

    command = module.params['command']
    handlers = {'start': start, 'stop': stop}
    if command in handlers:
        handlers[command](module)
    else:
        module.fail_json(msg='Unsupported command: %s' % command)


# the wildcard import supplies AnsibleModule; it sits below the definitions
# and is resolved before main() is called
from ansible.module_utils.basic import *  # noqa

if __name__ == '__main__':
    main()

View File

@ -12,6 +12,16 @@ setup:
become: yes
become_user: root
become_method: sudo
- name: installing atop
apt:
name: atop, daemon
become: yes
become_user: root
become_method: sudo
-
hosts: $target
tasks:
- atop: command=start
execution:
-
@ -21,6 +31,12 @@ execution:
tasks:
- sysbench_oltp:
duration: 10
-
hosts: $target
tasks:
- atop:
command: stop
labels: [ CPU, PRC, PRM ]
report:
template: sysbench.rst

View File

@ -0,0 +1,50 @@
RESET
CPU host 1456480863 2016/02/26 10:01:03 2503659 100 4 5355049 7555867 5 984566626 2589741 63689 0 0 0 12768 100
cpu host 1456480863 2016/02/26 10:01:03 2503659 100 0 1705191 2253800 0 243472217 2465631 12723 0 0 0 3192 100
SEP
CPU host 1456480864 2016/02/26 10:01:04 1 100 4 4 4 0 392 0 0 0 0 0 12768 100
cpu host 1456480864 2016/02/26 10:01:04 1 100 0 0 0 0 99 0 0 0 0 0 3192 100
cpu host 1456480864 2016/02/26 10:01:04 1 100 1 1 0 0 99 0 0 0 0 0 3192 100
cpu host 1456480864 2016/02/26 10:01:04 1 100 2 2 3 0 95 0 0 0 0 0 3192 100
cpu host 1456480864 2016/02/26 10:01:04 1 100 3 0 0 0 100 0 0 0 0 0 3192 100
MEM host 1456480864 2016/02/26 10:01:04 1 4096 2044208 893540 809906 85798 72782 0
NET host 1456480864 2016/02/26 10:01:04 1 upper 0 0 0 0 0 0 0 0
NET host 1456480864 2016/02/26 10:01:04 1 ovs-system 0 0 0 0 0 0
NET host 1456480864 2016/02/26 10:01:04 1 br-int 0 0 0 0 0 0
NET host 1456480864 2016/02/26 10:01:04 1 eth0 0 0 0 0 100 1
NET host 1456480864 2016/02/26 10:01:04 1 lo 0 0 0 0 0 0
NET host 1456480864 2016/02/26 10:01:04 1 br-ex 0 0 0 0 0 0
NET host 1456480864 2016/02/26 10:01:04 1 virbr0 0 0 0 0 0 0
PRC host 1456480864 2016/02/26 10:01:04 1 8595 (epmd) S 100 0 1 0 120 0 0 0 0
PRC host 1456480864 2016/02/26 10:01:04 1 8634 (beam.smp) S 100 1 1 0 120 0 0 1 0
PRC host 1456480864 2016/02/26 10:01:04 1 11014 (dstat) S 100 1 2 0 120 0 0 2 0
PRC host 1456480864 2016/02/26 10:01:04 1 14134 (glance-api) S 100 1 0 0 120 0 0 0 0
PRC host 1456480864 2016/02/26 10:01:04 1 19929 (atop) R 100 1 2 0 120 0 0 2 0
PRM host 1456480864 2016/02/26 10:01:04 1 8595 (epmd) S 4096 7492 316 0 0 0 0 0
PRM host 1456480864 2016/02/26 10:01:04 1 8634 (beam.smp) S 4096 2168928 51548 0 0 0 0 0
PRM host 1456480864 2016/02/26 10:01:04 1 11014 (dstat) S 4096 34044 7472 2796 0 0 0 0
PRM host 1456480864 2016/02/26 10:01:04 1 14134 (glance-api) S 4096 219848 87044 2796 0 0 0 0
PRM host 1456480864 2016/02/26 10:01:04 1 19929 (atop) R 4096 17004 1972 148 244 364 874 0
SEP
CPU host 1456480865 2016/02/26 10:01:05 1 100 4 4 4 0 392 0 0 0 0 0 12768 100
cpu host 1456480865 2016/02/26 10:01:05 1 100 0 2 1 0 97 0 0 0 0 0 3192 100
cpu host 1456480865 2016/02/26 10:01:05 1 100 1 0 0 0 100 0 0 0 0 0 3192 100
cpu host 1456480865 2016/02/26 10:01:05 1 100 2 3 3 0 94 0 0 0 0 0 3192 100
cpu host 1456480865 2016/02/26 10:01:05 1 100 3 0 0 0 100 0 0 0 0 0 3192 100
MEM host 1456480865 2016/02/26 10:01:05 1 4096 2044208 893540 809909 85798 72782 0
NET host 1456480865 2016/02/26 10:01:05 1 upper 0 0 0 0 0 0 0 0
NET host 1456480865 2016/02/26 10:01:05 1 ovs-system 0 0 0 0 0 0
NET host 1456480865 2016/02/26 10:01:05 1 br-int 0 0 0 0 0 0
NET host 1456480865 2016/02/26 10:01:05 1 eth0 0 0 0 0 100 1
NET host 1456480865 2016/02/26 10:01:05 1 lo 0 0 0 0 0 0
NET host 1456480865 2016/02/26 10:01:05 1 br-ex 0 0 0 0 0 0
NET host 1456480865 2016/02/26 10:01:05 1 virbr0 0 0 0 0 0 0
PRC host 1456480865 2016/02/26 10:01:05 1 7315 (ovs-vswitchd) S 100 0 1 -10 110 0 0 0 0
PRC host 1456480865 2016/02/26 10:01:05 1 11014 (dstat) S 100 2 0 0 120 0 0 2 0
PRC host 1456480865 2016/02/26 10:01:05 1 14134 (glance-api) S 100 1 0 0 120 0 0 0 0
PRC host 1456480865 2016/02/26 10:01:05 1 19929 (atop) R 100 1 2 0 120 0 0 2 0
PRM host 1456480865 2016/02/26 10:01:05 1 7315 (ovs-vswitchd) S 4096 243088 32016 0 0 0 0 0
PRM host 1456480865 2016/02/26 10:01:05 1 11014 (dstat) S 4096 34044 7472 2796 0 0 0 0
PRM host 1456480865 2016/02/26 10:01:05 1 14134 (glance-api) S 4096 219848 87044 2796 0 0 0 0
PRM host 1456480865 2016/02/26 10:01:05 1 19929 (atop) R 4096 17004 1972 148 0 0 751 0
SEP

View File

@ -0,0 +1,90 @@
# Copyright (c) 2016 OpenStack Foundation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import testtools
from performa.modules import atop
def _read_sample():
    """Return the whole content of the atop sample file as a string.

    The path is relative to the current working directory.
    """
    with open('performa/tests/atop_sample.txt') as sample:
        content = sample.read()
    return content
class TestAtop(testtools.TestCase):
    """Checks of atop.parse_output against the recorded sample output."""

    def test_parse_cpu_total(self):
        """Both 'CPU' records after the first separator are returned."""
        first = {
            'cpu_count': 4,
            'date': '2016/02/26',
            'guest': 0.0,
            'host': 'host',
            'idle': 3.92,
            'interval': 1,
            'irq': 0.0,
            'label': 'CPU',
            'nice': 0.0,
            'softirq': 0.0,
            'steal': 0.0,
            'sys': 0.04,
            'ticks_per_second': 100,
            'time': '10:01:04',
            'timestamp': 1456480864,
            'user': 0.04,
            'wait': 0.0,
        }
        # the second sample differs from the first one only in its time
        second = dict(first, time='10:01:05', timestamp=1456480865)

        series = atop.parse_output(_read_sample(), ['CPU'])

        self.assertEqual([first, second], series)

    def test_parse_cpu(self):
        """A per-processor 'cpu' record is present in the result."""
        needle = {
            'cpu_id': 2,
            'date': '2016/02/26',
            'guest': 0.0,
            'host': 'host',
            'idle': 0.94,
            'interval': 1,
            'irq': 0.0,
            'label': 'cpu',
            'nice': 0.0,
            'softirq': 0.0,
            'steal': 0.0,
            'sys': 0.03,
            'ticks_per_second': 100,
            'time': '10:01:05',
            'timestamp': 1456480865,
            'user': 0.03,
            'wait': 0.0,
        }

        series = atop.parse_output(_read_sample(), ['cpu'])

        self.assertIn(needle, series)

    def test_parse_mem(self):
        """'MEM' page counters are reported multiplied by the page size."""
        first = {
            'buffer': 351428608,
            'cache': 3317374976,
            'date': '2016/02/26',
            'dirty': 0,
            'free': 3659939840,
            'host': 'host',
            'interval': 1,
            'label': 'MEM',
            'page_size': 4096,
            'phys': 8373075968,
            'slab': 298115072,
            'time': '10:01:04',
            'timestamp': 1456480864,
        }
        # only the cache size and the time change between the two samples
        second = dict(first, cache=3317387264, time='10:01:05',
                      timestamp=1456480865)

        series = atop.parse_output(_read_sample(), ['MEM'])

        self.assertEqual([first, second], series)

    def test_parse_net(self):
        """The 'NET upper' record is present in the result."""
        needle = {
            'date': '2016/02/26',
            'host': 'host',
            'interval': 1,
            'ip_dx': 0,
            'ip_fx': 0,
            'ip_rx': 0,
            'ip_tx': 0,
            'label': 'NET',
            'tcp_rx': 0,
            'tcp_tx': 0,
            'time': '10:01:04',
            'timestamp': 1456480864,
            'udp_rx': 0,
            'udp_tx': 0,
        }

        series = atop.parse_output(_read_sample(), ['NET'])

        self.assertIn(needle, series)

    def test_parse_prc(self):
        """A per-process 'PRC' record is present in the result."""
        needle = {
            'current_cpu': 2,
            'date': '2016/02/26',
            'host': 'host',
            'interval': 1,
            'label': 'PRC',
            'name': 'dstat',
            'nice': 0,
            'pid': 11014,
            'priority': 120,
            'realtime_priority': 0,
            'scheduling_policy': 0,
            'sleep_avg': 0,
            'state': 'S',
            'sys': 0.02,
            'ticks_per_second': 100,
            'time': '10:01:04',
            'timestamp': 1456480864,
            'user': 0.01,
        }

        series = atop.parse_output(_read_sample(), ['PRC'])

        self.assertIn(needle, series)

    def test_parse_prm(self):
        """A per-process 'PRM' record is present, with sizes times 1024."""
        needle = {
            'date': '2016/02/26',
            'host': 'host',
            'interval': 1,
            'label': 'PRM',
            'major_page_faults': 0,
            'minor_page_faults': 751,
            'name': 'atop',
            'page_size': 4096,
            'pid': 19929,
            'resident': 2019328,
            'resident_growth': 0,
            'shared': 151552,
            'state': 'R',
            'time': '10:01:05',
            'timestamp': 1456480865,
            'virtual': 17412096,
            'virtual_growth': 0,
        }

        series = atop.parse_output(_read_sample(), ['PRM'])

        self.assertIn(needle, series)