summaryrefslogtreecommitdiff
path: root/devstack
diff options
context:
space:
mode:
authorFederico Ceratto <federico.ceratto@hpe.com>2016-03-10 15:53:55 +0000
committerFederico Ceratto <federico.ceratto@hpe.com>2016-05-17 17:52:30 +0100
commitd5e4c60d44d3f010e37a40994dd9d001409f9678 (patch)
treefee3c05f63f7c1ab8648dcac044d945b4e912801 /devstack
parente23eb9a03032e3ee777825bac5cff0c1ea8ca51a (diff)
Integration/stress test using a simulated network
Set up a devstack host, set up a simulated lossy network and run benchmarks. Change-Id: Icf492ab523e218af28ff0558e765bedffef9e623
Notes
Notes (review): Code-Review+2: Graham Hayes <graham.hayes@hpe.com> Code-Review+2: Kiall Mac Innes <kiall@macinnes.ie> Workflow+1: Kiall Mac Innes <kiall@macinnes.ie> Verified+2: Jenkins Submitted-by: Jenkins Submitted-at: Wed, 18 May 2016 15:46:27 +0000 Reviewed-on: https://review.openstack.org/291260 Project: openstack/designate Branch: refs/heads/master
Diffstat (limited to 'devstack')
-rwxr-xr-xdevstack/networking_test.py663
-rwxr-xr-xdevstack/networking_test_monitor_tc.sh3
2 files changed, 666 insertions, 0 deletions
diff --git a/devstack/networking_test.py b/devstack/networking_test.py
new file mode 100755
index 0000000..49ef76f
--- /dev/null
+++ b/devstack/networking_test.py
@@ -0,0 +1,663 @@
1#!/usr/bin/env python
2# Copyright 2016 Hewlett Packard Enterprise Development Company LP
3#
4# Author: Federico Ceratto <federico.ceratto@hpe.com>
5#
6# Licensed under the Apache License, Version 2.0 (the "License"); you may
7# not use this file except in compliance with the License. You may obtain
8# a copy of the License at
9#
10# http://www.apache.org/licenses/LICENSE-2.0
11#
12# Unless required by applicable law or agreed to in writing, software
13# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
14# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
15# License for the specific language governing permissions and limitations
16# under the License.
17
18"""
19 Network simulator
20 ~~~~~~~~~~~~~~~~~
21 Perform end-to-end stress tests on Designate on a simulated network
22 that displays high latency and packet loss (almost like real ones)
23
24 WARNING: this script is to be run on a disposable devstack VM
25 It requires sudo and it will configure /sbin/tc
26
27 Usage:
28 cd <designate_repo>/contrib/vagrant
29 ./setup_ubuntu_devstack
30 vagrant ssh ubuntu
31 source ~/devstack/openrc
32 /opt/stack/designate/devstack/networking_test.py
33 Monitor the logfiles
34"""
35
36from argparse import ArgumentParser
37from collections import OrderedDict
38from itertools import product
39from subprocess import check_output
40from subprocess import CalledProcessError
41from tempfile import NamedTemporaryFile
42from threading import Thread
43import json
44import logging
45import os
46import random
47import string
48import time
49
50import dns
51import dns.resolver
52
# Root logger; main() configures its level and format with basicConfig().
log = logging.getLogger()

# External tools this script shells out to.  The first four are absolute
# paths; the openstack client is looked up by the shell.
tc_path = '/sbin/tc'
sudo_path = '/usr/bin/sudo'
iptables_restore_path = '/sbin/iptables-restore'
designate_cli_path = '/usr/local/bin/designate'
openstack_cli = 'openstack'
60
61
def gen_random_name(l):
    """Return a random name made of lowercase ASCII letters and digits.

    :param l: number of characters to generate
    :returns: string of length ``l`` (empty when ``l`` is 0)
    """
    # Build the alphabet once rather than once per generated character.
    alphabet = string.ascii_lowercase + string.digits
    return "".join(random.choice(alphabet) for _ in range(l))
67
68
def parse_args(argv=None):
    """Parse the command line.

    :param argv: list of arguments to parse; None (the default) makes
                 argparse read sys.argv[1:]
    :returns: argparse namespace with a boolean ``debug`` attribute
    """
    ap = ArgumentParser()
    ap.add_argument('-d', '--debug', action='store_true',
                    help='log at DEBUG level instead of INFO')
    return ap.parse_args(argv)
73
74
def run_shell(cmd, env=None):
    """Run ``cmd`` through /bin/bash and return its stdout as text lines.

    :param cmd: shell command line
    :param env: environment dict for the child process; None inherits the
                current environment
    :returns: list of output lines with trailing whitespace removed
    :raises: CalledProcessError when the command exits with non-zero status
    """
    log.debug(" running %s", cmd)
    # universal_newlines makes check_output return text on Python 3 too, so
    # callers can keep applying str methods to the returned lines.
    out = check_output(cmd, env=env, shell=True, executable='/bin/bash',
                       universal_newlines=True)
    return [line.rstrip() for line in out.splitlines()]
79
80
class DesignateCLI(object):
    """Designate CLI runner

    Runs the ``openstack`` and ``designate`` command line clients in a shell
    whose environment contains only the OS_* variables found in os.environ
    when the object is created.
    """

    def __init__(self):
        """Setup CLI handler: collect the OS_* environment variables"""
        self._cli_env = {}
        for k, v in sorted(os.environ.items()):
            if k.startswith('OS_'):
                # Keep credentials out of the debug log
                secret = 'PASSWORD' in k or 'TOKEN' in k
                log.debug("%s: %s", k, '***' if secret else v)
                self._cli_env[k] = v

    def setup_quota(self, quota):
        """Set all Designate quotas of the current user to ``quota``

        :param quota: integer limit applied to every quota type
        :raises: CalledProcessError if a CLI call fails; AssertionError if
                 the CLI reports a different ``domain_records`` value
        """
        user_id = self.run_json("token issue")["user_id"]

        cmd = ("quota-update"
               " --domains %(quota)d"
               " --domain-recordsets %(quota)d"
               " --recordset-records %(quota)d"
               " --domain-records %(quota)d"
               " %(user_id)s")
        quotas = self.run_designate_cli_table(cmd % dict(quota=quota,
                                                         user_id=user_id))
        assert quotas['domain_records'] == str(quota)

    def run(self, cmd):
        """Run a openstack client command

        :returns: list of output lines
        :raises: CalledProcessError
        """
        return run_shell("%s %s" % (openstack_cli, cmd),
                         env=self._cli_env)

    def run_json(self, cmd):
        """Run a openstack client command using JSON output

        :returns: dict
        :raises: CalledProcessError
        """
        cmd = "%s %s -f json" % (openstack_cli, cmd)
        log.debug(" running %s", cmd)
        out = check_output(cmd, env=self._cli_env, shell=True,
                           executable='/bin/bash')
        return json.loads(out)

    @staticmethod
    def _convert_csv_value(raw):
        """Convert one raw CSV field

        Quoted fields become strings without the quotes; bare fields become
        int or float when they parse as such and are kept as text otherwise.
        """
        if raw.startswith('"') or raw.endswith('"'):
            return raw.strip('"')

        for cast in (int, float):
            try:
                return cast(raw)
            except ValueError:
                pass

        # Bare, non-numeric field (e.g. an empty column): keep it verbatim
        # rather than failing the whole parse.
        return raw

    @classmethod
    def _parse_csv_lines(cls, lines):
        """Turn CSV output lines (header first) into a list of OrderedDict

        Fields are split on every comma: quoted values that themselves
        contain a comma are not supported.
        """
        if not lines:
            return []

        header = [item.strip('"') for item in lines[0].split(',')]
        rows = []
        for line in lines[1:]:
            values = [cls._convert_csv_value(v) for v in line.split(',')]
            rows.append(OrderedDict(zip(header, values)))

        return rows

    def runcsv(self, cmd):
        """Run a command using the -f csv flag, parse the output
        and return a list of dicts
        """
        return self._parse_csv_lines(self.run(cmd + " -f csv"))

    @staticmethod
    def _parse_table_lines(lines):
        """Parse ``| key | value |`` rows into a dict

        Lines that are not table rows (borders, blank lines) are skipped.
        """
        out = {}
        for line in lines:
            if not (line.startswith('| ') and line.endswith(' |')):
                continue
            cells = line.split('|')
            out[cells[1].strip()] = cells[2].strip()

        return out

    def run_designate_cli_table(self, cmd):
        """Run a command in the designate cli expecting a table to be
        returned and parse it into a dict
        """
        cmdout = run_shell("%s %s" % (designate_cli_path, cmd),
                           env=self._cli_env)
        try:
            return self._parse_table_lines(cmdout)
        except Exception:
            # Log the raw command output: that is what failed to parse
            log.error("Unable to parse output into a dict:")
            for line in cmdout:
                log.error(line)
            log.error("-----------------------------------")
            raise
176
177
class TrafficControl(object):
    """Configure Linux Traffic Control to simulate a real network

    Every tc command targets the loopback device (``lo``) and is run
    through sudo.
    """

    # Firewall mark assigned to each protocol by setup_iptables_marking()
    protocol_marks = dict(
        mysql=1,
        dns_udp=2,
        dns_tcp=3,
    )

    def run_tc(self, cmd):
        """Run ``sudo tc <cmd>`` and return its output lines"""
        return run_shell("%s %s %s" % (sudo_path, tc_path, cmd))

    def _apply_iptables_conf(self, ipt_conf):
        """Write ``ipt_conf`` to a temporary file and load it with
        iptables-restore
        """
        # Text mode: ipt_conf is a str, which the default binary mode rejects
        # on Python 3.  The context manager removes the file even when
        # iptables-restore fails.
        with NamedTemporaryFile(mode='w') as tf:
            tf.write(ipt_conf)
            tf.flush()
            run_shell("%s %s %s" % (sudo_path, iptables_restore_path,
                                    tf.name))

    def cleanup_iptables_marking(self):
        """Load an empty filter/mangle ruleset, dropping any marking rule"""
        # Currently unneeded
        ipt_conf = """
*filter
:INPUT ACCEPT [0:0]
:FORWARD ACCEPT [0:0]
:OUTPUT ACCEPT [0:0]
COMMIT
*mangle
:PREROUTING ACCEPT [0:0]
:INPUT ACCEPT [0:0]
:FORWARD ACCEPT [0:0]
:OUTPUT ACCEPT [0:0]
:POSTROUTING ACCEPT [0:0]
COMMIT
"""
        self._apply_iptables_conf(ipt_conf)

    def _formatted_marks(self):
        """Return protocol_marks rendered as iptables ``value/mask`` strings"""
        # %x, not %d: the value follows a 0x prefix
        return dict((k, "0x%x/0xffffffff" % v)
                    for k, v in self.protocol_marks.items())

    def setup_iptables_marking(self):
        """Mark MySQL and DNS packets on lo using protocol_marks"""
        # Currently unneeded
        ipt_conf = """
*filter
:INPUT ACCEPT [0:0]
:FORWARD ACCEPT [0:0]
:OUTPUT ACCEPT [0:0]
COMMIT
*mangle
:PREROUTING ACCEPT [0:0]
:INPUT ACCEPT [0:0]
:FORWARD ACCEPT [0:0]
:OUTPUT ACCEPT [0:0]
:POSTROUTING ACCEPT [0:0]
-A PREROUTING -i lo -p tcp -m tcp --dport 3306 -j MARK --set-xmark %(mysql)s
-A PREROUTING -i lo -p tcp -m tcp --sport 3306 -j MARK --set-xmark %(mysql)s
-A PREROUTING -i lo -p tcp -m tcp --dport 53 -j MARK --set-xmark %(dns_tcp)s
-A PREROUTING -i lo -p tcp -m tcp --sport 53 -j MARK --set-xmark %(dns_tcp)s
-A PREROUTING -i lo -p udp -m udp --dport 53 -j MARK --set-xmark %(dns_udp)s
-A PREROUTING -i lo -p udp -m udp --sport 53 -j MARK --set-xmark %(dns_udp)s
COMMIT
"""
        self._apply_iptables_conf(ipt_conf % self._formatted_marks())

    def cleanup_tc(self):
        """Clean up tc conf
        """
        out = self.run_tc('qdisc show dev lo')
        # Recent kernels list the built-in default as "qdisc noqueue"; it
        # cannot be deleted, so it does not count as configuration.
        configured = [line for line in out if 'noqueue' not in line]
        if configured:
            log.debug("Cleaning up tc conf")
            self.run_tc('qdisc del dev lo root')
        else:
            log.debug("No tc conf to be cleaned up")

    def setup_tc(self, dns_latency_ms=0, dns_packet_loss_perc=0,
                 db_latency_ms=1, db_packet_loss_perc=1):
        """Setup traffic control

        :param dns_latency_ms: delay applied to DNS and mDNS traffic
        :param dns_packet_loss_perc: loss applied to DNS and mDNS traffic
        :param db_latency_ms: delay applied to MySQL traffic
        :param db_packet_loss_perc: loss applied to MySQL traffic
        """
        self.cleanup_tc()

        # Create HTB at the root
        self.run_tc("qdisc add dev lo handle 1: root htb")

        self.run_tc("class add dev lo parent 1: classid 1:5 htb rate 1000Mbps")
        self.run_tc("class add dev lo parent 1: classid 1:7 htb rate 1000Mbps")

        # TCP DNS
        self._setup_tc_block('1:8', 'tcp', 53, dns_latency_ms,
                             dns_packet_loss_perc)
        # UDP DNS
        self._setup_tc_block('1:9', 'udp', 53, dns_latency_ms,
                             dns_packet_loss_perc)
        # TCP mDNS
        self._setup_tc_block('1:10', 'tcp', 5354, dns_latency_ms,
                             dns_packet_loss_perc)
        # UDP mDNS
        self._setup_tc_block('1:11', 'udp', 5354, dns_latency_ms,
                             dns_packet_loss_perc)
        # MySQL: honour the db_* parameters (their defaults are 1 and 1)
        self._setup_tc_block('1:12', 'tcp', 3306, db_latency_ms,
                             db_packet_loss_perc)

        # RabbitMQ port: 5672
        self._setup_tc_block('1:13', 'tcp', 5672, 1, 1)

        # MemcacheD
        self._setup_tc_block('1:14', 'tcp', 11211, 1, 1)

    def _setup_tc_block(self, class_id, proto, port, latency_ms,
                        packet_loss_perc):
        """Setup tc htb entry, netem and filter"""
        assert proto in ('tcp', 'udp')
        self.run_tc("class add dev lo parent 1: classid %s htb rate 1000Mbps"
                    % class_id)
        self._setup_netem(class_id, latency_ms, latency_ms, packet_loss_perc)
        # Match the port as both source and destination
        self._setup_filter(proto, 'sport %d' % port, class_id)
        self._setup_filter(proto, 'dport %d' % port, class_id)

    def _setup_netem(self, classid, latency1, latency2, loss_perc):
        """Setup tc netem

        :param classid: class the netem qdisc is attached to
        :param latency1: first value of the netem ``delay`` option, in ms
        :param latency2: second value of the netem ``delay`` option, in ms
        :param loss_perc: value of the netem ``loss`` option, in percent
        """
        # This could be done with the FireQOS tool instead:
        # https://firehol.org/tutorial/fireqos-new-user/
        cmd = ("qdisc add dev lo parent {cid} netem"
               " corrupt 0.1%"
               " delay {lat1}ms {lat2}ms distribution normal"
               " duplicate 0.1%"
               " loss {packet_loss_perc}%"
               " reorder 25% 50%")
        cmd = cmd.format(cid=classid, lat1=latency1, lat2=latency2,
                         packet_loss_perc=loss_perc)
        self.run_tc(cmd)

    def _setup_filter(self, protocol, filter, flowid):
        """Setup tc filter

        :param protocol: 'tcp' or 'udp'
        :param filter: u32 port match, e.g. 'dport 53'
        :param flowid: class matching packets are sent to
        """
        # IP protocol numbers
        protocol_nums = dict(tcp=6, udp=17)
        pnum = protocol_nums[protocol]
        cmd = "filter add dev lo protocol ip prio 1 u32 match ip protocol " \
              "%(pnum)d 0xff match ip %(filter)s 0xffff flowid %(flowid)s"

        self.run_tc(cmd % dict(pnum=pnum, filter=filter, flowid=flowid))
321
322
class Digger(object):
    """Probe the local nameserver until a set of goals is reached

    A goal is a tuple, either ``('zone_serial_ge', zone_name, serial)`` or
    ``('record_a', record_name, ipaddr)``.  add_goal() appends the time of
    registration so that the time to success can be measured.
    """

    def __init__(self):
        self.ns_ipaddr = self.get_nameserver_ipaddr()
        self._setup_resolver()
        self.max_probes_per_second = 30
        self.reset_goals()

    @property
    def prober_is_running(self):
        """True while the prober thread exists and is alive"""
        thread = getattr(self, '_prober_thread', None)
        return thread is not None and thread.is_alive()

    def _setup_resolver(self, timeout=1):
        """Create a resolver that queries only the local nameserver

        :param timeout: used both as per-query timeout and total lifetime
        """
        resolver = dns.resolver.Resolver(configure=False)
        resolver.timeout = timeout
        resolver.lifetime = timeout
        resolver.nameservers = [self.ns_ipaddr]
        self.resolver = resolver

    @staticmethod
    def _ipaddr_from_netstat_line(line):
        """Extract the address from the fourth column (``addr:port``) of a
        netstat output line
        """
        local_address = line.split()[3]
        # Split on the last colon so that IPv6 addresses are kept whole
        return local_address.rsplit(':', 1)[0]

    def get_nameserver_ipaddr(self):
        """Return the address pdns_server is listening on

        :raises: CalledProcessError when no pdns_server line is found
        """
        # FIXME: find a better way to do this
        out = run_shell('sudo netstat -nlpt | grep pdns_server')
        ipaddr = self._ipaddr_from_netstat_line(out[0])
        log.debug("Resolver ipaddr: %s", ipaddr)
        return ipaddr

    def query_a_record(self, record_name, timeout=3):
        """Return the first A record of ``record_name``, None on any failure

        ``timeout`` is currently unused: the limits set by _setup_resolver()
        apply.
        """
        try:
            answer = self.resolver.query(record_name, 'A')
            if answer.rrset:
                return answer.rrset[0].address
        except Exception:
            # Best effort: a failed lookup only means "not there yet"
            return None

    def query_soa(self, zone_name, timeout=3):
        """Return the SOA serial of ``zone_name``, None on any failure

        ``timeout`` is currently unused: the limits set by _setup_resolver()
        apply.
        """
        try:
            soa_answer = self.resolver.query(zone_name, 'SOA')
            return soa_answer[0].serial
        except Exception:
            # Best effort: a failed lookup only means "not there yet"
            return None

    def reset_goals(self):
        """Forget all goals and statistics; the prober must not be running"""
        assert not self.prober_is_running
        self.goals = set()
        self.summary = dict(
            success_cnt=0,
            total_time_to_success=0,
        )

    def add_goal(self, goal):
        """Register a goal tuple, timestamped with the current time"""
        self.goals.add(goal + (time.time(), ))

    def _print_summary(self, final=True):
        """Log out a summary of the current run

        :param final: log at INFO level when True, at DEBUG level otherwise
        """
        remaining = len(self.goals)
        success_cnt = self.summary['success_cnt']
        try:
            avg_t = (self.summary['total_time_to_success'] / success_cnt)
            avg_t = ", avg time to success: %2.3fs" % avg_t
        except ZeroDivisionError:
            avg_t = ''

        logf = log.info if final else log.debug
        logf(" test summary: success %3d, remaining %3d %s" % (
            success_cnt, remaining, avg_t))

    def _goal_is_reached(self, goal):
        """Query the nameserver once and tell whether ``goal`` is met"""
        goal_type = goal[0]
        if goal_type == 'zone_serial_ge':
            goal_type, zone_name, serial, t0 = goal
            actual_serial = self.query_soa(zone_name)
            return bool(actual_serial and actual_serial >= serial)

        if goal_type == 'record_a':
            goal_type, record_name, ipaddr, t0 = goal
            return self.query_a_record(record_name) == ipaddr

        # goal is a tuple: it must be passed as a logging argument, applying
        # "%" to it directly would raise TypeError.
        log.error("Unknown goal %r", goal)
        return False

    def _record_success(self, goal):
        """Drop a reached goal and update the statistics"""
        deltat = time.time() - goal[-1]
        log.debug(" reached %s in %.3fs" % (repr(goal), deltat))
        self.goals.discard(goal)
        self.summary['success_cnt'] += 1
        self.summary['total_time_to_success'] += deltat

    def _probe_resolver(self):
        """Probe the local resolver, report achieved goals
        """
        log.debug("Starting prober")
        assert self.prober_is_running is True
        self._progress_report_time = 0
        pause = 1.0 / self.max_probes_per_second
        now = time.time()
        while (self.goals or not self.prober_can_stop) and \
                now < self.prober_timeout_time:

            # Iterate on a snapshot: goals are added by other threads and
            # removed below.
            for goal in tuple(self.goals):
                if self._goal_is_reached(goal):
                    self._record_success(goal)

                if time.time() < self.prober_timeout_time:
                    time.sleep(pause)
                else:
                    break

            # Emit a progress report every 10 seconds
            if time.time() > self._progress_report_time:
                self._print_summary(final=False)
                self._progress_report_time = time.time() + 10

            time.sleep(pause)
            now = time.time()

        if now > self.prober_timeout_time:
            log.info("prober timed out after %d s" % (
                now - self.prober_start_time))

        self._print_summary()

    def probe_resolver(self, timeout=600):
        """Probe the local resolver in a dedicated thread until all
        goals have been achieved or timeout occurs
        """
        assert not self.prober_is_running
        self.prober_can_stop = False
        self.prober_start_time = time.time()
        self.prober_timeout_time = self.prober_start_time + timeout
        self._prober_thread = Thread(target=self._probe_resolver)
        self._prober_thread.daemon = True
        self._prober_thread.start()

    def stop_prober(self):
        """Make the prober exit at its next check, without waiting for it"""
        self.prober_can_stop = True
        # A deadline in the past ends the prober loop
        self.prober_timeout_time = 0

    def wait_on_prober(self):
        """Let the prober finish its goals (or time out) and wait for it"""
        self.prober_can_stop = True
        self._prober_thread.join()
        assert self.prober_is_running is False
469
470
def list_zones(cli):
    """Return the names of all zones reported by ``zone list``.

    :param cli: object providing run_json() (a DesignateCLI)
    :returns: list of zone names
    """
    zones = [z["name"] for z in cli.run_json('zone list')]
    log.debug("Found zones: %r", zones)
    return zones
475
476
def delete_zone_by_name(cli, zn, ignore_missing=False):
    """Delete the zone named ``zn``.

    :param cli: object providing run() and run_json() (a DesignateCLI)
    :param zn: zone name
    :param ignore_missing: when True, list the zones first and return
                           without error if ``zn`` is not among them
    """
    if ignore_missing and zn not in list_zones(cli):
        # Nothing to delete
        return

    cli.run('zone delete %s' % zn)
485
486
def create_and_probe_a_record(cli, digger, zone_id, record_name, ipaddr):
    """Create an A recordset and register its resolution as a digger goal.

    :param cli: object providing run_json() (a DesignateCLI)
    :param digger: object providing add_goal() (a Digger)
    :param zone_id: zone the recordset is created in
    :param record_name: name of the new record
    :param ipaddr: address the record points to
    """
    cli.run_json('recordset create %s %s --type A --records %s' %
                 (zone_id, record_name, ipaddr))
    # Register the goal only once the create command has succeeded
    digger.add_goal(('record_a', record_name, ipaddr))
491
492
def delete_all_zones(cli):
    """Delete every zone returned by list_zones().

    :param cli: object providing run() and run_json() (a DesignateCLI)
    """
    zones = list_zones(cli)
    log.info("%d zones to be deleted", len(zones))
    for zone in zones:
        log.info("Deleting %s", zone)
        delete_zone_by_name(cli, zone)
499
500
def create_zone_with_retry_on_duplicate(cli, digger, zn, timeout=300,
                                        dig=False):
    """Create a zone, retry when a duplicate is found,
    optionally monitor for propagation

    :param cli: object providing run_json() (a DesignateCLI)
    :param digger: Digger used to wait for propagation; only used if ``dig``
    :param zn: zone name
    :param timeout: seconds allowed for the creation and, separately, for
                    the propagation check
    :param dig: when True, wait until the nameserver serves the new serial
    :returns: dict
    :raises: RuntimeError('timeout') if the zone could not be created in
             time, RuntimeError('over_quota') if the quota is exceeded,
             CalledProcessError for any other CLI failure
    """
    t0 = time.time()
    timeout_time = timeout + t0
    output = None
    created = False
    while time.time() < timeout_time:
        try:
            output = cli.run_json(
                "zone create %s --email devstack@example.org" % zn)
            created = True
            log.debug(" zone created after %f", time.time() - t0)
            break

        except CalledProcessError as e:
            # e.output can be None, or bytes on Python 3, and usually ends
            # with a newline: normalise it and match by substring.
            err = e.output or ''
            if isinstance(err, bytes):
                err = err.decode('utf-8', 'replace')

            if 'Duplicate Zone' in err:
                # dup zone, sleep and retry
                time.sleep(1)

            elif 'over_quota' in err:
                raise RuntimeError('over_quota')

            else:
                raise

    # Check for the timeout before looking at the output, which does not
    # exist when no attempt succeeded.
    if not created:
        raise RuntimeError('timeout')

    assert output['serial']

    if dig:
        digger.reset_goals()
        digger.add_goal(('zone_serial_ge', zn, int(output['serial'])))
        digger.probe_resolver(timeout=timeout)
        digger.wait_on_prober()

    return output
543
544
def test_create_list_delete_loop(cli, digger, cycles_num, zn='cld.org.'):
    """Create, list, delete a zone in a loop
    Monitor for propagation time

    :param cli: DesignateCLI
    :param digger: Digger used to wait for the propagation of each creation
    :param cycles_num: number of create/list/delete cycles
    :param zn: name of the zone used by every cycle
    """
    log.info("Test zone creation, list, deletion")
    delete_zone_by_name(cli, zn, ignore_missing=True)

    for _ in range(cycles_num):
        zone = create_zone_with_retry_on_duplicate(cli, digger, zn, dig=True)

        # NOTE(review): 'domain-list' and 'domain-delete' look like commands
        # of the legacy designate client, but cli.runcsv() and cli.run()
        # invoke the openstack client - confirm they work before enabling
        # this test in run_tests().
        zones = cli.runcsv('domain-list')
        assert any(z['name'] == zn for z in zones), zones

        cli.run('domain-delete %s' % zone['id'])

        zones = cli.runcsv('domain-list')
        assert not any(z['name'] == zn for z in zones), zones

    log.info("done")
564
565
def test_one_big_zone(cli, digger, zone_size):
    """Create a zone with many records,
    perform CRUD on records and monitor for propagation time

    :param cli: DesignateCLI
    :param digger: Digger measuring the propagation of the zone and records
    :param zone_size: number of A records to create
    """
    t0 = time.time()
    zn = 'bigzone-%s.org.' % gen_random_name(12)
    delete_zone_by_name(cli, zn, ignore_missing=True)
    zone = create_zone_with_retry_on_duplicate(cli, digger, zn, dig=True)
    assert 'serial' in zone, zone
    assert 'id' in zone, zone
    record_creation_threads = []
    try:
        digger.reset_goals()
        digger.add_goal(('zone_serial_ge', zn, int(zone['serial'])))
        digger.probe_resolver(timeout=60)

        for record_num in range(zone_size):
            record_name = "rec%d" % record_num
            # Spread the record number over the last three octets
            ipaddr = "127.%d.%d.%d" % (
                (record_num >> 16) % 256,
                (record_num >> 8) % 256,
                record_num % 256,
            )
            t = Thread(target=create_and_probe_a_record,
                       args=(cli, digger, zone['id'], record_name, ipaddr))
            t.start()
            record_creation_threads.append(t)
            time.sleep(.5)

        # Every goal must be registered before the prober is allowed to
        # stop: it exits as soon as it finds no goal left, so records still
        # being created would never be probed.
        for t in record_creation_threads:
            t.join()

        digger.wait_on_prober()

    except KeyboardInterrupt:
        log.info("Exiting on keyboard")
        raise

    finally:
        digger.stop_prober()
        delete_zone_by_name(cli, zone['name'])
        log.info("Done in %ds" % (time.time() - t0))
605
606
def test_servers_are_configured(cli):
    """Check that the first server listed is ns1.devstack.org.

    :param cli: object providing runcsv() (a DesignateCLI)
    :raises: AssertionError when the first server has a different name,
             IndexError when no server is listed
    """
    servers = cli.runcsv('server-list')
    assert servers[0]['name'] == 'ns1.devstack.org.', servers
    log.info("done")
611
612
def test_big_zone(args, cli, digger, tc):
    """Run test_one_big_zone() under each combination of DNS latency and
    packet loss.

    :param args: parsed command line (not used here)
    :param cli: DesignateCLI
    :param digger: Digger
    :param tc: TrafficControl used to reconfigure the simulated network
    """
    log.info("Test creating many records in one big zone")

    dns_latencies_ms = (1, 100)
    dns_packet_losses = (1, 15)
    zone_size = 20

    conditions = product(dns_latencies_ms, dns_packet_losses)
    for dns_latency_ms, dns_packet_loss_perc in conditions:
        tc.cleanup_tc()
        tc.setup_tc(dns_latency_ms=dns_latency_ms,
                    dns_packet_loss_perc=dns_packet_loss_perc)
        log.info("Running test with DNS latency %dms packet loss %d%%",
                 dns_latency_ms, dns_packet_loss_perc)
        test_one_big_zone(cli, digger, zone_size)
628
629
def run_tests(args, cli, digger, tc):
    """Run all integration tests

    Only the big zone test is enabled; the other two are kept commented
    out.

    :param args: parsed command line
    :param cli: DesignateCLI
    :param digger: Digger
    :param tc: TrafficControl
    """
    # test_servers_are_configured(cli)
    # test_create_list_delete_loop(cli, digger, 10)
    test_big_zone(args, cli, digger, tc)
636
637
def main():
    """Configure logging, prepare Designate and run the tests.

    The tc configuration is removed when the tests end, whether they
    succeed or fail.
    """
    args = parse_args()
    loglevel = logging.DEBUG if args.debug else logging.INFO
    logging.basicConfig(
        level=loglevel,
        format='%(relativeCreated)8d %(levelname)s %(funcName)20s %(message)s',
    )

    cli = DesignateCLI()
    cli.setup_quota(10000)

    digger = Digger()

    # Start from a clean state: no zones, no traffic shaping
    delete_all_zones(cli)

    tc = TrafficControl()
    tc.cleanup_tc()

    try:
        run_tests(args, cli, digger, tc)
    finally:
        tc.cleanup_tc()


if __name__ == '__main__':
    main()
diff --git a/devstack/networking_test_monitor_tc.sh b/devstack/networking_test_monitor_tc.sh
new file mode 100755
index 0000000..d32f8d0
--- /dev/null
+++ b/devstack/networking_test_monitor_tc.sh
@@ -0,0 +1,3 @@
#!/bin/bash
# Show the qdisc, class and filter state of one interface, refreshed every
# second.
# Usage: networking_test_monitor_tc.sh [interface]   (default: lo)
IF="${1:-lo}"
watch -n1 "tc -p -s -d qdisc show dev $IF; echo; tc class show dev $IF; echo; tc filter show dev $IF"