Set benchmarks for InnoDB counters.

Use the currently collected InnoDB counters to decide what we
expect from existing migrations. I've also moved metrics collection
to a cloud database because JSON was a poor scaling choice.

Change-Id: I2ff0e4b58ac54f53a569ec3e67ad2fa753748dbf
Michael Still 2014-01-16 16:12:46 +11:00
parent e7ee50e0c0
commit c972d2d1d7
14 changed files with 225 additions and 100 deletions

View File

@@ -1,10 +1,16 @@
 {
+    "Innodb_rows_read": {
+        "default": 1000
+    },
+    "XInnodb_rows_changed": {
+        "default": 1000
+    },
     "database": "nova_dataset_131007_devstack",
     "db_pass": "tester",
     "db_user": "nova",
     "logging_conf": "logging.conf",
     "maximum_migration_times": {
-        "default": 30
+        "default": 60
     },
     "project": "openstack/nova",
     "seed_data": "nova.sql",

View File

@@ -7,6 +7,12 @@
     "seed_data": "nova.sql",
     "logging_conf": "logging.conf",
     "maximum_migration_times": {
-        "default": 30
+        "default": 60
+    },
+    "XInnodb_rows_changed": {
+        "default": 1000
+    },
+    "Innodb_rows_read": {
+        "default": 1000
     }
 }

View File

@@ -1,10 +1,17 @@
 {
+    "Innodb_rows_read": {
+        "default": 1000
+    },
+    "XInnodb_rows_changed": {
+        "default": 1000
+    },
     "database": "datasets_devstack_150",
     "db_pass": "tester",
     "db_user": "nova",
     "logging_conf": "logging.conf",
     "maximum_migration_times": {
-        "default": 30
+        "151->152": 67.0,
+        "default": 60
     },
     "project": "openstack/nova",
     "seed_data": "nova.sql",

View File

@@ -7,7 +7,13 @@
     "seed_data": "nova.sql",
     "logging_conf": "logging.conf",
     "maximum_migration_times": {
-        "default": 30
+        "default": 60
+    },
+    "XInnodb_rows_changed": {
+        "default": 1000
+    },
+    "Innodb_rows_read": {
+        "default": 1000
     }
 }

View File

@@ -1,11 +1,18 @@
 {
+    "Innodb_rows_read": {
+        "default": 1000
+    },
+    "XInnodb_rows_changed": {
+        "default": 1000
+    },
     "database": "nova_dataset_trivial_500",
     "db_pass": "tester",
     "db_user": "nova",
     "logging_conf": "logging.conf",
     "maximum_migration_times": {
-        "138": 42.0,
-        "default": 30
+        "151->152": 84.0,
+        "152->151": 103.0,
+        "default": 60
     },
     "project": "openstack/nova",
     "seed_data": "nova_trivial_500.sql",

View File

@@ -7,7 +7,13 @@
     "seed_data": "nova_trivial_500.sql",
     "logging_conf": "logging.conf",
     "maximum_migration_times": {
-        "default": 30
+        "default": 60
+    },
+    "XInnodb_rows_changed": {
+        "default": 1000
+    },
+    "Innodb_rows_read": {
+        "default": 1000
     }
 }

View File

@@ -1,11 +1,20 @@
 {
+    "Innodb_rows_read": {
+        "default": 1000
+    },
+    "XInnodb_rows_changed": {
+        "default": 1000
+    },
     "database": "nova_dataset_trivial_6000",
     "db_pass": "tester",
     "db_user": "nova",
     "logging_conf": "logging.conf",
     "maximum_migration_times": {
-        "152": 74.0,
-        "default": 30
+        "151->152": 159.0,
+        "152->151": 195.0,
+        "184->185": 66.0,
+        "186->185": 144.0,
+        "default": 60
     },
     "project": "openstack/nova",
     "seed_data": "nova_trivial_6000.sql",

View File

@@ -7,7 +7,13 @@
     "seed_data": "nova_trivial_6000.sql",
     "logging_conf": "logging.conf",
     "maximum_migration_times": {
-        "default": 30
+        "default": 60
+    },
+    "XInnodb_rows_changed": {
+        "default": 1000
+    },
+    "Innodb_rows_read": {
+        "default": 1000
     }
 }

View File

@@ -1,18 +1,49 @@
 {
     "Innodb_rows_read": {
+        "148->149": 110000,
+        "151->152": 3470000,
+        "159->160": 200000,
+        "160->161": 390000,
+        "202->203": 260000,
+        "205->206": 140000,
+        "215->216": 930000,
         "default": 100000
     },
+    "XInnodb_rows_changed": {
+        "148->149": 110000,
+        "151->152": 3200000,
+        "184->185": 140000,
+        "193->194": 150000,
+        "202->203": 520000,
+        "203->204": 260000,
+        "205->206": 190000,
+        "215->216": 260000,
+        "229->230": 140000,
+        "default": 100000
+    },
     "database": "nova_datasets_user_001",
     "db_pass": "tester",
     "db_user": "nova",
     "logging_conf": "logging.conf",
     "maximum_migration_times": {
-        "135": 62.0,
-        "138": 44.0,
-        "149": 87.0,
-        "152": 241.0,
-        "159": 86.0,
-        "205": 51.0,
-        "206": 63.0,
-        "216": 102.0,
-        "230": 53.0,
+        "134->135": 116.0,
+        "135->134": 97.0,
+        "137->138": 85.0,
+        "138->137": 100.0,
+        "148->149": 135.0,
+        "149->148": 158.0,
+        "151->152": 333.0,
+        "152->151": 330.0,
+        "158->159": 136.0,
+        "159->158": 168.0,
+        "186->185": 569.0,
+        "204->205": 97.0,
+        "205->204": 98.0,
+        "205->206": 116.0,
+        "206->205": 106.0,
+        "215->216": 137.0,
+        "229->230": 122.0,
+        "230->229": 84.0,
+        "_138_bugs": [
+            1263835
+        ],
@@ -22,7 +53,7 @@
         "_205_bugs": [
             1263868
         ],
-        "default": 30
+        "default": 60
     },
     "project": "openstack/nova",
     "seed_data": "nova_user_001.sql",

View File

@@ -7,17 +7,16 @@
     "seed_data": "nova_user_001.sql",
     "logging_conf": "logging.conf",
     "maximum_migration_times": {
-        "default": 30,
-        "135": 120,
-        "138": 180,
+        "default": 60,
         "_138_bugs": [1263835],
-        "149": 240,
         "_149_bugs": [1263836],
-        "152": 300,
-        "159": 120,
-        "205": 120,
-        "_205_bugs": [1263868],
-        "216": 180
+        "_205_bugs": [1263868]
+    },
+    "XInnodb_rows_changed": {
+        "default": 100000
+    },
+    "Innodb_rows_read": {
+        "default": 100000
     }
 }

View File

@@ -1,10 +1,16 @@
 {
+    "Innodb_rows_read": {
+        "default": 100000
+    },
+    "XInnodb_rows_changed": {
+        "default": 100000
+    },
     "database": "nova_dataset_user_002",
     "db_pass": "tester",
     "db_user": "nova",
     "logging_conf": "logging.conf",
     "maximum_migration_times": {
-        "default": 30
+        "default": 60
     },
     "project": "openstack/nova",
     "seed_data": "nova_user_002.sql",

View File

@@ -7,7 +7,13 @@
     "seed_data": "nova_user_002.sql",
     "logging_conf": "logging.conf",
     "maximum_migration_times": {
-        "default": 30
+        "default": 60
+    },
+    "XInnodb_rows_changed": {
+        "default": 100000
+    },
+    "Innodb_rows_read": {
+        "default": 100000
     }
 }

View File

@@ -59,6 +59,7 @@ def main():
     # Open the results database
     db = MySQLdb.connect(host=config['results']['host'],
+                         port=config['results'].get('port', 3306),
                          user=config['results']['username'],
                          passwd=config['results']['password'],
                          db=config['results']['database'])
@@ -83,16 +84,28 @@ def main():
             if not 'duration' in migration:
                 continue
 
-            cursor.execute('insert ignore into summary'
-                           '(path, parsed_at, engine, dataset, '
-                           'migration, duration, stats_json) '
-                           'values("%s", now(), "%s", '
-                           '"%s", "%s", %d, "%s");'
-                           % (item['name'], engine, dataset,
-                              '%s->%s' % (migration['from'],
-                                          migration['to']),
-                              migration['duration'],
-                              migration['stats']))
+            if migration['stats']:
+                cursor.execute('insert ignore into summary'
+                               '(path, parsed_at, engine, dataset, '
+                               'migration, duration, stats_json) '
+                               'values(%s, now(), %s, '
+                               '%s, %s, %s, %s);',
+                               (item['name'], engine, dataset,
+                                '%s->%s' % (migration['from'],
+                                            migration['to']),
+                                migration['duration'],
+                                json.dumps(migration['stats'])))
+            else:
+                cursor.execute('insert ignore into summary'
+                               '(path, parsed_at, engine, dataset, '
+                               'migration, duration, stats_json) '
+                               'values(%s, now(), %s, '
+                               '%s, %s, %s, NULL);',
+                               (item['name'], engine, dataset,
+                                '%s->%s' % (migration['from'],
+                                            migration['to']),
+                                migration['duration']))
         cursor.execute('commit;')
 
     items = connection.get_container(swift_config['container'],
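The DDL for the summary table these inserts target is not part of the diff. A plausible schema, inferred from the column list and the insert ignore usage; the column types and the unique key are assumptions:

    # Assumed schema for the results database. Only the column names and
    # the "insert ignore" semantics appear in this commit; everything
    # else here is a guess.
    SUMMARY_DDL = """
    create table if not exists summary (
        path varchar(255),
        parsed_at datetime,
        engine varchar(32),
        dataset varchar(64),
        migration varchar(32),
        duration int,
        stats_json text,
        unique key uniq_run (path, engine, dataset, migration)
    ) engine=innodb;
    """

    def ensure_summary_table(cursor):
        # The unique key is what lets "insert ignore" skip log files that
        # have already been parsed into the summary table.
        cursor.execute(SUMMARY_DDL)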

View File

@@ -17,7 +17,7 @@
 import json
 import math
-import numpy
+import MySQLdb
 import os
 import sys
@@ -29,35 +29,63 @@ def main():
 def process_dataset(dataset):
-    with open('results.json') as f:
-        results = json.loads(f.read())
+    with open('/etc/turbo-hipster/config.json', 'r') as config_stream:
+        config = json.load(config_stream)
+
+    db = MySQLdb.connect(host=config['results']['host'],
+                         port=config['results'].get('port', 3306),
+                         user=config['results']['username'],
+                         passwd=config['results']['password'],
+                         db=config['results']['database'])
+    cursor = db.cursor(MySQLdb.cursors.DictCursor)
 
     migrations = {}
     all_times = {}
+    stats_summary = {}
 
     for engine in ['mysql', 'percona']:
-        print
-        print 'Dataset: %s' % dataset
-        print 'Engine: %s' % engine
-        print
+        print '%s, %s' % (dataset, engine)
+
+        cursor.execute('select distinct(migration) from summary where '
+                       'engine="%s" and dataset="%s" order by migration;'
+                       % (engine, dataset))
+        migrations_list = []
+        for row in cursor:
+            migrations_list.append(row['migration'])
 
-        for migration in sorted(results[engine][dataset]):
-            times = []
+        for migration in migrations_list:
             all_times.setdefault(migration, [])
-            for time in results[engine][dataset][migration]:
-                for i in range(results[engine][dataset][migration][time]):
-                    times.append(int(time))
-                    all_times[migration].append(int(time))
-            times = sorted(times)
-            emit_summary(engine, times, migrations, migration)
+            cursor.execute('select distinct(duration), count(*) from summary '
+                           'where engine="%s" and dataset="%s" and '
+                           'migration="%s" group by duration;'
+                           % (engine, dataset, migration))
+            for row in cursor:
+                for i in range(row['count(*)']):
+                    all_times[migration].append(row['duration'])
 
-    print
-    print 'Dataset: %s' % dataset
-    print 'Engine: combined'
-    print
-    for migration in sorted(all_times.keys()):
-        emit_summary('combined', all_times[migration], migrations, migration)
+            cursor.execute('select stats_json from summary where engine="%s" '
+                           'and dataset="%s" and migration="%s" and '
+                           'not (stats_json = "{}");'
+                           % (engine, dataset, migration))
+            for row in cursor:
+                stats = json.loads(row['stats_json'])
+                for key in stats:
+                    stats_summary.setdefault(migration, {})
+                    stats_summary[migration].setdefault(key, {})
+                    stats_summary[migration][key].setdefault(stats[key], 0)
+                    stats_summary[migration][key][stats[key]] += 1
+
+                # Composed stats
+                rows_changed = 0
+                for key in ['Innodb_rows_updated',
+                            'Innodb_rows_inserted',
+                            'Innodb_rows_deleted']:
+                    rows_changed += stats.get(key, 0)
+                stats_summary[migration].setdefault('XInnodb_rows_changed', {})
+                stats_summary[migration]['XInnodb_rows_changed'].setdefault(
+                    rows_changed, 0)
+                stats_summary[migration]['XInnodb_rows_changed'][rows_changed]\
+                    += 1
 
     with open('results.txt', 'w') as f:
         f.write('Migration,mysql,percona\n')
@@ -75,10 +103,33 @@ def process_dataset(dataset):
         config = json.loads(f.read())
 
     for migration in sorted(all_times.keys()):
-        minimum, mean, maximum, stddev = analyse(all_times[migration])
-        recommend = mean + 2 * stddev
-        if recommend > 30.0:
-            config['maximum_migration_times'][migration] = math.ceil(recommend)
+        # Timing
+        config_max = config['maximum_migration_times']['default']
+        l = len(all_times[migration])
+        if l > 10:
+            sorted_all_times = sorted(all_times[migration])
+            one_percent = int(math.ceil(l / 100))
+            recommend = sorted_all_times[-one_percent] + 30
+            if recommend > config_max:
+                config['maximum_migration_times'][migration] = \
+                    math.ceil(recommend)
+
+        # Innodb stats
+        if not migration in stats_summary:
+            continue
+
+        for stats_key in ['XInnodb_rows_changed', 'Innodb_rows_read']:
+            config_max = config[stats_key]['default']
+            values = []
+            results = stats_summary[migration].get(stats_key, {})
+            for result in results:
+                values.append(result)
+            max_value = max(values)
+
+            rounding = max_value % 10000
+            if max_value > config_max:
+                config[stats_key][migration] = max_value + (10000 - rounding)
 
     with open(os.path.join(config_path, 'config.json'), 'w') as f:
         f.write(json.dumps(config, indent=4, sort_keys=True))
@@ -94,40 +145,6 @@ def omg_hard_to_predict_names(dataset):
     return dataset
 
 
-def analyse(times):
-    np_times = numpy.array(times)
-    minimum = np_times.min()
-    mean = np_times.mean()
-    maximum = np_times.max()
-    stddev = np_times.std()
-    return minimum, mean, maximum, stddev
-
-
-def emit_summary(engine, times, migrations, migration):
-    minimum, mean, maximum, stddev = analyse(times)
-    failed_threshold = int(max(30.0, mean + stddev * 2))
-    failed = 0
-    for time in times:
-        if time > failed_threshold:
-            failed += 1
-
-    migrations.setdefault(migration, {})
-    migrations[migration][engine] = ('%.02f;%0.2f;%.02f'
-                                     % (mean - 2 * stddev,
-                                        mean,
-                                        mean + 2 * stddev))
-
-    if failed_threshold != 30 or failed > 0:
-        print ('%s: Values range from %s to %s seconds. %d values. '
-               'Mean is %.02f, stddev is %.02f.\n '
-               'Recommend max of %d. With this value %.02f%% of tests '
-               'would have failed.'
-               % (migration, minimum, maximum,
-                  len(times), mean, stddev, failed_threshold,
-                  failed * 100.0 / len(times)))
-
-
 if __name__ == '__main__':
     sys.path.insert(0, os.path.abspath(
         os.path.join(os.path.dirname(__file__), '../')))
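The net effect of this reports change: maximum times are now recommended from roughly the 99th percentile of observed durations plus 30 seconds of headroom, pulled from the summary table, instead of mean plus two standard deviations, and counter maxima are rounded up to the next 10000. A standalone sketch of the timing rule; note the committed code uses integer division for the percentile index, where this sketch uses float division:

    import math

    def recommend_maximum(times, default_max=60, headroom=30):
        # With more than ten samples, take the value one percent from the
        # top of the sorted list, add headroom, and only recommend an
        # override when that beats the configured default.
        l = len(times)
        if l <= 10:
            return None
        ordered = sorted(times)
        one_percent = int(math.ceil(l / 100.0))
        recommend = ordered[-one_percent] + headroom
        if recommend > default_max:
            return math.ceil(recommend)
        return None

    # With 200 samples whose second-highest duration is 137 seconds,
    # one_percent is 2 and the recommendation is a 167 second cap.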