turbo-hipster/turbo_hipster/cmd/report_historical.py

#!/usr/bin/python2
#
# Copyright 2013 Rackspace Australia
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.


import json
import math
import MySQLdb
import os
import sys
import yaml


def main():
    """Run the historical report for each known dataset, in a fixed order."""
    dataset_names = (
        'devstack_131007',
        'devstack_150',
        'trivial_500',
        'trivial_6000',
        'user_001',
        'user_002',
    )
    for name in dataset_names:
        process_dataset(name)


def process_dataset(dataset):
    """Write a tuned ``config.json`` for one dataset from recorded results.

    Reads the results-database settings from
    ``/etc/turbo-hipster/config.yaml``, collects every recorded migration
    duration and stats blob for ``dataset`` (``mysql`` and ``percona``
    engines), then loads the dataset's ``input.json`` and adds a
    per-migration entry wherever the recorded history is above the
    ``default`` value already configured there.  The result is written as
    ``config.json`` in the same directory as ``input.json``.

    Side effects: prints one progress line per engine, rewrites
    ``results.txt`` in the current working directory, and writes
    ``config.json``.

    :param dataset: dataset name, matched against ``summary.dataset``
    """
    with open('/etc/turbo-hipster/config.yaml', 'r') as config_stream:
        results_config = yaml.safe_load(config_stream)['results']

    db = MySQLdb.connect(host=results_config['host'],
                         port=results_config.get('port', 3306),
                         user=results_config['username'],
                         passwd=results_config['password'],
                         db=results_config['database'])
    try:
        cursor = db.cursor(MySQLdb.cursors.DictCursor)
        all_times, stats_summary = _collect_history(cursor, dataset)
    finally:
        # A connection is opened for every dataset, so always release it.
        db.close()

    # NOTE: nothing in this function fills ``migrations``, so only the
    # header row ends up in results.txt.
    migrations = {}
    with open('results.txt', 'w') as f:
        f.write('Migration,mysql,percona\n')
        for migration in sorted(migrations):
            f.write('%s' % migration)
            for engine in ['mysql', 'percona']:
                f.write(',%s' % migrations[migration].get(engine, ''))
            f.write('\n')

    # Write out the dataset config as a json blob
    config_path = os.path.join('datasets',
                               'datasets_%s' % dataset,
                               omg_hard_to_predict_names(dataset))
    with open(os.path.join(config_path, 'input.json')) as f:
        dataset_config = json.load(f)

    for migration in sorted(all_times):
        # Timing
        config_max = dataset_config['maximum_migration_times']['default']
        recommend = _recommended_max_time(all_times[migration])
        if recommend is not None and recommend > config_max:
            dataset_config['maximum_migration_times'][migration] = \
                math.ceil(recommend)

        # Innodb stats
        if migration not in stats_summary:
            continue

        for stats_key in ['XInnodb_rows_changed', 'Innodb_rows_read']:
            config_max = dataset_config[stats_key]['default']

            observed = stats_summary[migration].get(stats_key, {})
            if not observed:
                # No run reported this stat for this migration; max() of an
                # empty collection would raise ValueError.
                continue

            max_value = max(observed)
            if max_value > config_max:
                dataset_config[stats_key][migration] = \
                    _stat_ceiling(max_value)

    with open(os.path.join(config_path, 'config.json'), 'w') as f:
        f.write(json.dumps(dataset_config, indent=4, sort_keys=True))


def _collect_history(cursor, dataset):
    """Query the summary table and return ``(all_times, stats_summary)``.

    ``all_times`` maps migration -> list of recorded durations, with both
    engines pooled into the same list.  ``stats_summary`` maps
    migration -> stat name -> {observed value: number of rows with it}.
    """
    all_times = {}
    stats_summary = {}

    for engine in ['mysql', 'percona']:
        print('%s, %s' % (dataset, engine))
        # Values are handed to the driver as parameters rather than being
        # interpolated into the SQL text; ``migration`` in particular is
        # read back from the database and is not known to be quote-free.
        cursor.execute('select distinct(migration) from summary where '
                       'engine=%s and dataset=%s order by migration;',
                       (engine, dataset))
        migrations_list = [row['migration'] for row in cursor]

        for migration in migrations_list:
            times = all_times.setdefault(migration, [])

            cursor.execute('select distinct(duration), count(*) from summary '
                           'where engine=%s and dataset=%s and '
                           'migration=%s group by duration;',
                           (engine, dataset, migration))
            for row in cursor:
                # One entry per recorded run with this duration.
                times.extend([row['duration']] * row['count(*)'])

            cursor.execute('select stats_json from summary where engine=%s '
                           'and dataset=%s and migration=%s and '
                           'not (stats_json = "{}");',
                           (engine, dataset, migration))
            for row in cursor:
                stats = json.loads(row['stats_json'])
                # Created before the key loop so the composed stat below
                # has somewhere to go even if the JSON object has no keys.
                migration_stats = stats_summary.setdefault(migration, {})
                for key in stats:
                    counts = migration_stats.setdefault(key, {})
                    counts[stats[key]] = counts.get(stats[key], 0) + 1

                # Composed stats
                rows_changed = sum(stats.get(key, 0)
                                   for key in ('Innodb_rows_updated',
                                               'Innodb_rows_inserted',
                                               'Innodb_rows_deleted'))
                changed = migration_stats.setdefault(
                    'XInnodb_rows_changed', {})
                changed[rows_changed] = changed.get(rows_changed, 0) + 1

    return all_times, stats_summary


def _recommended_max_time(times):
    """Return a near-top duration plus 30, or None with 10 or fewer samples.

    The duration picked is the one ``ceil(len(times) / 100)`` places from
    the top of the sorted list.
    """
    sample_count = len(times)
    if sample_count <= 10:
        return None
    # Float division matters: under Python 2 ``sample_count / 100``
    # truncates to 0 below 100 samples, and index ``-0`` is index 0, i.e.
    # the *smallest* duration.
    one_percent = int(math.ceil(sample_count / 100.0))
    return sorted(times)[-one_percent] + 30


def _stat_ceiling(value):
    """Return the next multiple of 10000 strictly greater than ``value``."""
    return value + (10000 - value % 10000)


def omg_hard_to_predict_names(dataset):
    """Map a dataset name to the irregular sub-directory name used for it.

    Names starting with ``trivial`` get a ``nova_`` prefix, the two
    devstack datasets have fixed special-case names, and anything else is
    returned unchanged.
    """
    if dataset.startswith('trivial'):
        return 'nova_%s' % dataset

    special_cases = {
        'devstack_150': 'datasets_devstack_150',
        'devstack_131007': '131007_devstack_export',
    }
    return special_cases.get(dataset, dataset)


if __name__ == '__main__':
    # Put the directory above this script at the front of the import path
    # before running the report.
    script_dir = os.path.dirname(__file__)
    parent_dir = os.path.abspath(os.path.join(script_dir, '../'))
    sys.path.insert(0, parent_dir)
    main()