Add support for unrestricted rate/iops for storage scale

Change-Id: I13c80470b2dbe320b57bdb06c37a2a6a57af480c
ahothan 2016-08-15 12:05:06 -07:00
parent 1f0c94a2e5
commit c7fae16a89
9 changed files with 375 additions and 66 deletions

.gitignore vendored
View File

@ -53,6 +53,7 @@ ChangeLog
.*sw?
*cscope*
.ropeproject/
*.diff
.DS_store

View File

@ -155,6 +155,51 @@ What kind of VM storage are supported?
KloudBuster can measure the performance of ephemeral disks and Cinder attached
volumes at scale.
How to measure the fastest IOPS or throughput from a VM?
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
This feature is only available from the CLI, using a properly defined configuration file.
To measure the fastest IOPS, omit the "rate_iops" and "rate" parameters from the
workload definition in the configuration file (a minimal sketch of this rule follows).
The file kloudbuster/cfg.1GB.yaml provides an example of a configuration file for measuring
the highest IOPS and throughput for random/sequential read/write, using 1 VM and a 1GB file
residing on an attached volume.
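A minimal sketch of this rule, assuming workloads are expressed as plain Python dicts
(the helper name is hypothetical, not a KloudBuster API)::

    # A workload runs unrestricted when both "rate_iops" and "rate" are
    # omitted or set to 0 (per the parameter reference in cfg.scale.yaml)
    def is_unrestricted(workload):
        return not workload.get('rate_iops') and not workload.get('rate')

    # no caps configured: fio will run as fast as possible
    assert is_unrestricted({'mode': 'randread', 'block_size': '4k', 'iodepth': 4})
    # capped: a per-VM limit of 100 IOPS
    assert not is_unrestricted({'mode': 'randread', 'rate_iops': 100})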
How to interpret the generated results in JSON?
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
General parameters:
- test_mode: always "storage"
- storage_target: indicates whether the storage used is Cinder block storage ("volume") or an ephemeral disk ("ephemeral")
- time: time the test was executed
- version: KloudBuster version
- tool: the FIO version used to generate the results
- block_size: block size for I/O operations, with the suffix indicating the unit (e.g. "4k" = 4 kilobytes)
- iodepth: number of in-flight operations
- total_client_vms: total number of VMs running an FIO client
- timeout_vms: number of VM/FIO clients that did not return a result within the allocated time
  (this field is absent when no VM timed out, which should be the case for most runs)
These parameters represent aggregated values for all VMs (to get a per-VM count, divide the value by the number of
client VMs, i.e. total_client_vms):
- read_runtime_ms, write_runtime_ms: aggregated time the FIO tests ran, in msec, as measured by FIO
- rate_iops: aggregated requested number of IOPS, 0 or missing = unlimited (i.e. test as high as possible)
- read_iops, write_iops: aggregated read or write I/O operations per second as measured by FIO
  (if rate_iops is not zero, will be <= rate_iops)
- rate: aggregated requested kilobytes per second, 0 or missing = unlimited (i.e. test as high as possible)
- read_bw, write_bw: aggregated read or write bandwidth in KB/sec
  (if rate is not zero, will be <= rate)
- read_KB, write_KB: aggregated number of kilobytes read or written as measured by FIO
Latency values are reported using a list of pre-defined percentiles:
- read_hist, write_hist: a list of pairs where each pair has a percentile value and a latency value in micro-seconds,
  e.g. [99.9, 1032] indicates that 99.9% of all I/O operations completed in 1032 usec or less
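As an illustration, a hedged parsing sketch (the field names are the ones documented above,
but the exact nesting of the generated JSON file is assumed here)::

    import json

    # Assumes res.json holds one test case dict with the documented fields;
    # the real file may wrap test cases in a list
    with open('res.json') as f:
        tc = json.load(f)

    # per-VM read IOPS: aggregated value divided by the number of client VMs
    per_vm_read_iops = tc['read_iops'] / tc['total_client_vms']
    # read_hist pairs are [percentile, latency_usec]
    lat99_9 = [lat for perc, lat in tc['read_hist'] if perc == 99.9]
    print(per_vm_read_iops, lat99_9)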
Common Pitfalls and Limitations
-------------------------------

kloudbuster/cfg.1GB.yaml Normal file
View File

@ -0,0 +1,67 @@
# KloudBuster Storage Scale Test configuration file
# Workload for reading and writing 1GB file in random and sequential mode
# as fast as possible using just 1 VM
# (pass this file using the -c option)
# Example of invocation (git installation)
# python kloudbuster.py --rc pod2rc.sh --passwd uVpEeMWyKa7Q2zGV -c cfg.1GB.yaml --storage --json res-1GB.json
#
client:
# Flavor to use for the test images
flavor:
# Number of vCPUs for the flavor
vcpus: 1
# Memory for the flavor in MB
ram: 2048
# Size of local disk in GB
disk: 0
# Storage specific configs during staging phase
# Takes effect only in storage testing mode.
storage_stage_configs:
# The number of VMs for running storage tests
vm_count: 1
# KloudBuster can run storage tests on Cinder Volumes or Ephemeral
# Disks. Available options to be configured: ['volume', 'ephemeral'].
target: 'volume'
# Volumes size in GB for each VM
disk_size: 10
# The size of the test file for running IO tests in GB. Must be less than
# or equal to disk_size.
io_file_size: 1
#
# runtime:
# Maximum test duration in seconds, each test will finish
# when the configured runtime elapses or when the full 1GB
# file is read or written (whichever is earlier)
# block_size:
# Block size for I/O operations
# iodepth:
# Number of I/O operations to keep in flight against the file
# rate_iops and rate are not configured, so they will be unrestricted
# (as high as possible)
#
storage_tool_configs:
- description: 'Random Read 1GB/1VM/4KB blocks/IO depth 4'
mode: 'randread'
block_size: '4k'
iodepth: 4
runtime: 120
- description: 'Random Write 1GB/1VM/4KB blocks/IO depth 4'
mode: 'randwrite'
block_size: '4k'
iodepth: 4
runtime: 240
- description: 'Sequential Read 1GB/1VM/64KB blocks/IO depth 64'
mode: 'read'
block_size: '64k'
iodepth: 64
runtime: 60
- description: 'Sequential Write 1GB/1VM/64KB blocks/IO depth 64'
mode: 'write'
block_size: '64k'
iodepth: 64
runtime: 60

View File

@ -258,7 +258,7 @@ client:
# Volumes size in GB for each VM
disk_size: 10
# The size of the test file for running IO tests in GB. Must be less than
# or equal to disk_size. Defaults to 1 GB
io_file_size: 1
# Storage tool specific configs (per VM)
@ -266,10 +266,10 @@ client:
# a default profile which consists of six testing scenarios:
# (1) Random read
# (2) Random write
# (3) Random mixed read/write (70% read, 30% write)
# (4) Sequential read
# (5) Sequential write
# (6) Sequential mixed read/write (70% read, 30% write)
#
# Accepted testing parameters for each scenario:
# description: (Required)
@ -278,19 +278,23 @@ client:
# Self-explained with the name, must be one of the below:
# ['randread', 'randwrite', 'randrw', 'read', 'write', 'rw']
# runtime: (Required)
#     Maximum test duration in seconds, or 0 for unlimited
# block_size: (Required, default=4k)
#     Block size for I/O operations
# iodepth: (Required, default=1)
#     Number of I/O operations to keep in flight against the file
# rate_iops: (Optional, default=unlimited)
#     Cap the I/O rate to this number of IOPS for each VM (a per-VM cap);
#     omit or use 0 for unrestricted IOPS
# rate: (Optional, default=unlimited)
#     Cap the bandwidth to this number of bytes/sec for each VM (a per-VM cap),
#     normal postfix rules apply (illustrated in the sketch below);
#     omit or use 0 for unrestricted rate
# rwmixread: (Required when mode is 'randrw' or 'rw')
#     Percentage of a mixed workload that should be reads (a value between 0 and 100)
# extra_opts: (Optional, default=None)
#     Extra options that will be added verbatim to the FIO client
storage_tool_configs:
- description: 'Random Read'
mode: 'randread'
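The "normal postfix rules" for the rate cap follow fio's K/M/G convention. A hedged
sketch of how a per-VM rate string is converted and scaled to the aggregated KB/sec
value reported in the results, mirroring the consolidation logic in
kb_runner_storage.py (the helper name is illustrative, not part of KloudBuster):

    # 'K' keeps the numeric part as KB/sec, 'M' multiplies by 1024,
    # 'G' by 1024^2; a plain number is used as-is
    def aggregate_rate(rate, vm_count):
        ex_unit = 'KMG'.find(rate[-1].upper())
        if ex_unit != -1:
            return vm_count * int(rate[:-1]) * (1024 ** ex_unit)
        return vm_count * int(rate)

    print(aggregate_rate('500K', 2))  # 1000 KB/sec aggregated over 2 VMs
    print(aggregate_rate('10M', 2))   # 20480 KB/sec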

View File

@ -76,6 +76,8 @@ class Credentials(object):
self.rc_tenant_name = value
elif name == 'CACERT':
self.rc_cacert = value
elif name == 'PASSWORD':
self.rc_password = value
# Read an openrc file and take care of the password
# The 2 args are passed from the command line and can be None
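As a hedged illustration of the mapping this comment describes (the line format is the
standard OpenStack "export OS_*=value" openrc form; the snippet is illustrative only,
not part of the class):

    # How an openrc export line reaches the elif chain above
    line = 'export OS_PASSWORD=secret'
    if line.startswith('export OS_'):
        name, value = line[len('export OS_'):].split('=', 1)
        # name == 'PASSWORD' and value == 'secret' -> sets self.rc_password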

View File

@ -0,0 +1,168 @@
{
"jobs": [{
"job options": {
"runtime": "30",
"rw": "randread",
"iodepth": "4",
"name": "kb_storage_test",
"bs": "4k"
},
"trim": {
"io_bytes": 0,
"slat": {
"max": 0,
"mean": 0.0,
"stddev": 0.0,
"min": 0
},
"total_ios": 0,
"drop_ios": 0,
"bw_max": 0,
"bw_mean": 0.0,
"iops": 0.0,
"short_ios": 0,
"bw": 0,
"lat": {
"max": 0,
"mean": 0.0,
"stddev": 0.0,
"min": 0
},
"bw_agg": 0.0,
"clat": {
"min": 0,
"max": 0,
"hist": "HISTFAAAABh4nJNpmSzMgADMUJoRTAitaIDxAT10Aoc=",
"stddev": 0.0,
"mean": 0.0
},
"runtime": 0,
"bw_min": 0,
"bw_dev": 0.0
},
"latency_window": 0,
"sys_cpu": 8.43,
"latency_us": {
"10": 0.0,
"750": 26.23,
"20": 0.0,
"50": 0.0,
"2": 0.0,
"4": 0.0,
"100": 0.0,
"250": 0.01,
"500": 73.38,
"1000": 0.34
},
"latency_ms": {
"10": 0.01,
"750": 0.0,
"20": 0.01,
">=2000": 0.0,
"50": 0.0,
"2000": 0.0,
"2": 0.03,
"4": 0.01,
"100": 0.0,
"250": 0.0,
"500": 0.0,
"1000": 0.0
},
"read": {
"io_bytes": 976560,
"slat": {
"max": 508,
"mean": 6.35,
"stddev": 3.96,
"min": 1
},
"total_ios": 244140,
"drop_ios": 0,
"bw_max": 36632,
"bw_mean": 34548.96,
"iops": 8647.33,
"short_ios": 0,
"bw": 34589,
"lat": {
"max": 10026,
"mean": 460.91,
"stddev": 96.86,
"min": 243
},
"bw_agg": 99.88,
"clat": {
"min": 241,
"max": 10020,
"hist": "HISTFAAAAa14nDVNTUhbQRjM7tt5eWqmStQg2lfFFhGhglVQIi/+1Gp+6KUHhYLQg8eee1II0kMQKUHFg4iEIBLeQSQH8aAgQTxL6UnMJZQiPQQpEqSU0M2L7uw33wzffLvPv262+nzit69+jMcuPArmkk++aEhhCqUroLlNPHuFMObxERmBkkBVoCLxx0AKyJtIW8g3oNyEfWK9BZUg9tpR7sBRF7I9uOvFah9uB5AdRGUIuyMojCE3jmQE15M4n0J+Bm4UpVmcR2viJo5qAt8TuI/BjWMnge0Y0gmk4rXpcQy5OVzN4uQd/r3Fzxn8ncKBvhM4jiA5gSsHD2Gsj2JjFLf6r2Fx8Yn7C3Q/sDDH9DS3wlwbYWGQd/3MveRuNzOdzIZYamWlmXvNLDfxsJEZi0k/t/08BFPgg0HX4DeDPyRdyargL8FTwUvBZX7hEif5mVFOM8Iw+znANwyxV/c2BtmjdUirF7SodNXRSJMBmlB2i23ayrZs6UitTFtqp7TWcLy+6Glv7rGjnnwNtudWartqRecX5Wv5Xj1mLKe+pYp+L+TIYkCeQT9xZujsf6zmb3o=",
"stddev": 96.84,
"mean": 454.31
},
"runtime": 28233,
"bw_min": 27688,
"bw_dev": 1591.33
},
"latency_depth": 4,
"majf": 0,
"ctx": 184833,
"latency_percentile": 100.0,
"minf": 16,
"elapsed": 29,
"write": {
"io_bytes": 0,
"slat": {
"max": 0,
"mean": 0.0,
"stddev": 0.0,
"min": 0
},
"total_ios": 0,
"drop_ios": 0,
"bw_max": 0,
"bw_mean": 0.0,
"iops": 0.0,
"short_ios": 0,
"bw": 0,
"lat": {
"max": 0,
"mean": 0.0,
"stddev": 0.0,
"min": 0
},
"bw_agg": 0.0,
"clat": {
"min": 0,
"max": 0,
"hist": "HISTFAAAABh4nJNpmSzMgADMUJoRTAitaIDxAT10Aoc=",
"stddev": 0.0,
"mean": 0.0
},
"runtime": 0,
"bw_min": 0,
"bw_dev": 0.0
},
"eta": 0,
"iodepth_level": {
"16": 0.0,
"32": 0.0,
"1": 0.1,
"2": 0.1,
"4": 100.0,
">=64": 0.0,
"8": 0.0
},
"usr_cpu": 2.37,
"error": 0,
"latency_target": 0,
"jobname": "kb_storage_test",
"groupid": 0
}],
"global options": {
"ioengine": "libaio",
"direct": "1",
"filename": "/kb_mnt/kb_storage_test.bin"
},
"time": "Sat Aug 13 17:50:14 2016",
"timestamp_ms": 1471110614720,
"timestamp": 1471110614,
"fio version": "fio-2.12"
}

View File

@ -22,6 +22,10 @@ import log as logging
LOG = logging.getLogger(__name__)
def assign_dict(dest, key, value, cond_key=None):
    # Store value into dest[key] only when value is truthy; when cond_key is
    # given, also require that cond_key is already present in dest (e.g. only
    # keep a latency histogram if the matching bandwidth was recorded)
    if not cond_key or cond_key in dest:
        if value:
            dest[key] = value
class FioTool(PerfTool):
@ -34,46 +38,45 @@ class FioTool(PerfTool):
# Sample Output:
# Refer to kloudbuster/fio_example.json for a sample output
parsed_output = {'tool': self.name}
try:
    result = json.loads(stdout)
    assign_dict(parsed_output, 'tool', result['fio version'])
    job = result['jobs'][0]
    assign_dict(parsed_output, 'read_iops', job['read']['iops'])
    assign_dict(parsed_output, 'read_bw', job['read']['bw'])
    assign_dict(parsed_output, 'read_runtime_ms', job['read']['runtime'])
    assign_dict(parsed_output, 'read_KB', job['read']['io_bytes'])
    # attach the latency histogram only if there was read activity
    assign_dict(parsed_output, 'read_hist', job['read']['clat']['hist'], 'read_bw')
    assign_dict(parsed_output, 'write_iops', job['write']['iops'])
    assign_dict(parsed_output, 'write_bw', job['write']['bw'])
    assign_dict(parsed_output, 'write_runtime_ms', job['write']['runtime'])
    assign_dict(parsed_output, 'write_KB', job['write']['io_bytes'])
    assign_dict(parsed_output, 'write_hist', job['write']['clat']['hist'], 'write_bw')
except Exception:
    return self.parse_error('Could not parse: "%s"' % (stdout))
return parsed_output
@staticmethod
def consolidate_results(results):
    total_count = len(results)
    if not total_count:
        return {'tool': 'fio'}
    all_res = {}
    # Sum each metric across all VM results; skip metrics whose total is zero
    for key in ['read_iops', 'read_bw', 'write_iops', 'write_bw',
                'read_runtime_ms', 'write_runtime_ms',
                'read_KB', 'write_KB']:
        total = 0
        for item in results:
            total += item['results'].get(key, 0)
        if total:
            all_res[key] = int(total)
    all_res['tool'] = results[0]['results']['tool']
    clat_list = []
    # perc_list = [1, 5, 10, 20, 30, 40, 50, 60, 70, 80, 90, 95, 99, 99.5, 99.9, 99.95, 99.99]
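To make the consolidation concrete, a hedged usage sketch (the input shape mirrors the
item['results'] access above; the numbers are invented, and the histogram consolidation
below this excerpt is ignored):

    # Two per-VM fio results, as produced by the parser above
    results = [
        {'results': {'tool': 'fio-2.12', 'read_iops': 8647.33, 'read_bw': 34589}},
        {'results': {'tool': 'fio-2.12', 'read_iops': 8412.10, 'read_bw': 33910}},
    ]
    agg = FioTool.consolidate_results(results)
    # agg['read_iops'] == 17059 and agg['read_bw'] == 68499 (summed, truncated to int)
    # write_* keys are absent because their totals are zero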

View File

@ -89,15 +89,20 @@ class KBRunner_Storage(KBRunner):
        'parameter': tool_config}
self.send_cmd('EXEC', 'storage', func)
# Give an additional 30 seconds for everybody to report results
if tool_config['runtime']:
    timeout = tool_config['runtime'] + 30
else:
    # 0 = unlimited, for now cap the wait at 24 hours
    timeout = 60 * 60 * 24
cnt_pending = self.polling_vms(timeout)[2]
if cnt_pending:
    LOG.error("%d testing VM(s) not returning results within %d sec, "
              "summary shown will be partial!" % (cnt_pending, timeout))
else:
    # Parse the results from the storage benchmarking tool
    for key, instance in self.client_dict.items():
        self.result[key] = instance.perf_client_parser(**self.result[key])
return cnt_pending
def single_run(self, active_range=None, test_only=False):
try:
@ -121,9 +126,12 @@ class KBRunner_Storage(KBRunner):
LOG.info("Running test case %d of %d..." % (idx + 1, test_count))
self.report = {'seq': 0, 'report': None}
self.result = {}
self.run_storage_test(active_range, dict(cur_config))
LOG.kbdebug(dict(cur_config))
timeout_vms = self.run_storage_test(active_range, dict(cur_config))
# Call the method in corresponding tools to consolidate results
LOG.kbdebug(self.result.values())
tc_result = perf_tool.consolidate_results(self.result.values())
tc_result['description'] = cur_config['description']
tc_result['mode'] = cur_config['mode']
@ -135,12 +143,18 @@ class KBRunner_Storage(KBRunner):
    tc_result['rate_iops'] = vm_count * cur_config['rate_iops']
if 'rate' in cur_config:
    req_rate = cur_config['rate']
    if req_rate:
        # apply the K/M/G postfix rules and scale by the number of client VMs
        ex_unit = 'KMG'.find(req_rate[-1].upper())
        req_rate = vm_count * int(req_rate[:-1]) * (1024 ** (ex_unit)) \
            if ex_unit != -1 else vm_count * int(req_rate)
    tc_result['rate'] = req_rate
tc_result['total_client_vms'] = vm_count
tc_result['timeout_vms'] = timeout_vms
self.tool_result.append(tc_result)
if timeout_vms:
    return timeout_vms
return 0
except KBInitVolumeException:
    raise KBException("Could not initialize the volume.")
@ -175,7 +189,8 @@ class KBRunner_Storage(KBRunner):
description = "-- %s --" % self.header_formatter(cur_stage, len(self.client_dict))
LOG.info(description)
timeout_vms = self.single_run(active_range=[0, target_vm_count - 1],
                              test_only=test_only)
LOG.info('-- Stage %s: %s --' % (cur_stage, str(self.tool_result)))
cur_stage += 1
@ -198,7 +213,11 @@ class KBRunner_Storage(KBRunner):
                'reaches the stop limit.')
        tc_flag = False
        break
    if timeout_vms:
        LOG.warning('KloudBuster is stopping the iteration because %d '
                    'VM(s) timed out' % timeout_vms)
        tc_flag = False
        break
    yield self.tool_result
else:
    self.single_run(test_only=test_only)

View File

@ -325,7 +325,12 @@
if($scope.perrow.total_client_vms == 1 && countRep!=1) $scope.xaxis = 0;
else $scope.xaxis = $scope.perrow.total_client_vms;
$scope.xaxisList.push($scope.xaxis);
// default missing rate caps to 0 (0 = unrestricted)
if (!$scope.perrow.rate_iops) {
    $scope.perrow.rate_iops = 0;
}
if (!$scope.perrow.rate) {
    $scope.perrow.rate = 0;
}
if (mode == "randread") {
$scope.data.push({
x: $scope.xaxis,
@ -335,9 +340,8 @@
"latency3": $scope.perrow.read_hist[4][1] / 1000,
"requested_rate": $scope.perrow.rate_iops / $scope.perrow.total_client_vms
});
max = Math.max($scope.perrow.rate_iops, $scope.perrow.read_iops);
} else if (mode == "randwrite") {
$scope.data.push({
x: $scope.xaxis,
"IOPS": $scope.perrow.write_iops / $scope.perrow.total_client_vms,
@ -346,10 +350,8 @@
"latency3": $scope.perrow.write_hist[4][1] / 1000,
"requested_rate": $scope.perrow.rate_iops / $scope.perrow.total_client_vms
});
max = Math.max($scope.perrow.rate_iops, $scope.perrow.write_iops);
} else if (mode == "read") {
$scope.data.push({
x: $scope.xaxis,
"IOPS": $scope.perrow.read_bw / $scope.perrow.total_client_vms,
@ -358,10 +360,8 @@
"latency3": $scope.perrow.read_hist[4][1] / 1000,
"requested_rate": $scope.perrow.rate / $scope.perrow.total_client_vms
});
max = Math.max($scope.perrow.rate, $scope.perrow.read_bw);
} else if (mode == "write") {
$scope.data.push({
x: $scope.xaxis,
"IOPS": $scope.perrow.write_bw / $scope.perrow.total_client_vms,
@ -370,12 +370,12 @@
"latency3": $scope.perrow.write_hist[4][1] / 1000,
"requested_rate": $scope.perrow.rate / $scope.perrow.total_client_vms
});
max = Math.max($scope.perrow.rate, $scope.perrow.write_bw);
}
max = max / $scope.perrow.total_client_vms;
var pickColor = get_color();
var chName = "mode-" + $scope.perrow.mode + "_VM-" + $scope.perrow.total_client_vms;
$scope.pushTableData(chName, $scope.perrow, pickColor)
}
}
}