Fuel-CCP OpenStack reliability testing report

Change-Id: I7ddfcb6574a2c535ba5392eb10d75ada4b8cbd5d
This commit is contained in:
Ilya Shakhat 2017-03-23 15:35:46 +04:00 committed by Ilya Shakhat
parent 856aa8e0fd
commit 1d636de8e1
37 changed files with 92010 additions and 0 deletions

View File

@ -288,6 +288,7 @@ Reports
Test plan execution reports:
* :ref:`reliability_test_results_version_2`
* :ref:`reliability_test_results_version_2_containerized`
.. references:

View File

@ -0,0 +1,64 @@
# Fuel-CCP configuration overrides: host interfaces, Neutron physical and
# external network bootstrap, Keystone debugging, and per-service TLS flags
# (all set to false here).
# NOTE(review): nesting was reconstructed from a listing whose indentation
# had been lost -- confirm against the deployed file.
configs:
  private_interface: p1p1.602
  public_interface: p1p1.602
  ingress:
    enabled: true
  glance:
    tls:
      enabled: false
    bootstrap:
      enable: true
  neutron:
    tls:
      enabled: false
    physnets:
      - name: "physnet1"
        bridge_name: "br-ex"
        interface: "p1p1.639"
        flat: true
        vlan_range: false
    # NOTE(review): assumed to be a neutron-level option rather than a
    # field of the physnet entry above -- confirm.
    dvr: false
    bootstrap:
      internal:
        enable: true
      external:
        enable: true
        net_name: ext-net
        subnet_name: ext-subnet
        physnet: physnet1
        network: 172.20.136.0/22
        gateway: 172.20.136.1
        nameserver: 8.8.8.8
        pool:
          start: 172.20.137.1
          end: 172.20.139.250
  keystone:
    # This mapping used to be written twice (`debug` in one, `tls` in the
    # other). Duplicate keys are invalid YAML and most loaders keep only
    # the last one, which silently dropped `debug: true`.
    debug: true
    tls:
      enabled: false
  security:
    tls:
      create_certificates: false
  etcd:
    tls:
      enabled: false
  db:
    tls:
      enabled: false
  rabbitmq:
    tls:
      enabled: false
  cinder:
    tls:
      enabled: false
  heat:
    tls:
      enabled: false
  nova:
    tls:
      enabled: false
  horizon:
    tls:
      enabled: false

View File

@ -0,0 +1,16 @@
# Rally deployment description of type "ExistingCloud": the Keystone v3
# endpoint plus the admin credentials Rally authenticates with.
type: "ExistingCloud"
auth_url: "https://identity.external:8443/v3"
region_name: "RegionOne"
endpoint_type: "public"
admin:
  username: "admin"
  password: "password"
  user_domain_name: "default"
  project_name: "admin"
  project_domain_name: "default"
# NOTE(review): presumably disables TLS certificate verification for the
# https auth_url above, with no CA bundle supplied -- confirm against the
# Rally ExistingCloud schema.
https_insecure: true
https_cacert: ""

View File

@ -0,0 +1,8 @@
# OS-Faults cloud management settings: selects the fuel-ccp driver and
# passes it the SSH connection arguments.
# NOTE(review): all five connection keys are assumed to sit under `args`;
# the original indentation was lost -- confirm.
cloud_management:
  driver: "fuel-ccp"
  args:
    jump_host: "172.20.8.63"
    jump_user: "root"
    username: "root"
    # NOTE(review): plain-text lab password kept verbatim; consider
    # referencing a secret instead of committing it.
    password: "r00tme"
    private_key_file: "~/.ssh/os_faults"

View File

@ -0,0 +1,34 @@
# Fuel-CCP component repositories. Each entry pairs a component name with
# the git URL it is fetched from; list order is kept as originally written.
repositories:
  repos:
    - name: fuel-ccp-ceph
      git_url: https://git.openstack.org/openstack/fuel-ccp-ceph
    - name: fuel-ccp-cinder
      git_url: https://git.openstack.org/openstack/fuel-ccp-cinder
    - name: fuel-ccp-debian-base
      git_url: https://git.openstack.org/openstack/fuel-ccp-debian-base
    - name: fuel-ccp-entrypoint
      git_url: https://git.openstack.org/openstack/fuel-ccp-entrypoint
    - name: fuel-ccp-etcd
      git_url: https://git.openstack.org/openstack/fuel-ccp-etcd
    - name: fuel-ccp-glance
      git_url: https://git.openstack.org/openstack/fuel-ccp-glance
    - name: fuel-ccp-heat
      git_url: https://git.openstack.org/openstack/fuel-ccp-heat
    - name: fuel-ccp-horizon
      git_url: https://git.openstack.org/openstack/fuel-ccp-horizon
    - name: fuel-ccp-ironic
      git_url: https://git.openstack.org/openstack/fuel-ccp-ironic
    - name: fuel-ccp-keystone
      git_url: https://git.openstack.org/openstack/fuel-ccp-keystone
    - name: fuel-ccp-galera
      git_url: https://git.openstack.org/openstack/fuel-ccp-galera
    - name: fuel-ccp-memcached
      git_url: https://git.openstack.org/openstack/fuel-ccp-memcached
    - name: fuel-ccp-neutron
      git_url: https://git.openstack.org/openstack/fuel-ccp-neutron
    - name: fuel-ccp-nova
      git_url: https://git.openstack.org/openstack/fuel-ccp-nova
    - name: fuel-ccp-openstack-base
      git_url: https://git.openstack.org/openstack/fuel-ccp-openstack-base
    - name: fuel-ccp-rabbitmq
      git_url: https://git.openstack.org/openstack/fuel-ccp-rabbitmq

View File

@ -0,0 +1,71 @@
# Deployment topology: which roles each group of nodes receives, how many
# replicas each service runs, which services make up each role, and which
# service definition backs the aliased services.
# NOTE(review): nesting was reconstructed from a listing whose indentation
# had been lost -- confirm against the deployed file.
nodes:
  # node[1-3]: Kubernetes
  # Keys are node-name patterns. The first one was `node([1|2|3])$`; inside
  # a character class `|` is a literal character, so that form also matched
  # the name `node|`. It is now a plain range like the sibling pattern.
  node[1-3]$:  # nodes 1-3
    roles:
      - controller
      - openvswitch
      - messaging
      - db
  node[4-6]$:  # nodes 4-6
    roles:
      - compute
      - openvswitch

# Replica count per service.
replicas:
  database: 3
  etcd: 1
  rpc: 3
  notifications: 3
  glance-api: 3
  glance-registry: 3
  keystone: 3
  nova-api: 3
  nova-scheduler: 3
  nova-conductor: 3
  neutron-server: 3
  neutron-metadata-agent: 3
  horizon: 3
  heat-api: 3
  heat-engine: 3

# Services belonging to each role referenced under `nodes`.
roles:
  db:
    - database
  messaging:
    - rpc
    - notifications
  controller:
    - etcd
    - glance-api
    - glance-registry
    - heat-api
    - heat-api-cfn
    - heat-engine
    - horizon
    - keystone
    - memcached
    - neutron-dhcp-agent
    - neutron-l3-agent
    - neutron-metadata-agent
    - neutron-server
    - nova-api
    - nova-conductor
    - nova-consoleauth
    - nova-novncproxy
    - nova-scheduler
  compute:
    - nova-compute
    - nova-libvirt
  openvswitch:
    - neutron-openvswitch-agent
    - openvswitch-db
    - openvswitch-vswitchd

# Service aliases and the service definition each one maps to.
services:
  database:
    service_def: galera
  rpc:
    service_def: rabbitmq
  notifications:
    service_def: rabbitmq

View File

@ -0,0 +1,30 @@
# Image tag and the upstream source checkout used for each OpenStack
# project; every project is pinned to the stable/newton ref.
# NOTE(review): `sources` is assumed to be a top-level key next to
# `images`; the original indentation was lost -- confirm.
images:
  tag: newton

sources:
  openstack/cinder:
    git_url: https://github.com/openstack/cinder.git
    git_ref: stable/newton
  openstack/glance:
    git_url: https://github.com/openstack/glance.git
    git_ref: stable/newton
  openstack/heat:
    git_url: https://github.com/openstack/heat.git
    git_ref: stable/newton
  openstack/horizon:
    git_url: https://github.com/openstack/horizon.git
    git_ref: stable/newton
  openstack/keystone:
    git_url: https://github.com/openstack/keystone.git
    git_ref: stable/newton
  openstack/neutron:
    git_url: https://github.com/openstack/neutron.git
    git_ref: stable/newton
  openstack/nova:
    git_url: https://github.com/openstack/nova.git
    git_ref: stable/newton
  openstack/requirements:
    git_url: https://git.openstack.org/openstack/requirements.git
    git_ref: stable/newton
  openstack/sahara-dashboard:
    git_url: https://git.openstack.org/openstack/sahara-dashboard.git
    git_ref: stable/newton

View File

@ -0,0 +1,54 @@
.. _reliability_test_results_version_2_containerized:
===========================================
Containerized OpenStack reliability testing
===========================================
Test results
============
Environment description
^^^^^^^^^^^^^^^^^^^^^^^
This report contains results for the :ref:`reliability_testing_version_2`
test plan. The data was collected in :ref:`intel_mirantis_performance_lab_1`.
Software
~~~~~~~~
This section describes installed software.
+-----------------+--------------------------------------------+
| Parameter | Value |
+-----------------+--------------------------------------------+
| OS | Ubuntu 16.04.2 |
+-----------------+--------------------------------------------+
| Docker | 1.13 |
+-----------------+--------------------------------------------+
| Kubernetes | 1.5.3 |
+-----------------+--------------------------------------------+
| OpenStack | Fuel-CCP (Newton) |
+-----------------+--------------------------------------------+
| Networking | Neutron OVS ML2 plugin with VxLAN, non-DVR |
+-----------------+--------------------------------------------+
Configs
~~~~~~~
* Fuel-CCP: :download:`configs/configs.yaml`,
:download:`configs/topology.yaml`, :download:`configs/repos.yaml`,
:download:`configs/versions.yaml`
* Rally: :download:`configs/deployment.yaml`
* OS-Faults: :download:`configs/os-faults.yaml`
Reports
^^^^^^^
.. toctree::
:maxdepth: 1
:glob:
reports/*/*/index
Reports are calculated from :download:`Raw Rally data <raw/raw_data.tar.xz>`.

View File

@ -0,0 +1,324 @@
Keystone authentication with kill of Keystone on one node
=========================================================
This report is generated from results collected by executing the following
Rally scenario:
.. code-block:: yaml
---
{% set repeat = repeat|default(5) %}
Authenticate.keystone:
{% for iteration in range(repeat) %}
-
runner:
type: "constant_for_duration"
duration: 60
concurrency: 20
context:
users:
tenants: 1
users_per_tenant: 1
hooks:
-
name: fault_injection
args:
action: kill keystone service on one node
trigger:
name: event
args:
unit: iteration
at: [100]
{% endfor %}
Summary
-------
+-----------------------+-----------+---------------------------------------+-------------------------------------------+
| Service downtime, s | MTTR, s | Absolute performance degradation, s | Relative performance degradation, ratio |
+=======================+===========+=======================================+===========================================+
| 2.8 ±1.1 | 8.3 ±2.4 | 3.10 ±0.23 | 16.35 ±0.92 |
+-----------------------+-----------+---------------------------------------+-------------------------------------------+
Metrics:
* `Service downtime` is the time interval between the first and
the last errors.
* `MTTR` is the mean time to recover service performance after
the fault.
* `Absolute performance degradation` is an absolute difference between
the mean of operation duration during recovery period and the baseline's.
* `Relative performance degradation` is the ratio between the mean
of operation duration during recovery period and the baseline's.
Details
-------
This section contains individual data for particular scenario runs.
Run #1
^^^^^^
.. image:: plot_1.svg
Baseline
~~~~~~~~
Baseline samples are collected before the start of fault injection. They are
used to estimate service performance degradation after the fault.
+-----------+-------------+-----------+-----------+---------------------+
| Samples | Median, s | Mean, s | Std dev | 95% percentile, s |
+===========+=============+===========+===========+=====================+
| 75 | 0.25 | 0.26 | 0.064 | 0.4 |
+-----------+-------------+-----------+-----------+---------------------+
Service downtime
~~~~~~~~~~~~~~~~
The tested service is not available during the following time period(s).
+-----+---------------+
| # | Downtime, s |
+=====+===============+
| 1 | 0.024 ±0.024 |
+-----+---------------+
| 2 | 1.830 ±0.010 |
+-----+---------------+
Service performance degradation
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The tested service has measurable performance degradation during the
following time period(s).
+-----+----------------------+---------------------------+------------------------+
| # | Time to recover, s | Absolute degradation, s | Relative degradation |
+=====+======================+===========================+========================+
| 1 | 4.189 ±0.033 | 1.94 ±0.59 | 8.5 ±2.3 |
+-----+----------------------+---------------------------+------------------------+
Run #2
^^^^^^
.. image:: plot_2.svg
Baseline
~~~~~~~~
Baseline samples are collected before the start of fault injection. They are
used to estimate service performance degradation after the fault.
+-----------+-------------+-----------+-----------+---------------------+
| Samples | Median, s | Mean, s | Std dev | 95% percentile, s |
+===========+=============+===========+===========+=====================+
| 75 | 0.26 | 0.27 | 0.071 | 0.41 |
+-----------+-------------+-----------+-----------+---------------------+
Service downtime
~~~~~~~~~~~~~~~~
The tested service is not available during the following time period(s).
+-----+----------------+
| # | Downtime, s |
+=====+================+
| 1 | 0.0015 ±0.0015 |
+-----+----------------+
| 2 | 1.3940 ±0.0024 |
+-----+----------------+
Service performance degradation
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The tested service has measurable performance degradation during the
following time period(s).
+-----+----------------------+---------------------------+------------------------+
| # | Time to recover, s | Absolute degradation, s | Relative degradation |
+=====+======================+===========================+========================+
| 1 | 7.198 ±0.068 | 4.0 ±1.2 | 15.7 ±4.4 |
+-----+----------------------+---------------------------+------------------------+
Run #3
^^^^^^
.. image:: plot_3.svg
Baseline
~~~~~~~~
Baseline samples are collected before the start of fault injection. They are
used to estimate service performance degradation after the fault.
+-----------+-------------+-----------+-----------+---------------------+
| Samples | Median, s | Mean, s | Std dev | 95% percentile, s |
+===========+=============+===========+===========+=====================+
| 71 | 0.24 | 0.24 | 0.054 | 0.32 |
+-----------+-------------+-----------+-----------+---------------------+
Service downtime
~~~~~~~~~~~~~~~~
The tested service is not available during the following time period(s).
+-----+---------------+
| # | Downtime, s |
+=====+===============+
| 1 | 1.883 ±0.011 |
+-----+---------------+
Service performance degradation
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The tested service has measurable performance degradation during the
following time period(s).
+-----+----------------------+---------------------------+------------------------+
| # | Time to recover, s | Absolute degradation, s | Relative degradation |
+=====+======================+===========================+========================+
| 1 | 5.762 ±0.047 | 2.46 ±0.79 | 11.2 ±3.2 |
+-----+----------------------+---------------------------+------------------------+
Run #4
^^^^^^
.. image:: plot_4.svg
Baseline
~~~~~~~~
Baseline samples are collected before the start of fault injection. They are
used to estimate service performance degradation after the fault.
+-----------+-------------+-----------+-----------+---------------------+
| Samples | Median, s | Mean, s | Std dev | 95% percentile, s |
+===========+=============+===========+===========+=====================+
| 72 | 0.32 | 0.32 | 0.098 | 0.49 |
+-----------+-------------+-----------+-----------+---------------------+
Service downtime
~~~~~~~~~~~~~~~~
The tested service is not available during the following time period(s).
+-----+---------------+
| # | Downtime, s |
+=====+===============+
| 1 | 1.191 ±0.013 |
+-----+---------------+
Service performance degradation
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The tested service has measurable performance degradation during the
following time period(s).
+-----+----------------------+---------------------------+------------------------+
| # | Time to recover, s | Absolute degradation, s | Relative degradation |
+=====+======================+===========================+========================+
| 1 | 5.796 ±0.042 | 2.61 ±0.80 | 9.0 ±2.5 |
+-----+----------------------+---------------------------+------------------------+
Run #5
^^^^^^
.. image:: plot_5.svg
Baseline
~~~~~~~~
Baseline samples are collected before the start of fault injection. They are
used to estimate service performance degradation after the fault.
+-----------+-------------+-----------+-----------+---------------------+
| Samples | Median, s | Mean, s | Std dev | 95% percentile, s |
+===========+=============+===========+===========+=====================+
| 76 | 0.19 | 0.2 | 0.041 | 0.29 |
+-----------+-------------+-----------+-----------+---------------------+
Service downtime
~~~~~~~~~~~~~~~~
The tested service is not available during the following time period(s).
+-----+---------------+
| # | Downtime, s |
+=====+===============+
| 1 | 7.468 ±0.017 |
+-----+---------------+
Service performance degradation
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The tested service has measurable performance degradation during the
following time period(s).
+-----+----------------------+---------------------------+------------------------+
| # | Time to recover, s | Absolute degradation, s | Relative degradation |
+=====+======================+===========================+========================+
| 1 | 0.8176 ±0.0016 | 0.176 ±0.024 | 1.88 ±0.12 |
+-----+----------------------+---------------------------+------------------------+
| 2 | 0.3703 ±0.0011 | 0.199 ±0.047 | 2.00 ±0.24 |
+-----+----------------------+---------------------------+------------------------+
| 3 | 3.7349 ±0.0019 | 0.228 ±0.018 | 2.145 ±0.090 |
+-----+----------------------+---------------------------+------------------------+
| 4 | 0.5604 ±0.0019 | 0.178 ±0.033 | 1.89 ±0.17 |
+-----+----------------------+---------------------------+------------------------+
| 5 | 0.5767 ±0.0012 | 0.175 ±0.026 | 1.88 ±0.13 |
+-----+----------------------+---------------------------+------------------------+
| 6 | 1.1616 ±0.0019 | 0.192 ±0.025 | 1.96 ±0.12 |
+-----+----------------------+---------------------------+------------------------+
| 7 | 1.1645 ±0.0019 | 0.200 ±0.024 | 2.01 ±0.12 |
+-----+----------------------+---------------------------+------------------------+
| 8 | 0.3467 ±0.0016 | 0.179 ±0.045 | 1.90 ±0.23 |
+-----+----------------------+---------------------------+------------------------+
| 9 | 0.8827 ±0.0014 | 0.218 ±0.033 | 2.09 ±0.16 |
+-----+----------------------+---------------------------+------------------------+
| 10 | 0.4818 ±0.0018 | 0.175 ±0.036 | 1.88 ±0.18 |
+-----+----------------------+---------------------------+------------------------+
| 11 | 2.9380 ±0.0013 | 0.193 ±0.018 | 1.967 ±0.091 |
+-----+----------------------+---------------------------+------------------------+
| 12 | 0.1112 ±0.0027 | 0.169 ±0.034 | 1.85 ±0.17 |
+-----+----------------------+---------------------------+------------------------+
| 13 | 0.6466 ±0.0015 | 0.205 ±0.032 | 2.03 ±0.16 |
+-----+----------------------+---------------------------+------------------------+
| 14 | 0.1132 ±0.0015 | 0.144 ±0.028 | 1.72 ±0.14 |
+-----+----------------------+---------------------------+------------------------+
| 15 | 4.886 ±0.039 | 1.82 ±0.57 | 10.1 ±2.9 |
+-----+----------------------+---------------------------+------------------------+

File diff suppressed because it is too large Load Diff

After

Width:  |  Height:  |  Size: 565 KiB

File diff suppressed because it is too large Load Diff

After

Width:  |  Height:  |  Size: 505 KiB

File diff suppressed because it is too large Load Diff

After

Width:  |  Height:  |  Size: 547 KiB

File diff suppressed because it is too large Load Diff

After

Width:  |  Height:  |  Size: 486 KiB

File diff suppressed because it is too large Load Diff

After

Width:  |  Height:  |  Size: 576 KiB

View File

@ -0,0 +1,68 @@
Keystone authentication with kill of MySQL on one node
======================================================
This report is generated from results collected by executing the following
Rally scenario:
.. code-block:: yaml
---
Authenticate.keystone:
-
runner:
type: "constant_for_duration"
duration: 60
concurrency: 5
context:
users:
tenants: 1
users_per_tenant: 1
hooks:
-
name: fault_injection
args:
action: kill mysql service on one node
trigger:
name: event
args:
unit: iteration
at: [150]
Summary
-------
Neither errors nor performance degradation were observed.
Details
-------
This section contains individual data for particular scenario runs.
Run #1
^^^^^^
.. image:: plot_1.svg
Baseline
~~~~~~~~
Baseline samples are collected before the start of fault injection. They are
used to estimate service performance degradation after the fault.
+-----------+-------------+-----------+-----------+---------------------+
| Samples | Median, s | Mean, s | Std dev | 95% percentile, s |
+===========+=============+===========+===========+=====================+
| 134 | 0.12 | 0.13 | 0.028 | 0.19 |
+-----------+-------------+-----------+-----------+---------------------+

File diff suppressed because it is too large Load Diff

After

Width:  |  Height:  |  Size: 240 KiB

View File

@ -0,0 +1,316 @@
Keystone authentication with Keystone API restart on one node
=============================================================
This report is generated from results collected by executing the following
Rally scenario:
.. code-block:: yaml
---
{% set repeat = repeat|default(5) %}
Authenticate.keystone:
{% for iteration in range(repeat) %}
-
runner:
type: "constant_for_duration"
duration: 90
concurrency: 5
context:
users:
tenants: 1
users_per_tenant: 1
hooks:
-
name: fault_injection
args:
action: restart keystone service on one node
trigger:
name: event
args:
unit: iteration
at: [100]
{% endfor %}
Summary
-------
+-----------------------+-----------+---------------------------------------+-------------------------------------------+
| Service downtime, s | MTTR, s | Absolute performance degradation, s | Relative performance degradation, ratio |
+=======================+===========+=======================================+===========================================+
| 0.39 ±0.19 | 20 ±10 | 1.11 ±0.20 | 10.8 ±1.2 |
+-----------------------+-----------+---------------------------------------+-------------------------------------------+
Metrics:
* `Service downtime` is the time interval between the first and
the last errors.
* `MTTR` is the mean time to recover service performance after
the fault.
* `Absolute performance degradation` is an absolute difference between
the mean of operation duration during recovery period and the baseline's.
* `Relative performance degradation` is the ratio between the mean
of operation duration during recovery period and the baseline's.
Details
-------
This section contains individual data for particular scenario runs.
Run #1
^^^^^^
.. image:: plot_1.svg
Baseline
~~~~~~~~
Baseline samples are collected before the start of fault injection. They are
used to estimate service performance degradation after the fault.
+-----------+-------------+-----------+-----------+---------------------+
| Samples | Median, s | Mean, s | Std dev | 95% percentile, s |
+===========+=============+===========+===========+=====================+
| 84 | 0.11 | 0.12 | 0.025 | 0.18 |
+-----------+-------------+-----------+-----------+---------------------+
Service downtime
~~~~~~~~~~~~~~~~
The tested service is not available during the following time period(s).
+-----+---------------+
| # | Downtime, s |
+=====+===============+
| 1 | 0.317 ±0.013 |
+-----+---------------+
Service performance degradation
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The tested service has measurable performance degradation during the
following time period(s).
+-----+----------------------+---------------------------+------------------------+
| # | Time to recover, s | Absolute degradation, s | Relative degradation |
+=====+======================+===========================+========================+
| 1 | 0.4993 ±0.0026 | 0.108 ±0.018 | 1.91 ±0.15 |
+-----+----------------------+---------------------------+------------------------+
| 2 | 8.6845 ±0.0018 | 0.1185 ±0.0074 | 2.002 ±0.062 |
+-----+----------------------+---------------------------+------------------------+
| 3 | 0.7606 ±0.0015 | 0.104 ±0.016 | 1.88 ±0.14 |
+-----+----------------------+---------------------------+------------------------+
| 4 | 4.529 ±0.065 | 1.05 ±0.78 | 9.9 ±6.6 |
+-----+----------------------+---------------------------+------------------------+
| 5 | 1.9502 ±0.0014 | 0.106 ±0.012 | 1.895 ±0.099 |
+-----+----------------------+---------------------------+------------------------+
| 6 | 3.5471 ±0.0018 | 0.108 ±0.011 | 1.913 ±0.091 |
+-----+----------------------+---------------------------+------------------------+
Run #2
^^^^^^
.. image:: plot_2.svg
Baseline
~~~~~~~~
Baseline samples are collected before the start of fault injection. They are
used to estimate service performance degradation after the fault.
+-----------+-------------+-----------+-----------+---------------------+
| Samples | Median, s | Mean, s | Std dev | 95% percentile, s |
+===========+=============+===========+===========+=====================+
| 85 | 0.18 | 0.18 | 0.033 | 0.22 |
+-----------+-------------+-----------+-----------+---------------------+
Service downtime
~~~~~~~~~~~~~~~~
The tested service is not available during the following time period(s).
+-----+---------------+
| # | Downtime, s |
+=====+===============+
| 1 | 0.466 ±0.046 |
+-----+---------------+
Service performance degradation
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The tested service has measurable performance degradation during the
following time period(s).
+-----+----------------------+---------------------------+------------------------+
| # | Time to recover, s | Absolute degradation, s | Relative degradation |
+=====+======================+===========================+========================+
| 1 | 6.76 ±0.10 | 1.30 ±0.89 | 8.1 ±4.8 |
+-----+----------------------+---------------------------+------------------------+
Run #3
^^^^^^
.. image:: plot_3.svg
Baseline
~~~~~~~~
Baseline samples are collected before the start of fault injection. They are
used to estimate service performance degradation after the fault.
+-----------+-------------+-----------+-----------+---------------------+
| Samples | Median, s | Mean, s | Std dev | 95% percentile, s |
+===========+=============+===========+===========+=====================+
| 84 | 0.19 | 0.2 | 0.04 | 0.25 |
+-----------+-------------+-----------+-----------+---------------------+
Service downtime
~~~~~~~~~~~~~~~~
The tested service is not available during the following time period(s).
+-----+---------------+
| # | Downtime, s |
+=====+===============+
| 1 | 0.388 ±0.030 |
+-----+---------------+
Service performance degradation
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The tested service has measurable performance degradation during the
following time period(s).
+-----+----------------------+---------------------------+------------------------+
| # | Time to recover, s | Absolute degradation, s | Relative degradation |
+=====+======================+===========================+========================+
| 1 | 4.534 ±0.067 | 0.83 ±0.56 | 5.2 ±2.8 |
+-----+----------------------+---------------------------+------------------------+
Run #4
^^^^^^
.. image:: plot_4.svg
Baseline
~~~~~~~~
Baseline samples are collected before the start of fault injection. They are
used to estimate service performance degradation after the fault.
+-----------+-------------+-----------+-----------+---------------------+
| Samples | Median, s | Mean, s | Std dev | 95% percentile, s |
+===========+=============+===========+===========+=====================+
| 85 | 0.11 | 0.12 | 0.014 | 0.15 |
+-----------+-------------+-----------+-----------+---------------------+
Service downtime
~~~~~~~~~~~~~~~~
The tested service is not available during the following time period(s).
+-----+---------------+
| # | Downtime, s |
+=====+===============+
| 1 | 0.236 ±0.018 |
+-----+---------------+
Service performance degradation
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The tested service has measurable performance degradation during the
following time period(s).
+-----+----------------------+---------------------------+------------------------+
| # | Time to recover, s | Absolute degradation, s | Relative degradation |
+=====+======================+===========================+========================+
| 1 | 2.5420 ±0.0023 | 0.0977 ±0.0069 | 1.847 ±0.060 |
+-----+----------------------+---------------------------+------------------------+
| 2 | 0.3927 ±0.0018 | 0.055 ±0.015 | 1.47 ±0.13 |
+-----+----------------------+---------------------------+------------------------+
| 3 | 19.428 ±0.010 | 0.179 ±0.075 | 2.55 ±0.65 |
+-----+----------------------+---------------------------+------------------------+
| 4 | 3.1419 ±0.0024 | 0.0870 ±0.0070 | 1.754 ±0.061 |
+-----+----------------------+---------------------------+------------------------+
| 5 | 18.6237 ±0.0018 | 0.0767 ±0.0039 | 1.665 ±0.034 |
+-----+----------------------+---------------------------+------------------------+
| 6 | 2.3729 ±0.0018 | 0.0823 ±0.0074 | 1.714 ±0.064 |
+-----+----------------------+---------------------------+------------------------+
| 7 | 4.0139 ±0.0014 | 0.0776 ±0.0055 | 1.672 ±0.048 |
+-----+----------------------+---------------------------+------------------------+
| 8 | 12.9462 ±0.0019 | 0.0893 ±0.0048 | 1.774 ±0.042 |
+-----+----------------------+---------------------------+------------------------+
Run #5
^^^^^^
.. image:: plot_5.svg
Baseline
~~~~~~~~
Baseline samples are collected before the start of fault injection. They are
used to estimate service performance degradation after the fault.
+-----------+-------------+-----------+-----------+---------------------+
| Samples | Median, s | Mean, s | Std dev | 95% percentile, s |
+===========+=============+===========+===========+=====================+
| 84 | 0.19 | 0.19 | 0.028 | 0.24 |
+-----------+-------------+-----------+-----------+---------------------+
Service downtime
~~~~~~~~~~~~~~~~
The tested service is not available during the following time period(s).
+-----+---------------+
| # | Downtime, s |
+=====+===============+
| 1 | 0.543 ±0.053 |
+-----+---------------+
Service performance degradation
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The tested service has measurable performance degradation during the
following time period(s).
+-----+----------------------+---------------------------+------------------------+
| # | Time to recover, s | Absolute degradation, s | Relative degradation |
+=====+======================+===========================+========================+
| 1 | 5.570 ±0.093 | 1.05 ±0.92 | 6.6 ±4.9 |
+-----+----------------------+---------------------------+------------------------+

File diff suppressed because it is too large Load Diff

After

Width:  |  Height:  |  Size: 295 KiB

File diff suppressed because it is too large Load Diff

After

Width:  |  Height:  |  Size: 302 KiB

File diff suppressed because it is too large Load Diff

After

Width:  |  Height:  |  Size: 313 KiB

File diff suppressed because it is too large Load Diff

After

Width:  |  Height:  |  Size: 301 KiB

File diff suppressed because it is too large Load Diff

After

Width:  |  Height:  |  Size: 277 KiB

View File

@ -0,0 +1,255 @@
Keystone authentication with memcached restart on one node
==========================================================
This report is generated from the results collected by executing the following
Rally scenario:
.. code-block:: yaml

    ---
    {% set repeat = repeat|default(5) %}
    Authenticate.keystone:
    {% for iteration in range(repeat) %}
      -
        runner:
          type: "constant_for_duration"
          duration: 90
          concurrency: 5
        context:
          users:
            tenants: 1
            users_per_tenant: 1
        hooks:
          -
            name: fault_injection
            args:
              action: restart memcached service on one node
            trigger:
              name: event
              args:
                unit: iteration
                at: [100]
    {% endfor %}

Summary
-------
+-----------------------+-----------+---------------------------------------+-------------------------------------------+
| Service downtime, s | MTTR, s | Absolute performance degradation, s | Relative performance degradation, ratio |
+=======================+===========+=======================================+===========================================+
| N/A | 19.2 ±3.9 | 4.6 ±1.2 | 28.8 ±6.8 |
+-----------------------+-----------+---------------------------------------+-------------------------------------------+
Metrics:
* `Service downtime` is the time interval between the first and
  the last error.
* `MTTR` is the mean time to recover service performance after
  the fault.
* `Absolute performance degradation` is the absolute difference between
  the mean operation duration during the recovery period and the baseline mean.
* `Relative performance degradation` is the ratio of the mean operation
  duration during the recovery period to the baseline mean.
Details
-------
This section contains individual data for particular scenario runs.
Run #1
^^^^^^
.. image:: plot_1.svg
Baseline
~~~~~~~~
Baseline samples are collected before the start of fault injection. They are
used to estimate service performance degradation after the fault.
+-----------+-------------+-----------+-----------+---------------------+
| Samples | Median, s | Mean, s | Std dev | 95% percentile, s |
+===========+=============+===========+===========+=====================+
| 85 | 0.17 | 0.17 | 0.042 | 0.25 |
+-----------+-------------+-----------+-----------+---------------------+
Service performance degradation
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The tested service has measurable performance degradation during the
following time period(s).
+-----+----------------------+---------------------------+------------------------+
| # | Time to recover, s | Absolute degradation, s | Relative degradation |
+=====+======================+===========================+========================+
| 1 | 33.18 ±0.63 | 11.4 ±8.2 | 66 ±47 |
+-----+----------------------+---------------------------+------------------------+
| 2 | 0.9594 ±0.0032 | 0.26 ±0.43 | 2.5 ±2.4 |
+-----+----------------------+---------------------------+------------------------+
Run #2
^^^^^^
.. image:: plot_2.svg
Baseline
~~~~~~~~
Baseline samples are collected before the start of fault injection. They are
used to estimate service performance degradation after the fault.
+-----------+-------------+-----------+-----------+---------------------+
| Samples | Median, s | Mean, s | Std dev | 95% percentile, s |
+===========+=============+===========+===========+=====================+
| 85 | 0.19 | 0.2 | 0.043 | 0.25 |
+-----------+-------------+-----------+-----------+---------------------+
Service performance degradation
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The tested service has measurable performance degradation during the
following time period(s).
+-----+----------------------+---------------------------+------------------------+
| # | Time to recover, s | Absolute degradation, s | Relative degradation |
+=====+======================+===========================+========================+
| 1 | 13.51 ±0.23 | 3.4 ±2.5 | 18 ±12 |
+-----+----------------------+---------------------------+------------------------+
Run #3
^^^^^^
.. image:: plot_3.svg
Baseline
~~~~~~~~
Baseline samples are collected before the start of fault injection. They are
used to estimate service performance degradation after the fault.
+-----------+-------------+-----------+-----------+---------------------+
| Samples | Median, s | Mean, s | Std dev | 95% percentile, s |
+===========+=============+===========+===========+=====================+
| 84 | 0.18 | 0.18 | 0.032 | 0.23 |
+-----------+-------------+-----------+-----------+---------------------+
Service performance degradation
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The tested service has measurable performance degradation during the
following time period(s).
+-----+----------------------+---------------------------+------------------------+
| # | Time to recover, s | Absolute degradation, s | Relative degradation |
+=====+======================+===========================+========================+
| 1 | 12.35 ±0.22 | 3.8 ±2.8 | 22 ±15 |
+-----+----------------------+---------------------------+------------------------+
Run #4
^^^^^^
.. image:: plot_4.svg
Baseline
~~~~~~~~
Baseline samples are collected before the start of fault injection. They are
used to estimate service performance degradation after the fault.
+-----------+-------------+-----------+-----------+---------------------+
| Samples | Median, s | Mean, s | Std dev | 95% percentile, s |
+===========+=============+===========+===========+=====================+
| 85 | 0.15 | 0.15 | 0.02 | 0.19 |
+-----------+-------------+-----------+-----------+---------------------+
Service performance degradation
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The tested service has measurable performance degradation during the
following time period(s).
+-----+----------------------+---------------------------+------------------------+
| # | Time to recover, s | Absolute degradation, s | Relative degradation |
+=====+======================+===========================+========================+
| 1 | 13.97 ±0.12 | 1.3 ±1.0 | 9.7 ±6.7 |
+-----+----------------------+---------------------------+------------------------+
| 2 | 0.7793 ±0.0024 | 0.103 ±0.028 | 1.67 ±0.18 |
+-----+----------------------+---------------------------+------------------------+
| 3 | 0.1890 ±0.0016 | 0.090 ±0.032 | 1.59 ±0.21 |
+-----+----------------------+---------------------------+------------------------+
| 4 | 0.1364 ±0.0016 | 0.056 ±0.070 | 1.36 ±0.46 |
+-----+----------------------+---------------------------+------------------------+
| 5 | 2.9459 ±0.0021 | 0.088 ±0.012 | 1.574 ±0.080 |
+-----+----------------------+---------------------------+------------------------+
| 6 | 1.8468 ±0.0014 | 0.087 ±0.014 | 1.568 ±0.091 |
+-----+----------------------+---------------------------+------------------------+
| 7 | 0.7156 ±0.0015 | 0.087 ±0.025 | 1.57 ±0.16 |
+-----+----------------------+---------------------------+------------------------+
| 8 | 0.2876 ±0.0020 | 0.066 ±0.029 | 1.43 ±0.19 |
+-----+----------------------+---------------------------+------------------------+
| 9 | 2.4745 ±0.0021 | 0.086 ±0.012 | 1.564 ±0.081 |
+-----+----------------------+---------------------------+------------------------+
Run #5
^^^^^^
.. image:: plot_5.svg
Baseline
~~~~~~~~
Baseline samples are collected before the start of fault injection. They are
used to estimate service performance degradation after the fault.
+-----------+-------------+-----------+-----------+---------------------+
| Samples | Median, s | Mean, s | Std dev | 95% percentile, s |
+===========+=============+===========+===========+=====================+
| 84 | 0.16 | 0.16 | 0.044 | 0.24 |
+-----------+-------------+-----------+-----------+---------------------+
Service performance degradation
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The tested service has measurable performance degradation during the
following time period(s).
+-----+----------------------+---------------------------+------------------------+
| # | Time to recover, s | Absolute degradation, s | Relative degradation |
+=====+======================+===========================+========================+
| 1 | 12.55 ±0.16 | 1.9 ±1.4 | 12.8 ±8.5 |
+-----+----------------------+---------------------------+------------------------+

File diff suppressed because it is too large Load Diff

After

Width:  |  Height:  |  Size: 192 KiB

File diff suppressed because it is too large Load Diff

After

Width:  |  Height:  |  Size: 260 KiB

File diff suppressed because it is too large Load Diff

After

Width:  |  Height:  |  Size: 259 KiB

File diff suppressed because it is too large Load Diff

After

Width:  |  Height:  |  Size: 263 KiB

File diff suppressed because it is too large Load Diff

After

Width:  |  Height:  |  Size: 278 KiB

View File

@ -0,0 +1,178 @@
Create and list networks with kill of one of MySQL servers
==========================================================
This report is generated from the results collected by executing the following
Rally scenario:
.. code-block:: yaml

    ---
    {% set repeat = repeat|default(3) %}
    NeutronNetworks.create_and_list_networks:
    {% for iteration in range(repeat) %}
      -
        args:
          network_create_args: {}
        runner:
          type: "constant_for_duration"
          duration: 90
          concurrency: 4
        context:
          users:
            tenants: 1
            users_per_tenant: 1
          quotas:
            neutron:
              network: -1
        hooks:
          -
            name: fault_injection
            args:
              action: kill mysql service on one node
            trigger:
              name: event
              args:
                unit: iteration
                at: [100]
    {% endfor %}

Summary
-------
+-----------------------+-----------+---------------------------------------+-------------------------------------------+
| Service downtime, s | MTTR, s | Absolute performance degradation, s | Relative performance degradation, ratio |
+=======================+===========+=======================================+===========================================+
| N/A | 13.0 ±2.3 | 2.50 ±0.72 | 6.4 ±1.3 |
+-----------------------+-----------+---------------------------------------+-------------------------------------------+
Metrics:
* `Service downtime` is the time interval between the first and
  the last error.
* `MTTR` is the mean time to recover service performance after
  the fault.
* `Absolute performance degradation` is the absolute difference between
  the mean operation duration during the recovery period and the baseline mean.
* `Relative performance degradation` is the ratio of the mean operation
  duration during the recovery period to the baseline mean.
Details
-------
This section contains individual data for particular scenario runs.
Run #1
^^^^^^
.. image:: plot_1.svg
Baseline
~~~~~~~~
Baseline samples are collected before the start of fault injection. They are
used to estimate service performance degradation after the fault.
+-----------+-------------+-----------+-----------+---------------------+
| Samples | Median, s | Mean, s | Std dev | 95% percentile, s |
+===========+=============+===========+===========+=====================+
| 85 | 0.49 | 0.53 | 0.16 | 0.84 |
+-----------+-------------+-----------+-----------+---------------------+
Service performance degradation
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The tested service has measurable performance degradation during the
following time period(s).
+-----+----------------------+---------------------------+------------------------+
| # | Time to recover, s | Absolute degradation, s | Relative degradation |
+=====+======================+===========================+========================+
| 1 | 8.59 ±0.13 | 1.4 ±1.2 | 3.7 ±2.2 |
+-----+----------------------+---------------------------+------------------------+
| 2 | 2.106 ±0.010 | 0.61 ±0.12 | 2.15 ±0.22 |
+-----+----------------------+---------------------------+------------------------+
| 3 | 4.077 ±0.018 | 0.72 ±0.21 | 2.37 ±0.40 |
+-----+----------------------+---------------------------+------------------------+
Run #2
^^^^^^
.. image:: plot_2.svg
Baseline
~~~~~~~~
Baseline samples are collected before the start of fault injection. They are
used to estimate service performance degradation after the fault.
+-----------+-------------+-----------+-----------+---------------------+
| Samples | Median, s | Mean, s | Std dev | 95% percentile, s |
+===========+=============+===========+===========+=====================+
| 85 | 0.64 | 0.64 | 0.2 | 1 |
+-----------+-------------+-----------+-----------+---------------------+
Service performance degradation
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The tested service has measurable performance degradation during the
following time period(s).
+-----+----------------------+---------------------------+------------------------+
| # | Time to recover, s | Absolute degradation, s | Relative degradation |
+=====+======================+===========================+========================+
| 1 | 7.55 ±0.12 | 2.4 ±1.6 | 4.7 ±2.5 |
+-----+----------------------+---------------------------+------------------------+
Run #3
^^^^^^
.. image:: plot_3.svg
Baseline
~~~~~~~~
Baseline samples are collected before the start of fault injection. They are
used to estimate service performance degradation after the fault.
+-----------+-------------+-----------+-----------+---------------------+
| Samples | Median, s | Mean, s | Std dev | 95% percentile, s |
+===========+=============+===========+===========+=====================+
| 85 | 0.47 | 0.54 | 0.18 | 0.83 |
+-----------+-------------+-----------+-----------+---------------------+
Service performance degradation
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The tested service has measurable performance degradation during the
following time period(s).
+-----+----------------------+---------------------------+------------------------+
| # | Time to recover, s | Absolute degradation, s | Relative degradation |
+=====+======================+===========================+========================+
| 1 | 10.11 ±0.13 | 1.5 ±1.1 | 3.8 ±2.0 |
+-----+----------------------+---------------------------+------------------------+
| 2 | 6.447 ±0.015 | 0.87 ±0.10 | 2.60 ±0.19 |
+-----+----------------------+---------------------------+------------------------+

File diff suppressed because it is too large Load Diff

After

Width:  |  Height:  |  Size: 92 KiB

File diff suppressed because it is too large Load Diff

After

Width:  |  Height:  |  Size: 91 KiB

File diff suppressed because it is too large Load Diff

After

Width:  |  Height:  |  Size: 90 KiB

View File

@ -0,0 +1,74 @@
Boot and delete VM with kill of RabbitMQ on one of nodes
========================================================
This report is generated from the results collected by executing the following
Rally scenario:
.. code-block:: yaml

    ---
    NovaServers.boot_and_delete_server:
      -
        args:
          flavor:
            name: "m1.micro"
          image:
            name: "(^cirros.*uec$|TestVM)"
          force_delete: false
        runner:
          type: "constant_for_duration"
          duration: 240
          concurrency: 4
        context:
          users:
            tenants: 1
            users_per_tenant: 1
        hooks:
          -
            name: fault_injection
            args:
              action: kill rabbitmq service on one node
            trigger:
              name: event
              args:
                unit: iteration
                at: [60]

Summary
-------
Neither errors nor performance degradation were observed.
Details
-------
This section contains individual data for particular scenario runs.
Run #1
^^^^^^
.. image:: plot_1.svg
Baseline
~~~~~~~~
Baseline samples are collected before the start of fault injection. They are
used to estimate service performance degradation after the fault.
+-----------+-------------+-----------+-----------+---------------------+
| Samples | Median, s | Mean, s | Std dev | 95% percentile, s |
+===========+=============+===========+===========+=====================+
| 47 | 7.3 | 7.3 | 0.86 | 8.8 |
+-----------+-------------+-----------+-----------+---------------------+

File diff suppressed because it is too large Load Diff

After

Width:  |  Height:  |  Size: 51 KiB

View File

@ -0,0 +1,111 @@
Boot and delete VM with reboot of one of controllers
====================================================
This report is generated from the results collected by executing the following
Rally scenario:
.. code-block:: yaml

    ---
    NovaServers.boot_and_delete_server:
      -
        args:
          flavor:
            name: "m1.tiny"
          image:
            name: "(^cirros.*uec$|TestVM|^cirros)"
          force_delete: false
        runner:
          type: "constant_for_duration"
          duration: 900
          concurrency: 4
        context:
          users:
            tenants: 200
            users_per_tenant: 1
        hooks:
          -
            name: fault_injection
            args:
              action: reboot one node with rabbitmq service
            trigger:
              name: event
              args:
                unit: iteration
                at: [50]

Summary
-------
+-----------------------+------------+---------------------------------------+-------------------------------------------+
| Service downtime, s | MTTR, s | Absolute performance degradation, s | Relative performance degradation, ratio |
+=======================+============+=======================================+===========================================+
| 477.8 ±5.9 | 570.3 ±2.8 | 18 ±17 | 3.1 ±2.0 |
+-----------------------+------------+---------------------------------------+-------------------------------------------+
Metrics:
* `Service downtime` is the time interval between the first and
  the last error.
* `MTTR` is the mean time to recover service performance after
  the fault.
* `Absolute performance degradation` is the absolute difference between
  the mean operation duration during the recovery period and the baseline mean.
* `Relative performance degradation` is the ratio of the mean operation
  duration during the recovery period to the baseline mean.
Details
-------
This section contains individual data for particular scenario runs.
Run #1
^^^^^^
.. image:: plot_1.svg
Baseline
~~~~~~~~
Baseline samples are collected before the start of fault injection. They are
used to estimate service performance degradation after the fault.
+-----------+-------------+-----------+-----------+---------------------+
| Samples | Median, s | Mean, s | Std dev | 95% percentile, s |
+===========+=============+===========+===========+=====================+
| 36 | 8.6 | 8.8 | 1.2 | 11 |
+-----------+-------------+-----------+-----------+---------------------+
Service downtime
~~~~~~~~~~~~~~~~
The tested service is not available during the following time period(s).
+-----+---------------+
| # | Downtime, s |
+=====+===============+
| 1 | 478 ±35 |
+-----+---------------+
Service performance degradation
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The tested service has measurable performance degradation during the
following time period(s).
+-----+----------------------+---------------------------+------------------------+
| # | Time to recover, s | Absolute degradation, s | Relative degradation |
+=====+======================+===========================+========================+
| 1 | 570.3 ±7.6 | 18 ±17 | 3.1 ±2.0 |
+-----+----------------------+---------------------------+------------------------+

File diff suppressed because it is too large Load Diff

After

Width:  |  Height:  |  Size: 63 KiB