Add human-friendly interface

With simplified API it is easy to make operations
like "restart keystone service" or "reboot random mysql node".

Change-Id: Ic877cf994889597c6726c0d461a4322f5d75b2e5
This commit is contained in:
Ilya Shakhat 2016-09-01 17:53:55 +03:00
parent 8ccc9a51c2
commit 7e1b15bd5c
10 changed files with 425 additions and 6 deletions

View File

@ -63,8 +63,35 @@ The library operates with 2 types of objects:
* `nodes` - nodes that host the cloud, e.g. a hardware server with a hostname
Use cases
---------
Simplified API
--------------
Simplified API is used to inject faults in a human-friendly form.
Service-based command performs specified `action` against `service` on
all, on one random node or on the node specified by FQDN::
<action> <service> service [on (random|one|single|<fqdn> node[s])]
Node-based command performs specified `action` on all or selected service's
node::
<action> [random|one|single|<fqdn>] node[s] [with <service>]
Network-management command is a subset of node-based query::
disable|enable network <network name> on <service> node[s]
Examples:
* `Restart Keystone service` - restarts Keystone service on all nodes
* `kill nova-api service on one node` - kills Nova API on one of nodes
* `Reboot one node with mysql` - reboots one random node with MySQL
* `Reboot node-2.domain.tld node` - reboots the node with the specified name
Extended API
------------
1. Service actions
~~~~~~~~~~~~~~~~~~

View File

@ -17,6 +17,7 @@ import pbr.version
import yaml
from os_faults.api import error
from os_faults.api import human
from os_faults.drivers import devstack
from os_faults.drivers import fuel
from os_faults.drivers import ipmi
@ -39,8 +40,8 @@ CONFIG_FILES = [
]
def _read_config():
os_faults_config = os.environ.get('OS_FAULTS_CONFIG')
def _read_config(config_filename):
os_faults_config = config_filename or os.environ.get('OS_FAULTS_CONFIG')
if os_faults_config:
CONFIG_FILES.insert(0, os_faults_config)
@ -53,9 +54,9 @@ def _read_config():
raise error.OSFError(msg)
def connect(cloud_config=None):
def connect(cloud_config=None, config_filename=None):
if not cloud_config:
cloud_config = _read_config()
cloud_config = _read_config(config_filename)
cloud_management = None
cloud_management_params = cloud_config.get('cloud_management') or {}
@ -77,3 +78,12 @@ def connect(cloud_config=None):
cloud_management.set_power_management(power_management)
return cloud_management
def human_api(distractor, command):
    """Execute high-level text command with specified destructor

    :param distractor: library instance (the destructor) as returned by
        :connect: function
    :param command: text command
    """
    human.execute(distractor, command)

116
os_faults/api/human.py Normal file
View File

@ -0,0 +1,116 @@
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import inspect
import re
from os_faults.api import error
from os_faults.api import node_collection as node_collection_pkg
from os_faults.api import service as service_pkg
"""
Human API understands commands like these (examples):
* restart <service> service [on (random|one|single|<fqdn> node[s])]
* terminate <service> service [on (random|one|single|<fqdn> node[s])]
* start <service> service [on (random|one|single|<fqdn> node[s])]
* kill <service> service [on (random|one|single|<fqdn> node[s])]
* plug <service> service [on (random|one|single|<fqdn> node[s])]
* unplug <service> service [on (random|one|single|<fqdn> node[s])]
* freeze <service> service [on (random|one|single|<fqdn> node[s])]
[for <T> seconds]
* unfreeze <service> service [on (random|one|single|<fqdn> node[s])]
* reboot [random|one|single|<fqdn>] node[s] [with <service>]
* disable network <name> on <service> node[s]
* enable network <name> on <service> node[s]
"""
def list_actions(klazz):
    """Collect human-readable names of the public actions of a class.

    A member counts as an action when it is a function or method carrying
    a ``__public__`` attribute. Underscores in the member name are replaced
    with spaces, e.g. ``disable_network`` becomes ``disable network``.

    :param klazz: class to inspect
    :returns: set of action names
    """
    def _is_public_action(member):
        # On Python 3 a function looked up on the class is a plain function
        # (there are no unbound methods), so inspect.ismethod() alone would
        # reject every action; accept both kinds.
        return ((inspect.isfunction(member) or inspect.ismethod(member)) and
                hasattr(member, '__public__'))

    return set(name.replace('_', ' ') for name, _ in
               inspect.getmembers(klazz, predicate=_is_public_action))
# Keywords that may appear in the node position of a command; execute()
# replaces the node set with nodes.pick() when one of them is used
RANDOMNESS = {'one', 'random', 'some', 'single'}
RANDOMNESS_PATTERN = '|'.join(RANDOMNESS)

# Action names are derived from the methods of the API classes
SERVICE_ACTIONS = list_actions(service_pkg.Service)
SERVICE_ACTIONS_PATTERN = '|'.join(SERVICE_ACTIONS)
NODE_ACTIONS = list_actions(node_collection_pkg.NodeCollection)
NODE_ACTIONS_PATTERN = '|'.join(NODE_ACTIONS)

# Patterns are tried in order by execute(). Raw strings are used so that
# \s, \S and \d reach the regex engine without relying on Python keeping
# unknown string escapes intact.
PATTERNS = [
    # <action> <service> service [on [<node>] node[s]] [for <T> seconds]
    re.compile(r'(?P<action>%s)'
               r'\s+(?P<service>\S+)\s+service'
               r'(\s+on(\s+(?P<node>\S+))?\s+nodes?)?'
               r'(\s+for\s+(?P<duration>\d+)\s+seconds)?' %
               SERVICE_ACTIONS_PATTERN),
    # <action> [<network> on] [<node>] node[s] [with <service>]
    re.compile(r'(?P<action>%s)'
               r'(\s+(?P<network>\w+)\s+on)?'
               r'(\s+(?P<node>%s|\S+))?'
               r'\s+node'
               r'(\s+with\s+(?P<service>\S+))?' %
               (NODE_ACTIONS_PATTERN, RANDOMNESS_PATTERN)),
]
def execute(destructor, command):
    """Parse a human-friendly command and run it against the cloud.

    The command is lower-cased and matched against ``PATTERNS`` in order;
    the first pattern that matches decides how the command is interpreted.

    :param destructor: cloud management object; ``get_service(name=...)``
        and ``get_nodes(...)`` are called on it
    :param command: text command, e.g. ``restart keystone service``
    :raises error.OSFException: if the command matches none of the patterns
    """
    command = command.lower()

    rec = None
    for pattern in PATTERNS:
        rec = pattern.search(command)
        if rec:
            break

    if not rec:
        raise error.OSFException('Could not parse command: %s' % command)

    groups = rec.groupdict()
    # actions are spelled with spaces in commands ("disable network") while
    # the corresponding methods use underscores ("disable_network")
    action = groups.get('action').replace(' ', '_')
    service_name = groups.get('service')
    node_name = groups.get('node')
    network_name = groups.get('network')
    duration = groups.get('duration')

    if service_name:
        service = destructor.get_service(name=service_name)

        if action in SERVICE_ACTIONS:
            kwargs = {}
            if node_name in RANDOMNESS:
                kwargs['nodes'] = service.get_nodes().pick()
            elif node_name:
                kwargs['nodes'] = destructor.get_nodes(fqdns=[node_name])

            if duration:
                kwargs['sec'] = int(duration)

            fn = getattr(service, action)
            fn(**kwargs)

        else:  # node actions
            nodes = service.get_nodes()

            if node_name in RANDOMNESS:
                nodes = nodes.pick()

            # network actions need the network name here as well,
            # e.g. "disable network storage on one node with mysql"
            kwargs = {}
            if network_name:
                kwargs['network_name'] = network_name

            fn = getattr(nodes, action)
            fn(**kwargs)

    else:  # nodes operation
        if node_name and node_name not in RANDOMNESS:
            nodes = destructor.get_nodes(fqdns=[node_name])
        else:
            # No FQDN was given, so a randomness keyword (or nothing) must
            # not be passed on as a host name.
            # NOTE(review): assumes get_nodes() without arguments returns
            # all nodes of the cloud -- confirm against cloud management API
            nodes = destructor.get_nodes()

            if node_name in RANDOMNESS:
                nodes = nodes.pick()

        kwargs = {}
        if network_name:
            kwargs['network_name'] = network_name

        fn = getattr(nodes, action)
        fn(**kwargs)

View File

@ -15,6 +15,8 @@ import abc
import six
from os_faults.api.util import public
@six.add_metaclass(abc.ABCMeta)
class NodeCollection(object):
@ -26,36 +28,42 @@ class NodeCollection(object):
:return: NodeCollection consisting just one node
"""
@public
def reboot(self):
"""Reboot all nodes gracefully
"""
raise NotImplementedError
@public
def oom(self):
"""Fill all node's RAM
"""
raise NotImplementedError
@public
def poweroff(self):
"""Power off all nodes abruptly
"""
raise NotImplementedError
@public
def poweron(self):
"""Power on all nodes abruptly
"""
raise NotImplementedError
@public
def reset(self):
"""Reset (cold restart) all nodes
"""
raise NotImplementedError
@public
def disable_network(self, network_name):
"""Disable network with name network_name on each of the nodes
@ -63,6 +71,7 @@ class NodeCollection(object):
"""
raise NotImplementedError
@public
def enable_network(self, network_name):
"""Enable network with name network_name on each of the nodes

View File

@ -15,6 +15,8 @@ import abc
import six
from os_faults.api.util import public
@six.add_metaclass(abc.ABCMeta)
class Service(object):
@ -27,6 +29,7 @@ class Service(object):
"""
pass
@public
def restart(self, nodes=None):
"""Restart Service on all nodes or on particular subset
@ -34,6 +37,7 @@ class Service(object):
"""
raise NotImplementedError
@public
def terminate(self, nodes=None):
"""Terminate Service gracefully on all nodes or on particular subset
@ -41,6 +45,7 @@ class Service(object):
"""
raise NotImplementedError
@public
def start(self, nodes=None):
"""Start Service on all nodes or on particular subset
@ -48,6 +53,7 @@ class Service(object):
"""
raise NotImplementedError
@public
def kill(self, nodes=None):
"""Terminate Service abruptly on all nodes or on particular subset
@ -55,6 +61,7 @@ class Service(object):
"""
raise NotImplementedError
@public
def unplug(self, nodes=None):
"""Unplug Service out of network on all nodes or on particular subset
@ -62,6 +69,7 @@ class Service(object):
"""
raise NotImplementedError
@public
def plug(self, nodes=None):
"""Plug Service into network on all nodes or on particular subset
@ -69,6 +77,7 @@ class Service(object):
"""
raise NotImplementedError
@public
def freeze(self, nodes=None, sec=None):
"""SIGSTOP
@ -81,6 +90,7 @@ class Service(object):
"""
raise NotImplementedError
@public
def unfreeze(self, nodes=None):
"""SIGCONT

17
os_faults/api/util.py Normal file
View File

@ -0,0 +1,17 @@
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
def public(funcobj):
    """Tag ``funcobj`` with a ``__public__`` attribute set to True.

    :param funcobj: object to tag; applied as a decorator
    :returns: the very same object
    """
    setattr(funcobj, '__public__', True)
    return funcobj

View File

80
os_faults/cmd/cmd.py Normal file
View File

@ -0,0 +1,80 @@
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import argparse
import logging
import sys
import os_faults
# todo (ishakhat): list available actions and services
# Usage text handed to argparse; the grammar mirrors what
# os_faults.api.human parses.
USAGE = """os-inject-fault [-h] [-c CONFIG] [-d] [-v] [command]

Service-based command performs specified action against service on
all, on one random node or on the node specified by FQDN:

  <action> <service> service [on (random|one|single|<fqdn> node[s])]

Node-based command performs specified action on all or selected service's
node:

  <action> [random|one|single|<fqdn>] node[s] [with <service>]

Network-management command is a subset of node-based query:

  disable|enable network <network name> on <service> node[s]

Examples:

 * Restart Keystone service - restarts Keystone service on all nodes
 * kill nova-api service on one node - kills Nova API on one of nodes
 * Reboot one node with mysql - reboots one random node with MySQL
 * Reboot node-2.domain.tld node - reboots node with specified name
"""
def main():
    """Command-line entry point of ``os-inject-fault``.

    Parses the arguments, configures logging, connects to the cloud and
    then optionally verifies the connection and/or executes the given
    fault injection command. When neither a command nor ``--verify`` is
    given, the help text is printed and the process exits with status 0.
    """
    parser = argparse.ArgumentParser(prog='os-inject-fault', usage=USAGE)
    parser.add_argument(
        '-c', '--config', dest='config',
        help='path to os-faults cloud connection config')
    parser.add_argument('-d', '--debug', dest='debug', action='store_true')
    parser.add_argument(
        '-v', '--verify', action='store_true',
        help='verify connection to the cloud')
    parser.add_argument(
        'command', nargs='*',
        help='fault injection command, e.g. "restart keystone '
             'service"')
    args = parser.parse_args()

    log_level = logging.DEBUG if args.debug else logging.INFO
    logging.basicConfig(format='%(asctime)s %(levelname)s %(message)s',
                        level=log_level)

    words = args.command

    if not (words or args.verify):
        # nothing was requested: show the help and leave successfully
        parser.print_help()
        sys.exit(0)

    destructor = os_faults.connect(config_filename=args.config)

    if args.verify:
        destructor.verify()

    if words:
        # the command arrives split into words by the shell
        os_faults.human_api(destructor, ' '.join(words))


if __name__ == '__main__':
    main()

View File

@ -0,0 +1,146 @@
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import ddt
import mock
from os_faults.api import error
from os_faults.api import human
from os_faults.api import node_collection
from os_faults.api import service as service_api
from os_faults.tests import test
@ddt.ddt
class TestHumanAPI(test.TestCase):
    # Unit tests for human.execute(): each test feeds a text command and
    # checks which calls reach the mocked destructor, service and nodes.
    #
    # Call-count checks use assert_called_once_with() instead of the bare
    # assert_called_once(): the latter does not exist in older mock
    # releases, where it is auto-created as a mock attribute and passes
    # without checking anything.

    def setUp(self):
        super(TestHumanAPI, self).setUp()
        self.destructor = mock.MagicMock()
        self.service = mock.MagicMock(service_api.Service)
        self.destructor.get_service = mock.MagicMock(return_value=self.service)

    @ddt.data(('restart', 'keystone'), ('kill', 'nova-api'))
    @ddt.unpack
    def test_service_action(self, action, service_name):
        command = '%s %s service' % (action, service_name)
        human.execute(self.destructor, command)

        self.destructor.get_service.assert_called_once_with(name=service_name)
        getattr(self.service, action).assert_called_once_with()

    @ddt.data(('restart', 'keystone', 'random'), ('kill', 'nova-api', 'one'))
    @ddt.unpack
    def test_service_action_on_random_node(self, action, service_name, node):
        nodes = mock.MagicMock(node_collection.NodeCollection)
        self.service.get_nodes = mock.MagicMock(return_value=nodes)
        one_node = mock.MagicMock(node_collection.NodeCollection)
        nodes.pick = mock.MagicMock(return_value=one_node)

        command = '%s %s service on %s node' % (action, service_name, node)
        human.execute(self.destructor, command)

        self.destructor.get_service.assert_called_once_with(name=service_name)
        getattr(self.service, action).assert_called_once_with(nodes=one_node)
        nodes.pick.assert_called_once_with()

    @ddt.data(('freeze', 'keystone', 5))
    @ddt.unpack
    def test_service_action_with_duration(self, action, service_name, t):
        command = '%s %s service for %d seconds' % (action, service_name, t)
        human.execute(self.destructor, command)

        self.destructor.get_service.assert_called_once_with(name=service_name)
        getattr(self.service, action).assert_called_once_with(sec=t)

    @ddt.data(('restart', 'keystone', 'node'), ('kill', 'nova-api', 'node'))
    @ddt.unpack
    def test_service_action_on_fqdn_node(self, action, service_name, node):
        nodes = mock.MagicMock(node_collection.NodeCollection)
        self.destructor.get_nodes.return_value = nodes

        command = '%s %s service on %s node' % (action, service_name, node)
        human.execute(self.destructor, command)

        self.destructor.get_service.assert_called_once_with(name=service_name)
        self.destructor.get_nodes.assert_called_once_with(fqdns=[node])
        getattr(self.service, action).assert_called_once_with(nodes=nodes)

    @ddt.data(('reboot', 'keystone'), ('reset', 'nova-api'))
    @ddt.unpack
    def test_node_action_on_all_nodes(self, action, service_name):
        nodes = mock.MagicMock(node_collection.NodeCollection)
        self.service.get_nodes = mock.MagicMock(return_value=nodes)

        command = '%s node with %s' % (action, service_name)
        human.execute(self.destructor, command)

        self.destructor.get_service.assert_called_once_with(name=service_name)
        getattr(nodes, action).assert_called_once_with()

    @ddt.data(('reboot', 'keystone'), ('reset', 'nova-api'))
    @ddt.unpack
    def test_node_action_on_random_node(self, action, service_name):
        nodes = mock.MagicMock(node_collection.NodeCollection)
        nodes2 = mock.MagicMock(node_collection.NodeCollection)
        self.service.get_nodes = mock.MagicMock(return_value=nodes)
        nodes.pick = mock.MagicMock(return_value=nodes2)

        command = '%s one node with %s' % (action, service_name)
        human.execute(self.destructor, command)

        self.destructor.get_service.assert_called_once_with(name=service_name)
        nodes.pick.assert_called_once_with()
        getattr(nodes2, action).assert_called_once_with()

    @ddt.data('reboot', 'poweroff', 'poweron')
    def test_node_action_by_fqdn(self, action):
        destructor = mock.MagicMock()
        nodes = mock.MagicMock(node_collection.NodeCollection)
        destructor.get_nodes = mock.MagicMock(return_value=nodes)

        # capitalized on purpose: commands are matched case-insensitively
        command = '%s node-2.local node' % action.capitalize()
        human.execute(destructor, command)

        destructor.get_nodes.assert_called_once_with(fqdns=['node-2.local'])
        getattr(nodes, action).assert_called_once_with()

    @ddt.data(('Disable network', 'disable_network'),
              ('Enable network', 'enable_network'))
    @ddt.unpack
    def test_disable_network_node_by_fqdn(self, user_action, action):
        destructor = mock.MagicMock()
        nodes = mock.MagicMock(node_collection.NodeCollection)
        destructor.get_nodes = mock.MagicMock(return_value=nodes)

        command = '%s storage on node-2.local node' % user_action
        human.execute(destructor, command)

        destructor.get_nodes.assert_called_once_with(fqdns=['node-2.local'])
        getattr(nodes, action).assert_called_once_with(network_name='storage')

    def test_malformed_query(self):
        destructor = mock.MagicMock()

        command = 'inject some fault'
        self.assertRaises(error.OSFException, human.execute, destructor,
                          command)

View File

@ -20,6 +20,10 @@ classifier =
packages =
os_faults
[entry_points]
console_scripts =
os-inject-fault = os_faults.cmd.cmd:main
[build_sphinx]
source-dir = doc/source
build-dir = doc/build