diff --git a/README.rst b/README.rst index 23ad0bd..1275238 100644 --- a/README.rst +++ b/README.rst @@ -7,7 +7,7 @@ OS-Faults The library does destructive actions inside an OpenStack cloud. It provides an abstraction layer over different types of cloud deployments. The actions are implemented as drivers (e.g. DevStack driver, Fuel driver, Libvirt driver, -IPMI driver). +IPMI driver, Universal driver). * Free software: Apache license * Documentation: http://os-faults.readthedocs.io @@ -197,7 +197,23 @@ Available actions: * `unplug` - unplug Service out of network * `plug` - plug Service into network -2. Node actions +2. Container actions +~~~~~~~~~~~~~~~~~~~~ + +Get a container and restart it: + +.. code-block:: python + + cloud_management = os_faults.connect(cloud_config) + container = cloud_management.get_container(name='neutron-api') + container.restart() + +Available actions: + * `start` - start Container + * `terminate` - terminate Container gracefully + * `restart` - restart Container + +3. Node actions ~~~~~~~~~~~~~~~ Get all nodes in the cloud and reboot them: @@ -214,7 +230,7 @@ Available actions: * `disconnect` - disable network with the specified name on all nodes * `connect` - enable network with the specified name on all nodes -3. Operate with nodes +4. Operate with nodes ~~~~~~~~~~~~~~~~~~~~~ Get all nodes where a service runs, pick one of them and reset: @@ -233,7 +249,7 @@ Get nodes where l3-agent runs and disable the management network on them: nodes = cloud_management.get_nodes(fqdns=fqdns) nodes.disconnect(network_name='management') -4. Operate with services +5. Operate with services ~~~~~~~~~~~~~~~~~~~~~~~~ Restart a service on a single node: diff --git a/examples/example_api_04.py b/examples/example_api_04.py new file mode 100644 index 0000000..8c62125 --- /dev/null +++ b/examples/example_api_04.py @@ -0,0 +1,106 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import logging +import os_faults + + +def main(): + # The cloud config details could be defined within the script or a + # separate os-faults.yml file and then loaded to the script. + # Ex. cloud_management = os_faults.connect(config_filename='os-faults.yml') + cloud_config = { + 'cloud_management': { + 'driver': 'universal' + }, + 'node_discover': { + 'driver': 'node_list', + 'args': [ + { + 'ip': '192.0.10.6', + 'auth': { + 'username': 'heat-admin', + 'private_key_file': '/home/stack/.ssh/id_rsa', + 'become': True + } + }, + { + 'ip': '192.0.10.8', + 'auth': { + 'username': 'heat-admin', + 'private_key_file': '/home/stack/.ssh/id_rsa', + 'become': True + } + }, + { + 'ip': '192.0.10.7', + 'auth': { + 'username': 'heat-admin', + 'private_key_file': '/home/stack/.ssh/id_rsa', + 'become': True + } + } + ] + }, + 'services': { + 'openvswitch': { + 'driver': 'system_service', + 'args': { + 'service_name': 'openvswitch', + 'grep': 'openvswitch' + } + } + }, + 'containers': { + 'neutron_ovs_agent': { + 'driver': 'docker_container', + 'args': { + 'container_name': 'neutron_ovs_agent' + } + }, + 'neutron_api': { + 'driver': 'docker_container', + 'args': { + 'container_name': 'neutron_api' + } + } + } + } + + logging.info('# Create connection to the cloud') + cloud_management = os_faults.connect(cloud_config) + logging.info('Verify connection to the cloud') + cloud_management.verify() + + logging.info('Get nodes where openvswitch service is running') + service = cloud_management.get_service(name='openvswitch') + service_nodes = service.get_nodes() + logging.info('Nodes: {}'.format(service_nodes)) + + logging.info('Stop openvswitch service on random node') + random_node = service.get_nodes().pick() + service.terminate(random_node) + + logging.info('Get nodes where neutron_ovs_agent container is running') + container = cloud_management.get_container(name='neutron_ovs_agent') + container_nodes = container.get_nodes() + logging.info('Nodes: {}'.format(container_nodes)) + + logging.info('Restart neutron_ovs_agent container on the ' + 'following nodes: {}'.format(container_nodes)) + container.restart(container_nodes) + + +if __name__ == '__main__': + logging.basicConfig(format='%(asctime)s %(levelname)s %(message)s', + level=logging.INFO) + main() diff --git a/os_faults/__init__.py b/os_faults/__init__.py index dbec317..3375411 100644 --- a/os_faults/__init__.py +++ b/os_faults/__init__.py @@ -163,6 +163,11 @@ def connect(cloud_config=None, config_filename=None): cloud_management.update_services(services) cloud_management.validate_services() + containers = cloud_config.get('containers') + if containers: + cloud_management.update_containers(containers) + cloud_management.validate_containers() + node_discover_conf = cloud_config.get('node_discover') if node_discover_conf: node_discover = _init_driver(node_discover_conf) diff --git a/os_faults/api/cloud_management.py b/os_faults/api/cloud_management.py index 2e1e858..8f81437 100644 --- a/os_faults/api/cloud_management.py +++ b/os_faults/api/cloud_management.py @@ -30,6 +30,7 @@ LOG = logging.getLogger(__name__) @six.add_metaclass(abc.ABCMeta) class CloudManagement(base_driver.BaseDriver): SERVICES = {} + CONTAINERS = {} SUPPORTED_NETWORKS = [] NODE_CLS = node_collection.NodeCollection @@ -37,6 +38,7 @@ class CloudManagement(base_driver.BaseDriver): self.power_manager = power_management.PowerManager() self.node_discover = None self.services = copy.deepcopy(self.SERVICES) + self.containers = copy.deepcopy(self.CONTAINERS) def add_power_management(self, driver): self.power_manager.add_driver(driver) @@ -47,11 +49,20 @@ class CloudManagement(base_driver.BaseDriver): def update_services(self, services): self.services.update(services) + def update_containers(self, containers): + self.containers.update(containers) + def validate_services(self): for service_name, serive_conf in self.services.items(): serive_cls = registry.get_driver(serive_conf["driver"]) jsonschema.validate(serive_conf['args'], serive_cls.CONFIG_SCHEMA) + def validate_containers(self): + for container_name, container_conf in self.containers.items(): + container_cls = registry.get_driver(container_conf["driver"]) + jsonschema.validate(container_conf['args'], + container_cls.CONFIG_SCHEMA) + @abc.abstractmethod def verify(self): """Verify connection to the cloud. @@ -97,6 +108,23 @@ class CloudManagement(base_driver.BaseDriver): service_name=name, config=config["args"], hosts=config.get('hosts')) + def get_container(self, name): + """Get container with specified name + + :param name: name of the container + :return: Container + """ + if name not in self.containers: + raise error.ContainerError( + '{} driver does not support {!r} container'.format( + self.NAME.title(), name)) + + config = self.containers[name] + klazz = registry.get_driver(config["driver"]) + return klazz(node_cls=self.NODE_CLS, cloud_management=self, + container_name=name, config=config["args"], + hosts=config.get('hosts')) + @abc.abstractmethod def execute_on_cloud(self, hosts, task, raise_on_error=True): """Execute task on specified hosts within the cloud. diff --git a/os_faults/api/container.py b/os_faults/api/container.py new file mode 100644 index 0000000..1935b64 --- /dev/null +++ b/os_faults/api/container.py @@ -0,0 +1,74 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import abc + +import six + +from os_faults.api import base_driver +from os_faults.api.utils import public + + +@six.add_metaclass(abc.ABCMeta) +class Container(base_driver.BaseDriver): + + def __init__(self, container_name, config, node_cls, cloud_management, + hosts=None): + self.container_name = container_name + self.config = config + self.node_cls = node_cls + self.cloud_management = cloud_management + self.hosts = hosts + + @abc.abstractmethod + def discover_nodes(self): + """Discover nodes where this Container is running + + :returns: NodesCollection + """ + + def get_nodes(self): + """Get nodes where this Container is running + + :returns: NodesCollection + """ + if self.hosts is not None: + nodes = self.cloud_management.get_nodes() + hosts = [h for h in nodes.hosts if h.ip in self.hosts] + return self.node_cls(cloud_management=self.cloud_management, + hosts=hosts) + return self.discover_nodes() + + @public + def start(self, nodes=None): + """Start Container on all nodes or on particular subset + + :param nodes: NodesCollection + """ + raise NotImplementedError + + @public + def terminate(self, nodes=None): + """Terminate Container gracefully on all nodes or on particular subset + + :param nodes: NodesCollection + """ + raise NotImplementedError + + @public + def restart(self, nodes=None): + """Restart Container on all nodes or on particular subset + + :param nodes: NodesCollection + """ + raise NotImplementedError diff --git a/os_faults/api/error.py b/os_faults/api/error.py index d287087..be76cda 100644 --- a/os_faults/api/error.py +++ b/os_faults/api/error.py @@ -32,6 +32,10 @@ class ServiceError(OSFError): """Base Error class for Service API""" +class ContainerError(OSFError): + """Base Error class for Container API""" + + class NodeCollectionError(OSFError): """Base Error class for NodeCollection API""" diff --git a/os_faults/api/human.py b/os_faults/api/human.py index 8f1f946..8b60bef 100644 --- a/os_faults/api/human.py +++ b/os_faults/api/human.py @@ -14,6 +14,7 @@ import inspect import logging import re +from os_faults.api import container as container_pkg from os_faults.api import error from os_faults.api import node_collection as node_collection_pkg from os_faults.api import service as service_pkg @@ -53,6 +54,8 @@ ANYTHING = {'all'} NODE_ALIASES_PATTERN = '|'.join(RANDOMNESS | ANYTHING) SERVICE_ACTIONS = list_actions(service_pkg.Service) SERVICE_ACTIONS_PATTERN = '|'.join(SERVICE_ACTIONS) +CONTAINER_ACTIONS = list_actions(container_pkg.Container) +CONTAINER_ACTIONS_PATTERN = '|'.join(CONTAINER_ACTIONS) NODE_ACTIONS = list_actions(node_collection_pkg.NodeCollection) NODE_ACTIONS_PATTERN = '|'.join(NODE_ACTIONS) @@ -62,6 +65,11 @@ PATTERNS = [ '(\s+on(\s+(?P\S+))?\s+nodes?)?' '(\s+for\s+(?P\d+)\s+seconds)?' % SERVICE_ACTIONS_PATTERN), + re.compile('(?P%s)' + '\s+(?P\S+)\s+container' + '(\s+on(\s+(?P\S+))?\s+nodes?)?' + '(\s+for\s+(?P\d+)\s+seconds)?' % + CONTAINER_ACTIONS_PATTERN), re.compile('(?P%s)' '(\s+(?P\w+)\s+network\s+on)?' '(\s+(?P\w+)' @@ -88,6 +96,7 @@ def execute(destructor, command): action = groups.get('action').replace(' ', '_') service_name = groups.get('service') + container_name = groups.get('container') node_name = groups.get('node') network_name = groups.get('network') target = groups.get('target') @@ -113,6 +122,38 @@ def execute(destructor, command): else: # node actions nodes = service.get_nodes() + if node_name in RANDOMNESS: + nodes = nodes.pick() + + kwargs = {} + if network_name: + kwargs['network_name'] = network_name + if target: + kwargs['target'] = target + kwargs['duration'] = int(duration) + + fn = getattr(nodes, action) + fn(**kwargs) + elif container_name: + container = destructor.get_container(name=container_name) + + if action in CONTAINER_ACTIONS: + + kwargs = {} + if node_name in RANDOMNESS: + kwargs['nodes'] = container.get_nodes().pick() + elif node_name and node_name not in ANYTHING: + kwargs['nodes'] = destructor.get_nodes(fqdns=[node_name]) + + if duration: + kwargs['sec'] = int(duration) + + fn = getattr(container, action) + fn(**kwargs) + + else: # node actions + nodes = container.get_nodes() + if node_name in RANDOMNESS: nodes = nodes.pick() diff --git a/os_faults/drivers/containers/__init__.py b/os_faults/drivers/containers/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/os_faults/drivers/containers/docker.py b/os_faults/drivers/containers/docker.py new file mode 100644 index 0000000..b388b26 --- /dev/null +++ b/os_faults/drivers/containers/docker.py @@ -0,0 +1,107 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging + +from os_faults.ansible import executor +from os_faults.api import container +from os_faults.api import error + +LOG = logging.getLogger(__name__) + + +class DockerContainers(container.Container): + """Docker container + + This is docker container driver for any docker containers supported by + Ansible. Please refer to Ansible documentation + https://docs.ansible.com/ansible/latest/modules/docker_container_module.html + for the whole list. + + **Example configuration:** + + .. code-block:: yaml + + containers: + app: + driver: docker_container + args: + container_name: app + + parameters: + + - **container_name** - name of the container + """ + NAME = 'docker_container' + DESCRIPTION = 'Docker container' + CONFIG_SCHEMA = { + 'type': 'object', + 'properties': { + 'container_name': {'type': 'string'}, + }, + 'required': ['container_name'], + 'additionalProperties': False, + } + + def __init__(self, *args, **kwargs): + super(DockerContainers, self).__init__(*args, **kwargs) + self.container_name = self.config['container_name'] + + def _run_task(self, nodes, task, message): + nodes = nodes if nodes is not None else self.get_nodes() + if len(nodes) == 0: + raise error.ContainerError( + 'Container %s is not found on any nodes' % self.container_name) + + LOG.info('%s container %s on nodes: %s', + message, self.container_name, nodes.get_ips()) + + return self.cloud_management.execute_on_cloud(nodes.hosts, task) + + def discover_nodes(self): + nodes = self.cloud_management.get_nodes() + cmd = 'bash -c "docker ps | grep \'{}\'"'.format(self.container_name) + results = self.cloud_management.execute_on_cloud( + nodes.hosts, {'command': cmd}, False) + success_ips = [r.host for r in results + if r.status == executor.STATUS_OK] + hosts = [h for h in nodes.hosts if h.ip in success_ips] + LOG.debug('Container %s is discovered on nodes %s', + self.container_name, hosts) + return self.node_cls(cloud_management=self.cloud_management, + hosts=hosts) + + def start(self, nodes=None): + task = { + 'docker_container': { + 'name': self.container_name, 'state': 'started' + }, + } + self._run_task(nodes, task, 'Start') + + def terminate(self, nodes=None): + task = { + 'docker_container': { + 'name': self.container_name, 'state': 'stopped', + }, + } + self._run_task(nodes, task, 'Terminate') + + def restart(self, nodes=None): + task = { + 'docker_container': { + 'name': self.container_name, 'state': 'started', + 'restart': 'yes' + }, + } + self._run_task(nodes, task, 'Restart')