diff --git a/requirements.txt b/requirements.txt index 5a7de2505..63bfa0313 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,7 @@ eventlet>=0.17.3 alembic>=0.6.4 Babel>=1.3 -docker-py>=0.5.1 +docker-py==1.2.3 httplib2>=0.7.5 iso8601>=0.1.9 kombu>=2.4.8 diff --git a/solum/cmd/worker.py b/solum/cmd/worker.py index bdf0befdc..b317095ab 100644 --- a/solum/cmd/worker.py +++ b/solum/cmd/worker.py @@ -31,7 +31,16 @@ from solum.worker.handlers import shell as shell_handler LOG = logging.getLogger(__name__) +cli_opts = [ + cfg.IntOpt('run-container-cmd-as', metavar='UID', default=65533, + help='Run commands in containers as the user assigned ' + 'with the UID, which can be used to constrain resource, ' + 'e.g. disk usage, on a worker host.'), +] + + def main(): + cfg.CONF.register_cli_opts(cli_opts) cfg.CONF(sys.argv[1:], project='solum') logging.setup('solum') solum.TLS.trace = trace_data.TraceData() diff --git a/solum/common/clients.py b/solum/common/clients.py index 184a9484a..23a776e38 100644 --- a/solum/common/clients.py +++ b/solum/common/clients.py @@ -326,7 +326,6 @@ class OpenStackClients(object): def swift(self): # Not caching swift connections because of range requests # Check how glance_store uses swift client for a reference - endpoint_type = get_client_option('swift', 'endpoint_type') region_name = get_client_option('swift', 'region_name') args = { diff --git a/solum/deployer/handlers/heat.py b/solum/deployer/handlers/heat.py index 48b332b75..d5be86b01 100644 --- a/solum/deployer/handlers/heat.py +++ b/solum/deployer/handlers/heat.py @@ -82,7 +82,7 @@ def list_opts(): cfg.CONF.register_opts(SERVICE_OPTS, group='deployer') cfg.CONF.import_opt('image_format', 'solum.api.handlers.assembly_handler', group='api') -cfg.CONF.import_group('worker', 'solum.worker.handlers.shell') +cfg.CONF.import_opt('image_storage', 'solum.worker.config', group='worker') deployer_log_dir = cfg.CONF.deployer.deployer_log_dir diff --git a/solum/uploaders/tenant_logger.py 
b/solum/uploaders/tenant_logger.py index 70e5b0410..a87df6bd5 100644 --- a/solum/uploaders/tenant_logger.py +++ b/solum/uploaders/tenant_logger.py @@ -23,8 +23,8 @@ from solum.openstack.common import log as openstack_logger import solum.uploaders.local as local_uploader import solum.uploaders.swift as swift_uploader -cfg.CONF.import_group('worker', 'solum.worker.handlers.shell') - +cfg.CONF.import_opt('log_upload_strategy', 'solum.worker.config', + group='worker') LOG = openstack_logger.getLogger(__name__) diff --git a/solum/worker/config.py b/solum/worker/config.py index de7e07e3b..145f7e73a 100644 --- a/solum/worker/config.py +++ b/solum/worker/config.py @@ -89,6 +89,13 @@ SERVICE_OPTS = [ cfg.StrOpt('lp_operator_tenant_name', default="demo", help='LP operator tenant name.'), + cfg.StrOpt('docker_daemon_url', + default="unix://var/run/docker.sock", + help='docker daemon url.'), + cfg.IntOpt('container_mem_limit', default=0, + help='max memory a container can consume. No limit by default'), + cfg.IntOpt('docker_build_timeout', default=1800, + help='max time a docker build can take. Default: 30 minutes'), ] opt_group = cfg.OptGroup( diff --git a/solum/worker/lp_handlers/__init__.py b/solum/worker/lp_handlers/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/solum/worker/lp_handlers/base.py b/solum/worker/lp_handlers/base.py new file mode 100644 index 000000000..4112d6fba --- /dev/null +++ b/solum/worker/lp_handlers/base.py @@ -0,0 +1,320 @@ +# Copyright 2015 - Rackspace Hosting +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +"""Base LP handler for building apps""" + +import errno +import io +import json +import logging +import os +import random +import string +import time + +import docker +from docker import errors +from oslo_config import cfg +from requests.packages.urllib3 import exceptions as req_exp + +from solum.common import exception as exc +from solum.common import solum_swiftclient +from solum.openstack.common import log as solum_log +from solum.uploaders import tenant_logger +from solum.worker.lp_handlers import utils + +from swiftclient import exceptions as swiftexp + + +LOG = solum_log.getLogger(__name__) + +cfg.CONF.import_opt('task_log_dir', 'solum.worker.config', group='worker') +cfg.CONF.import_opt('docker_daemon_url', 'solum.worker.config', group='worker') +cfg.CONF.import_opt('docker_build_timeout', 'solum.worker.config', + group='worker') +cfg.CONF.import_opt('container_mem_limit', 'solum.worker.config', + group='worker') +log_dir = cfg.CONF.worker.task_log_dir +docker_daemon_url = cfg.CONF.worker.docker_daemon_url +build_timeout = cfg.CONF.worker.docker_build_timeout +mem_limit = cfg.CONF.worker.container_mem_limit + +MAX_GIT_CLONE_RETRY = 5 +GIT_CLONE_TIMEOUT = 900 # 15 minutes +cloner_gid = os.getgid() + + +class BaseHandler(object): + + def __init__(self, context, assembly, image_storage): + self.context = context + self.assembly = assembly + self.image_storage = image_storage + self._docker = None + self.docker_cmd_uid = cfg.CONF.run_container_cmd_as + self.cloner_image = None + self.images = list() + self.containers = list() + self.work_dir = None + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_value, exc_traceback): + self.close() + + @property + def docker(self): + if self._docker is None: + self._docker = docker.Client(base_url=docker_daemon_url) + return self._docker + + def _get_tenant_logger(self, stage): + return 
tenant_logger.TenantLogger(self.context, + self.assembly, log_dir, stage) + + def close(self): + for ct in self.containers: + if ct: + try: + self.docker.remove_container(container=ct.get('Id')) + except (errors.DockerException, errors.APIError) as e: + LOG.warning('Failed to remove container %s, %s' % + (ct.get('Id'), str(e))) + + for img in self.images: + try: + self.docker.remove_image(image=img, force=True) + except (errors.DockerException, errors.APIError) as e: + LOG.warning('Failed to remove docker image %s, %s' % + (img, str(e))) + + if self.work_dir: + self._remove_cloned_repo(self.work_dir) + try: + utils.rm_tree(self.work_dir) + except OSError as e: + if e.errno != errno.ENOENT: + LOG.critical('critical: cannot remove dir %s,' + ' disk may be full.' % self.work_dir) + + if self.cloner_image: + try: + self.docker.remove_image(image=self.cloner_image, force=True) + except (errors.DockerException, errors.APIError) as e: + LOG.error('Error in removing docker image %s, %s' % + (self.cloner_image, str(e))) + + def _validate_pub_repo(self, repo_url): + pass + + @utils.retry + def _remove_cloned_repo(self, destination): + if not os.path.exists(destination): + return 0 + + result = 1 + try: + ct = self.docker.create_container( + image=self.cloner_image, user=str(self.docker_cmd_uid), + command=['rm', '-rf', '/tmp/code']) + self.docker.start(container=ct.get('Id'), + binds={destination: '/tmp'}) + result = self.docker.wait(container=ct.get('Id')) + self.docker.remove_container(container=ct.get('Id')) + except (errors.DockerException, errors.APIError) as e: + clone_dir = '{}/code'.format(destination) + LOG.error('Error in remove cloned repo %s, %s' % + (clone_dir, str(e))) + + return result + + def _clone_repo(self, repo_url, destination, logger, revision='master'): + # Clone a repo with the constraints of disk and memory usage + # Need to consider limiting network bandwidth as well. 
+ container_dest = '/tmp/code' + if utils.is_git_sha(revision): + clone_cmd = ('git clone {url} {dst} &&' + ' cd {dst} &&' + ' git checkout -B solum {rev} &&' + ' echo sha=$(git log -1 --pretty=%H)').format( + url=repo_url, dst=container_dest, rev=revision) + else: + clone_cmd = ('git clone -b {branch} --depth 1 {url} {dst} &&' + ' cd {dst} &&' + ' echo sha=$(git log -1 --pretty=%H)').format( + branch=revision, url=repo_url, dst=container_dest) + + timeout_clone = 'timeout --signal=SIGKILL {t} {clone}'.format( + t=GIT_CLONE_TIMEOUT, clone=clone_cmd) + + dockerfile = ('FROM solum/cloner\n' + 'RUN groupadd -f -g {gid} s-cloner-group\n' + 'RUN useradd -s /bin/bash -u {uid} -g {gid} s-cloner\n' + 'USER s-cloner\n' + 'CMD {cmd}').format(uid=self.docker_cmd_uid, + gid=cloner_gid, + cmd=timeout_clone) + + ranid = ''.join(random.choice(string.digits) for _ in range(5)) + self.cloner_image = '{}-cloner-{}'.format(self.docker_cmd_uid, ranid) + try: + self._docker_build_with_retry( + self.cloner_image, logger, pull=False, + fileobj=io.BytesIO(dockerfile.encode('utf-8'))) + ct = self.docker.create_container(image=self.cloner_image, + mem_limit=mem_limit, + memswap_limit=-1) + except (errors.DockerException, errors.APIError) as e: + logger.log(logging.ERROR, 'Pre git clone stage failed.') + LOG.error('Error in building/creating container for cloning,' + ' assembly: %s, %s' % (self.assembly.uuid, str(e))) + return + + head_sha = None + for i in range(MAX_GIT_CLONE_RETRY): + # retry cloning + try: + self.docker.start(container=ct.get('Id'), + binds={destination: '/tmp'}) + for line in self.docker.attach(container=ct.get('Id'), + stream=True): + if line.startswith('sha='): + head_sha = line.replace('sha=', '').strip() + else: + logger.log(logging.INFO, line) + except (errors.DockerException, errors.APIError) as e: + logger.log(logging.ERROR, 'Got an error in cloning the repo,' + ' max retry %s times. 
Repo: %s,' + ' revision: %s' % + (MAX_GIT_CLONE_RETRY, repo_url, revision)) + LOG.error('Error in cloning. assembly: %s, repo: %s,' + ' rev: %s, %s' % + (self.assembly.uuid, repo_url, revision, str(e))) + if head_sha: + logger.log(logging.INFO, 'Finished cloning repo.') + break + elif i < MAX_GIT_CLONE_RETRY - 1: + clone_dir = '{}/code'.format(destination) + res = self._remove_cloned_repo(destination) + if res != 0: + LOG.critical('critical: cannot remove dir %s,' + ' disk may be full.' % clone_dir) + time.sleep(3) + + try: + self.docker.remove_container(container=ct.get('Id')) + except (errors.DockerException, errors.APIError): + pass + + return head_sha + + def _docker_build(self, tag, logger, timeout, limits, path=None, + dockerfile=None, fileobj=None, forcerm=True, quiet=True, + nocache=False, pull=True): + success = 1 + try: + for l in self.docker.build(path=path, dockerfile=dockerfile, + fileobj=fileobj, tag=tag, + timeout=timeout, forcerm=forcerm, + quiet=quiet, nocache=nocache, pull=pull, + container_limits=limits): + try: + info = json.loads(l).get('stream', '') + if info: + if 'successfully built' in info.lower(): + success = 0 + else: + err = json.loads(l).get('errorDetail', '') + if err: + logger.log(logging.ERROR, err) + except ValueError: + pass + except req_exp.ReadTimeoutError: + logger.log(logging.ERROR, 'docker build timed out, max value: %s' % + timeout) + except (errors.DockerException, errors.APIError) as e: + LOG.error('Error in building docker image %s, assembly: %s, %s' % + (tag, self.assembly.uuid, str(e))) + return success + + def _docker_build_with_retry(self, tag, logger, path=None, dockerfile=None, + fileobj=None, forcerm=True, quiet=True, + limits=None, pull=True, + timeout=build_timeout): + limits = limits or {'memory': mem_limit, 'memswap': -1} + + result = self._docker_build(tag, logger, timeout, limits, path=path, + dockerfile=dockerfile, fileobj=fileobj, + forcerm=forcerm, quiet=quiet, pull=pull, + nocache=False) + if result == 0: 
+                return 0
+
+        time.sleep(2)
+        result = self._docker_build(tag, logger, timeout, limits, path=path,
+                                    dockerfile=dockerfile, fileobj=fileobj,
+                                    forcerm=forcerm, quiet=quiet, pull=pull,
+                                    nocache=True)
+
+        return result
+
+    @utils.retry
+    def _docker_save(self, image, output):
+        result = 1
+        try:
+            lp = self.docker.get_image(image)
+            with open(output, 'wb') as f:
+                f.write(lp.data)
+            result = 0
+        except (OSError, errors.DockerException, errors.APIError) as e:
+            LOG.error('Error saving docker image, %s' % str(e))
+        return result
+
+    @utils.retry
+    def _docker_load(self, path):
+        result = 1
+        try:
+            with open(path, 'rb') as f:
+                self.docker.load_image(f)
+            result = 0
+        except (OSError, errors.DockerException, errors.APIError) as e:
+            LOG.error('Error in loading docker image, %s' % str(e))
+        return result
+
+    def _persist_to_backend(self, local_file, swift_container, swift_obj,
+                            logger):
+        loc = None
+        if (self.image_storage == 'glance' or
+                self.image_storage == 'docker_registry'):
+            return loc
+        elif self.image_storage == 'swift':
+            swift = solum_swiftclient.SwiftClient(self.context)
+            try:
+                swift.upload(local_file, swift_container, swift_obj)
+                loc = swift_obj
+            except exc.InvalidObjectSizeError:
+                logger.log(logging.INFO, 'Image with size exceeding 5GB'
+                           ' is not supported')
+            except swiftexp.ClientException as e:
+                LOG.error('Error in persisting artifact to swift, %s' % str(e))
+        return loc
+
+    def unittest_app(self, *args):
+        """Interface to implement in derived class."""
+        pass
+
+    def build_app(self, *args):
+        """Interface to implement in derived class."""
+        pass
diff --git a/solum/worker/lp_handlers/default.py b/solum/worker/lp_handlers/default.py
new file mode 100644
index 000000000..ea7b12d0a
--- /dev/null
+++ b/solum/worker/lp_handlers/default.py
@@ -0,0 +1,115 @@
+# Copyright 2015 - Rackspace Hosting
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""LP handler for building apps running on solum language packs""" + +import logging +import os +import random +import string + +from oslo_config import cfg + +from solum.common import clients +from solum.openstack.common import log as solum_log +from solum.worker.lp_handlers import base +from solum.worker.lp_handlers import utils + + +LOG = solum_log.getLogger(__name__) + +cfg.CONF.import_opt('container_mem_limit', 'solum.worker.config', + group='worker') +mem_limit = cfg.CONF.worker.container_mem_limit +UNITTEST_TIMEOUT = 1800 # 30 minutes + + +class DockerHandler(base.BaseHandler): + + def __init__(self, context, assembly, lp_type, image_storage): + super(DockerHandler, self).__init__(context, assembly, image_storage) + + self.lp_type = lp_type + self.lp = None + if self.image_storage == 'glance': + self.glance = clients.OpenStackClients(context).glance() + + def _download_lp(self, lp_obj_name, lp_img_tag, logger): + # TODO(james_li): try cache before downloading from origin + pass + + def build_lp(self, lp_name, git_info): + logger = self._get_tenant_logger('language_pack') + tenant = self.context.tenant + ts = utils.timestamp() + ranid = (''.join(random.choice(string.ascii_uppercase) + for _ in range(20))) + self.work_dir = '/tmp/lps/{tenant}/{id}'.format(tenant=tenant, + id=ranid) + try: + os.makedirs(self.work_dir) + os.chmod(self.work_dir, 0o774) + except OSError as e: + LOG.error('Error creating working dir %s, %s' % + (self.work_dir, str(e))) + logger.log(logging.ERROR, 'Building LP preparation failed.') + logger.upload() + return + + revision 
= git_info.get('revision', 'master') + head_sha = self._clone_repo(git_info['source_url'], self.work_dir, + logger, revision=revision) + if not head_sha: + logger.log(logging.ERROR, 'Failed cloning LP repo %s.' % + git_info['source_url']) + logger.upload() + return + + storage_obj_name = '{name}-{ts}-{sha}'.format(name=lp_name, ts=ts, + sha=head_sha) + lp_image_tag = '{tenant}-{obj}'.format(tenant=tenant, + obj=storage_obj_name) + dockerfile = '{}/code'.format(self.work_dir) + + logger.log(logging.INFO, 'Start building LP...') + result = self._docker_build_with_retry(lp_image_tag, logger, + path=dockerfile) + if result != 0: + logger.log(logging.ERROR, 'Failed building LP image.') + logger.upload() + return + + lp_file = '{}/{}'.format(self.work_dir, storage_obj_name) + result = self._docker_save(lp_image_tag, lp_file) + if result != 0: + logger.log(logging.ERROR, 'Failed saving LP image.') + logger.upload() + return + + image_loc = self._persist_to_backend(lp_file, 'solum_lp', + storage_obj_name, logger) + if image_loc is None: + logger.log(logging.ERROR, 'Failed persisting LP to backend.') + logger.upload() + return + else: + logger.log(logging.INFO, 'Successfully created LP image.') + logger.upload() + return (image_loc, lp_image_tag) + + def unittest_app(self, *args): + pass + + def build_app(self, *args): + pass diff --git a/solum/worker/lp_handlers/heroku.py b/solum/worker/lp_handlers/heroku.py new file mode 100644 index 000000000..89a7cbf51 --- /dev/null +++ b/solum/worker/lp_handlers/heroku.py @@ -0,0 +1,29 @@ +# Copyright 2015 - Rackspace Hosting +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""LP handler for building apps running on cedarish build packs""" + +from solum.worker.lp_handlers import base + + +class HerokuHandler(base.BaseHandler): + + def __init__(self, context, assembly): + super(HerokuHandler, self).__init__(context, assembly) + + def unittest_app(self, *args): + pass + + def build_app(self, *args): + pass diff --git a/solum/worker/lp_handlers/utils.py b/solum/worker/lp_handlers/utils.py new file mode 100644 index 000000000..b778d2829 --- /dev/null +++ b/solum/worker/lp_handlers/utils.py @@ -0,0 +1,70 @@ +# Copyright 2015 - Rackspace Hosting +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+import datetime
+import functools
+import re
+import shutil
+import subprocess
+import time
+import types
+
+
+def retry(fun):
+    """Decorator to retry a call."""
+    @functools.wraps(fun)
+    def _wrapper(*args, **kwargs):
+        max_retries = kwargs.pop('max_retries', 2)
+        expected = kwargs.pop('expected_ret_code', 0)
+
+        actual_ret = None
+        for tries in range(max_retries):
+            try:
+                actual_ret = fun(*args, **kwargs)
+                if expected == actual_ret:
+                    return actual_ret
+            except Exception:
+                if tries + 1 >= max_retries:
+                    raise
+
+            time.sleep(1)
+        return actual_ret
+    return _wrapper
+
+
+def is_git_sha(revision):
+    return re.match(r'^([a-f0-9]{40})$', revision)
+
+
+def timestamp():
+    return datetime.datetime.utcnow().strftime('%Y%m%dt%H%M%S%f')
+
+
+def rm_tree(directory):
+    shutil.rmtree(directory)
+
+
+class Shell(object):
+    """Helper methods to run shell commands."""
+
+    @classmethod
+    def run(cls, args, timeout=None):
+        if timeout:
+            prefix = ['timeout', '--signal=SIGKILL', str(timeout)]
+            if isinstance(args, types.StringTypes):
+                args = prefix + [args]
+            elif isinstance(args, list):
+                args = prefix + args
+
+        return subprocess.call(args)