Convert bash scripts to Python

This patch sets up an LP handler framework to accommodate the
build-lp, unittest-app, and build-app functionality.

Additionally, the patch improves isolation when building an
application container on a worker node. Specifically, the git clone
command that fetches application code is run inside a docker
container, which constrains memory, CPU, disk, and network bandwidth
usage and protects Solum from denial-of-service attacks by a
malicious git server.
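
A minimal sketch of the constrained-clone approach, assuming the
docker-py Client API pinned by this patch; the image tag, repository
URL, work directory, UID, and limits below are illustrative
placeholders rather than values used by Solum:

import io
import docker

# Illustrative placeholders -- the real values come from the worker
# configuration options added by this patch.
CMD_UID = 65533                  # unprivileged UID the clone runs as
CLONE_TIMEOUT = 900              # seconds before git clone is SIGKILLed
MEM_LIMIT = 512 * 1024 * 1024    # hard memory cap for the container

# Throwaway image whose only job is to run git clone as a non-root
# user, wrapped in a kill timeout.
dockerfile = (
    'FROM solum/cloner\n'
    'RUN useradd -s /bin/bash -u {uid} s-cloner\n'
    'USER s-cloner\n'
    'CMD timeout --signal=SIGKILL {t} '
    'git clone https://git.example.org/app.git /tmp/code\n'
).format(uid=CMD_UID, t=CLONE_TIMEOUT)

client = docker.Client(base_url='unix://var/run/docker.sock')
for _ in client.build(fileobj=io.BytesIO(dockerfile.encode('utf-8')),
                      tag='example-cloner', forcerm=True):
    pass

# Run the clone with the memory cap and the host work dir bind-mounted,
# so resource abuse by a hostile git server stays inside the container.
ct = client.create_container(image='example-cloner',
                             mem_limit=MEM_LIMIT, memswap_limit=-1)
client.start(container=ct.get('Id'), binds={'/tmp/example-work': '/tmp'})
exit_code = client.wait(container=ct.get('Id'))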

Partially-fixes-bug: #1302552
Change-Id: Ie0f700c0d4bd74e24ac1a9727f4b17907f88cca1
James Li 2015-05-07 22:29:58 +00:00
parent 9920039d78
commit 24649a7e24
11 changed files with 554 additions and 5 deletions

View File

@@ -1,7 +1,7 @@
eventlet>=0.17.3
alembic>=0.6.4
Babel>=1.3
docker-py>=0.5.1
docker-py==1.2.3
httplib2>=0.7.5
iso8601>=0.1.9
kombu>=2.4.8

View File

@@ -31,7 +31,16 @@ from solum.worker.handlers import shell as shell_handler
LOG = logging.getLogger(__name__)
cli_opts = [
cfg.IntOpt('run-container-cmd-as', metavar='UID', default=65533,
help='Run commands in containers as the user assigned '
'with the UID, which can be used to constrain resources, '
'e.g. disk usage, on a worker host.'),
]
def main():
cfg.CONF.register_cli_opts(cli_opts)
cfg.CONF(sys.argv[1:], project='solum')
logging.setup('solum')
solum.TLS.trace = trace_data.TraceData()

View File

@@ -326,7 +326,6 @@ class OpenStackClients(object):
def swift(self):
# Not caching swift connections because of range requests
# Check how glance_store uses swift client for a reference
endpoint_type = get_client_option('swift', 'endpoint_type')
region_name = get_client_option('swift', 'region_name')
args = {

View File

@@ -82,7 +82,7 @@ def list_opts():
cfg.CONF.register_opts(SERVICE_OPTS, group='deployer')
cfg.CONF.import_opt('image_format', 'solum.api.handlers.assembly_handler',
group='api')
cfg.CONF.import_group('worker', 'solum.worker.handlers.shell')
cfg.CONF.import_opt('image_storage', 'solum.worker.config', group='worker')
deployer_log_dir = cfg.CONF.deployer.deployer_log_dir

View File

@@ -23,8 +23,8 @@ from solum.openstack.common import log as openstack_logger
import solum.uploaders.local as local_uploader
import solum.uploaders.swift as swift_uploader
cfg.CONF.import_group('worker', 'solum.worker.handlers.shell')
cfg.CONF.import_opt('log_upload_strategy', 'solum.worker.config',
group='worker')
LOG = openstack_logger.getLogger(__name__)

View File

@@ -89,6 +89,13 @@ SERVICE_OPTS = [
cfg.StrOpt('lp_operator_tenant_name',
default="demo",
help='LP operator tenant name.'),
cfg.StrOpt('docker_daemon_url',
default="unix://var/run/docker.sock",
help='docker daemon url.'),
cfg.IntOpt('container_mem_limit', default=0,
help='max memory a container can consume. No limit by default'),
cfg.IntOpt('docker_build_timeout', default=1800,
help='max time a docker build can take. Default: 30 minutes'),
]
opt_group = cfg.OptGroup(
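
For reference, a hypothetical solum.conf fragment exercising these new
options might look as follows; the values are illustrative, not
recommended settings. The UID used inside containers is controlled by
the separate run-container-cmd-as CLI option registered by the worker
entry point (for example, --run-container-cmd-as 65533).

[worker]
# Local docker daemon the worker talks to (the patch's default).
docker_daemon_url = unix://var/run/docker.sock
# 0 keeps the default of no limit; a positive value caps container
# memory in bytes.
container_mem_limit = 536870912
# Abort docker builds that run longer than 30 minutes.
docker_build_timeout = 1800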

View File

View File

@@ -0,0 +1,320 @@
# Copyright 2015 - Rackspace Hosting
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Base LP handler for building apps"""
import errno
import io
import json
import logging
import os
import random
import string
import time
import docker
from docker import errors
from oslo_config import cfg
from requests.packages.urllib3 import exceptions as req_exp
from solum.common import exception as exc
from solum.common import solum_swiftclient
from solum.openstack.common import log as solum_log
from solum.uploaders import tenant_logger
from solum.worker.lp_handlers import utils
from swiftclient import exceptions as swiftexp
LOG = solum_log.getLogger(__name__)
cfg.CONF.import_opt('task_log_dir', 'solum.worker.config', group='worker')
cfg.CONF.import_opt('docker_daemon_url', 'solum.worker.config', group='worker')
cfg.CONF.import_opt('docker_build_timeout', 'solum.worker.config',
group='worker')
cfg.CONF.import_opt('container_mem_limit', 'solum.worker.config',
group='worker')
log_dir = cfg.CONF.worker.task_log_dir
docker_daemon_url = cfg.CONF.worker.docker_daemon_url
build_timeout = cfg.CONF.worker.docker_build_timeout
mem_limit = cfg.CONF.worker.container_mem_limit
MAX_GIT_CLONE_RETRY = 5
GIT_CLONE_TIMEOUT = 900 # 15 minutes
cloner_gid = os.getgid()
class BaseHandler(object):
def __init__(self, context, assembly, image_storage):
self.context = context
self.assembly = assembly
self.image_storage = image_storage
self._docker = None
self.docker_cmd_uid = cfg.CONF.run_container_cmd_as
self.cloner_image = None
self.images = list()
self.containers = list()
self.work_dir = None
def __enter__(self):
return self
def __exit__(self, exc_type, exc_value, exc_traceback):
self.close()
@property
def docker(self):
if self._docker is None:
self._docker = docker.Client(base_url=docker_daemon_url)
return self._docker
def _get_tenant_logger(self, stage):
return tenant_logger.TenantLogger(self.context,
self.assembly, log_dir, stage)
def close(self):
for ct in self.containers:
if ct:
try:
self.docker.remove_container(container=ct.get('Id'))
except (errors.DockerException, errors.APIError) as e:
LOG.warning('Failed to remove container %s, %s' %
(ct.get('Id'), str(e)))
for img in self.images:
try:
self.docker.remove_image(image=img, force=True)
except (errors.DockerException, errors.APIError) as e:
LOG.warning('Failed to remove docker image %s, %s' %
(img, str(e)))
if self.work_dir:
self._remove_cloned_repo(self.work_dir)
try:
utils.rm_tree(self.work_dir)
except OSError as e:
if e.errno != errno.ENOENT:
LOG.critical('critical: cannot remove dir %s,'
' disk may be full.' % self.work_dir)
if self.cloner_image:
try:
self.docker.remove_image(image=self.cloner_image, force=True)
except (errors.DockerException, errors.APIError) as e:
LOG.error('Error in removing docker image %s, %s' %
(self.cloner_image, str(e)))
def _validate_pub_repo(self, repo_url):
pass
@utils.retry
def _remove_cloned_repo(self, destination):
if not os.path.exists(destination):
return 0
result = 1
try:
ct = self.docker.create_container(
image=self.cloner_image, user=str(self.docker_cmd_uid),
command=['rm', '-rf', '/tmp/code'])
self.docker.start(container=ct.get('Id'),
binds={destination: '/tmp'})
result = self.docker.wait(container=ct.get('Id'))
self.docker.remove_container(container=ct.get('Id'))
except (errors.DockerException, errors.APIError) as e:
clone_dir = '{}/code'.format(destination)
LOG.error('Error removing cloned repo %s, %s' %
(clone_dir, str(e)))
return result
def _clone_repo(self, repo_url, destination, logger, revision='master'):
# Clone a repo with the constraints of disk and memory usage
# Need to consider limiting network bandwidth as well.
container_dest = '/tmp/code'
if utils.is_git_sha(revision):
clone_cmd = ('git clone {url} {dst} &&'
' cd {dst} &&'
' git checkout -B solum {rev} &&'
' echo sha=$(git log -1 --pretty=%H)').format(
url=repo_url, dst=container_dest, rev=revision)
else:
clone_cmd = ('git clone -b {branch} --depth 1 {url} {dst} &&'
' cd {dst} &&'
' echo sha=$(git log -1 --pretty=%H)').format(
branch=revision, url=repo_url, dst=container_dest)
timeout_clone = 'timeout --signal=SIGKILL {t} {clone}'.format(
t=GIT_CLONE_TIMEOUT, clone=clone_cmd)
dockerfile = ('FROM solum/cloner\n'
'RUN groupadd -f -g {gid} s-cloner-group\n'
'RUN useradd -s /bin/bash -u {uid} -g {gid} s-cloner\n'
'USER s-cloner\n'
'CMD {cmd}').format(uid=self.docker_cmd_uid,
gid=cloner_gid,
cmd=timeout_clone)
ranid = ''.join(random.choice(string.digits) for _ in range(5))
self.cloner_image = '{}-cloner-{}'.format(self.docker_cmd_uid, ranid)
try:
self._docker_build_with_retry(
self.cloner_image, logger, pull=False,
fileobj=io.BytesIO(dockerfile.encode('utf-8')))
ct = self.docker.create_container(image=self.cloner_image,
mem_limit=mem_limit,
memswap_limit=-1)
except (errors.DockerException, errors.APIError) as e:
logger.log(logging.ERROR, 'Pre git clone stage failed.')
LOG.error('Error in building/creating container for cloning,'
' assembly: %s, %s' % (self.assembly.uuid, str(e)))
return
head_sha = None
for i in range(MAX_GIT_CLONE_RETRY):
# retry cloning
try:
self.docker.start(container=ct.get('Id'),
binds={destination: '/tmp'})
for line in self.docker.attach(container=ct.get('Id'),
stream=True):
if line.startswith('sha='):
head_sha = line.replace('sha=', '').strip()
else:
logger.log(logging.INFO, line)
except (errors.DockerException, errors.APIError) as e:
logger.log(logging.ERROR, 'Got an error in cloning the repo,'
' max retry %s times. Repo: %s,'
' revision: %s' %
(MAX_GIT_CLONE_RETRY, repo_url, revision))
LOG.error('Error in cloning. assembly: %s, repo: %s,'
' rev: %s, %s' %
(self.assembly.uuid, repo_url, revision, str(e)))
if head_sha:
logger.log(logging.INFO, 'Finished cloning repo.')
break
elif i < MAX_GIT_CLONE_RETRY - 1:
clone_dir = '{}/code'.format(destination)
res = self._remove_cloned_repo(destination)
if res != 0:
LOG.critical('critical: cannot remove dir %s,'
' disk may be full.' % clone_dir)
time.sleep(3)
try:
self.docker.remove_container(container=ct.get('Id'))
except (errors.DockerException, errors.APIError):
pass
return head_sha
def _docker_build(self, tag, logger, timeout, limits, path=None,
dockerfile=None, fileobj=None, forcerm=True, quiet=True,
nocache=False, pull=True):
success = 1
try:
for l in self.docker.build(path=path, dockerfile=dockerfile,
fileobj=fileobj, tag=tag,
timeout=timeout, forcerm=forcerm,
quiet=quiet, nocache=nocache, pull=pull,
container_limits=limits):
try:
info = json.loads(l).get('stream', '')
if info:
if 'successfully built' in info.lower():
success = 0
else:
err = json.loads(l).get('errorDetail', '')
if err:
logger.log(logging.ERROR, err)
except ValueError:
pass
except req_exp.ReadTimeoutError:
logger.log(logging.ERROR, 'docker build timed out, max value: %s' %
timeout)
except (errors.DockerException, errors.APIError) as e:
LOG.error('Error in building docker image %s, assembly: %s, %s' %
(tag, self.assembly.uuid, str(e)))
return success
def _docker_build_with_retry(self, tag, logger, path=None, dockerfile=None,
fileobj=None, forcerm=True, quiet=True,
limits=None, pull=True,
timeout=build_timeout):
limits = limits or {'memory': mem_limit, 'memswap': -1}
result = self._docker_build(tag, logger, timeout, limits, path=path,
dockerfile=dockerfile, fileobj=fileobj,
forcerm=forcerm, quiet=quiet, pull=pull,
nocache=False)
if result == 0:
return 0
time.sleep(2)
result = self._docker_build(tag, logger, timeout, limits, path=path,
dockerfile=dockerfile, fileobj=fileobj,
forcerm=forcerm, quiet=quiet, pull=pull,
nocache=True)
return result
@utils.retry
def _docker_save(self, image, output):
result = 1
try:
lp = self.docker.get_image(image)
with open(output, 'wb') as f:
f.write(lp.data)
result = 0
except (OSError, errors.DockerException, errors.APIError) as e:
LOG.error('Error saving docker image, %s' % str(e))
return result
@utils.retry
def _docker_load(self, path):
result = 1
try:
with open(path, 'rb') as f:
self.docker.load_image(f)
result = 0
except (OSError, errors.DockerException, errors.APIError) as e:
LOG.error('Error in loading docker image, %s' % str(e))
return result
def _persist_to_backend(self, local_file, swift_container, swift_obj,
logger):
loc = None
if (self.image_storage == 'glance' or
self.image_storage == 'docker_registry'):
return loc
elif self.image_storage == 'swift':
swift = solum_swiftclient.SwiftClient(self.context)
try:
swift.upload(local_file, swift_container, swift_obj)
loc = swift_obj
except exc.InvalidObjectSizeError:
logger.log(logging.INFO, 'Image with size exceeding 5GB'
' is not supported')
except swiftexp.ClientException as e:
LOG.error('Error in persisting artifact to swift, %s' % str(e))
return loc
def unittest_app(self, *args):
"""Interface to implement in derived class."""
pass
def build_app(self, *args):
"""Interface to implement in derived class."""
pass

View File

@@ -0,0 +1,115 @@
# Copyright 2015 - Rackspace Hosting
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""LP handler for building apps running on solum language packs"""
import logging
import os
import random
import string
from oslo_config import cfg
from solum.common import clients
from solum.openstack.common import log as solum_log
from solum.worker.lp_handlers import base
from solum.worker.lp_handlers import utils
LOG = solum_log.getLogger(__name__)
cfg.CONF.import_opt('container_mem_limit', 'solum.worker.config',
group='worker')
mem_limit = cfg.CONF.worker.container_mem_limit
UNITTEST_TIMEOUT = 1800 # 30 minutes
class DockerHandler(base.BaseHandler):
def __init__(self, context, assembly, lp_type, image_storage):
super(DockerHandler, self).__init__(context, assembly, image_storage)
self.lp_type = lp_type
self.lp = None
if self.image_storage == 'glance':
self.glance = clients.OpenStackClients(context).glance()
def _download_lp(self, lp_obj_name, lp_img_tag, logger):
# TODO(james_li): try cache before downloading from origin
pass
def build_lp(self, lp_name, git_info):
logger = self._get_tenant_logger('language_pack')
tenant = self.context.tenant
ts = utils.timestamp()
ranid = (''.join(random.choice(string.ascii_uppercase)
for _ in range(20)))
self.work_dir = '/tmp/lps/{tenant}/{id}'.format(tenant=tenant,
id=ranid)
try:
os.makedirs(self.work_dir)
os.chmod(self.work_dir, 0o774)
except OSError as e:
LOG.error('Error creating working dir %s, %s' %
(self.work_dir, str(e)))
logger.log(logging.ERROR, 'Building LP preparation failed.')
logger.upload()
return
revision = git_info.get('revision', 'master')
head_sha = self._clone_repo(git_info['source_url'], self.work_dir,
logger, revision=revision)
if not head_sha:
logger.log(logging.ERROR, 'Failed cloning LP repo %s.' %
git_info['source_url'])
logger.upload()
return
storage_obj_name = '{name}-{ts}-{sha}'.format(name=lp_name, ts=ts,
sha=head_sha)
lp_image_tag = '{tenant}-{obj}'.format(tenant=tenant,
obj=storage_obj_name)
dockerfile = '{}/code'.format(self.work_dir)
logger.log(logging.INFO, 'Start building LP...')
result = self._docker_build_with_retry(lp_image_tag, logger,
path=dockerfile)
if result != 0:
logger.log(logging.ERROR, 'Failed building LP image.')
logger.upload()
return
lp_file = '{}/{}'.format(self.work_dir, storage_obj_name)
result = self._docker_save(lp_image_tag, lp_file)
if result != 0:
logger.log(logging.ERROR, 'Failed saving LP image.')
logger.upload()
return
image_loc = self._persist_to_backend(lp_file, 'solum_lp',
storage_obj_name, logger)
if image_loc is None:
logger.log(logging.ERROR, 'Failed persisting LP to backend.')
logger.upload()
return
else:
logger.log(logging.INFO, 'Successfully created LP image.')
logger.upload()
return (image_loc, lp_image_tag)
def unittest_app(self, *args):
pass
def build_app(self, *args):
pass

View File

@@ -0,0 +1,29 @@
# Copyright 2015 - Rackspace Hosting
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""LP handler for building apps running on cedarish build packs"""
from solum.worker.lp_handlers import base
class HerokuHandler(base.BaseHandler):
def __init__(self, context, assembly, image_storage):
super(HerokuHandler, self).__init__(context, assembly, image_storage)
def unittest_app(self, *args):
pass
def build_app(self, *args):
pass

View File

@@ -0,0 +1,70 @@
# Copyright 2015 - Rackspace Hosting
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import datetime
import functools
import re
import shutil
import subprocess
import time
import types
def retry(fun):
"""Decorator to retry a call."""
@functools.wraps(fun)
def _wrapper(*args, **kwargs):
max_retries = kwargs.pop('max_retries', 2)
expected = kwargs.pop('expected_ret_code', 0)
actual_ret = None
for tries in range(max_retries):
try:
actual_ret = fun(*args, **kwargs)
if expected == actual_ret:
return actual_ret
except Exception:
if tries + 1 >= max_retries:
raise
time.sleep(1)
return actual_ret
return _wrapper
def is_git_sha(revision):
return re.match(r'^([a-f0-9]{40})$', revision)
def timestamp():
return datetime.datetime.utcnow().strftime('%Y%m%dt%H%M%S%f')
def rm_tree(directory):
shutil.rmtree(directory)
class Shell(object):
"""Helper methods to run shell commands."""
@classmethod
def run(cls, args, timeout=None):
if timeout:
prefix = ['timeout', '--signal=SIGKILL', str(timeout)]
# Prepend the timeout wrapper to the command; list.append/extend
# return None, so build a new argument list instead.
if isinstance(args, types.StringTypes):
args = prefix + [args]
elif isinstance(args, list):
args = prefix + args
return subprocess.call(args)
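
A brief usage sketch of the retry decorator and Shell.run; push_image
and its arguments are hypothetical, and the max_retries and
expected_ret_code keywords are consumed by the wrapper before the
decorated function is called:

@retry
def push_image(image_tag):
    # Returns the shell exit code; 0 means success.
    return Shell.run(['docker', 'push', image_tag], timeout=600)

# Retries up to three times, until the return value equals the
# expected exit code of 0.
ret = push_image('example/app:latest', max_retries=3,
                 expected_ret_code=0)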