storlets/Engine/swift/storlet_gateway/storlet_docker_gateway.py


'''-------------------------------------------------------------------------
Copyright IBM Corp. 2015, 2015 All Rights Reserved
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-------------------------------------------------------------------------'''
'''
Created on Mar 24, 2015
@author: Yosef Moatti
'''
import os
import select
import shutil
from eventlet import Timeout
from storlet_middleware.storlet_common import StorletGatewayBase
from storlet_runtime import RunTimePaths
from storlet_runtime import RunTimeSandbox
from storlet_runtime import StorletInvocationGETProtocol
from storlet_runtime import StorletInvocationPUTProtocol
from storlet_runtime import StorletInvocationSLOProtocol
from swift.common.internal_client import InternalClient as ic
from swift.common.swob import Request
from swift.common.utils import config_true_value
'''---------------------------------------------------------------------------
The Storlet Gateway API
The API is made of:
(1) The classes StorletGETRequest, StorletPUTRequest and StorletSLORequest.
    These encapsulate what goes into and comes out of the gateway. All of
    them share a common parent: DockerStorletRequest
(2) StorletGatewayDocker is the Docker flavor of the StorletGateway API:
    validateStorletUpload
    authorizeStorletExecution
    augmentStorletRequest
    gatewayProxyPutFlow
    gatewayProxyGETFlow
    gatewayObjectGetFlow
(3) parse_gateway_conf parses the Docker gateway specific configuration. While
    it is part of the API, it is implemented as a static method, as the
    parsing of the configuration takes place before the StorletGateway is
    instantiated
---------------------------------------------------------------------------'''
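# A rough sketch of how the storlet middleware is expected to drive this
# gateway on the proxy side. The surrounding control flow lives in the
# middleware, not in this file, so the snippet below is illustrative only
# (variable names such as req/sconf are placeholders):
#
#   gateway = StorletGatewayDocker(sconf, logger, app, version,
#                                  account, container, obj)
#   # on PUT to the storlet / dependency containers:
#   error = gateway.validateStorletUpload(req)
#   # on a request carrying 'X-Run-Storlet':
#   if gateway.authorizeStorletExecution(req):
#       gateway.augmentStorletRequest(req)
#       out_md, data_iter = gateway.gatewayProxyPutFlow(req, container, obj)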
class DockerStorletRequest(object):
    '''The StorletRequest class represents a request to be processed by the
    storlet. The request is derived from the Swift request and
    essentially consists of:
    1. A data stream to be processed
    2. Metadata identifying the stream
    '''
def user_metadata(self, headers):
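        '''Extract user metadata from the request headers: keep the
        X-Object-Meta-* entries (stripped of their prefix) while skipping
        the storlet-internal X-Storlet* and X-Object-Meta-Storlet* headers.
        '''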
metadata = {}
for key in headers:
if (key.startswith('X-Storlet') or
key.startswith('X-Object-Meta-Storlet')):
pass
elif (key.startswith('X-Object-Meta-') or
key.startswith('X-Object-Meta-'.lower())):
short_key = key[len('X-Object-Meta-'):]
metadata[short_key] = headers[key]
return metadata
def _getInitialRequest(self):
return self.request
def __init__(self, account, request, params):
self.generate_log = request.headers.get('X-Storlet-Generate-Log',
False)
self.storlet_id = request.headers.get('X-Object-Meta-Storlet-Main')
self.user_metadata = self.user_metadata(request.headers)
self.params = params
self.account = account
self.request = request
pass
class StorletGETRequest(DockerStorletRequest):
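    '''GET invocation on the object node: the storlet reads the object data
    from the file descriptor underlying the original response app_iter.
    '''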
def __init__(self, account, orig_resp, params):
DockerStorletRequest.__init__(self, account, orig_resp, params)
self.stream = orig_resp.app_iter._fp.fileno()
class StorletPUTRequest(DockerStorletRequest):
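    '''PUT invocation: the storlet reads the incoming object data through
    the read callable of the request's wsgi.input.
    '''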
def __init__(self, account, request):
DockerStorletRequest.__init__(self, account, request, request.params)
self.stream = request.environ['wsgi.input'].read
return
class StorletSLORequest(DockerStorletRequest):
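    '''GET invocation on the proxy (SLO case): the storlet reads the object
    data by iterating over the original response app_iter.
    '''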
def __init__(self, account, orig_resp, params):
DockerStorletRequest.__init__(self, account, orig_resp, params)
self.stream = orig_resp.app_iter
return
class StorletGatewayDocker(StorletGatewayBase):
def __init__(self, sconf, logger, app, version, account, container,
obj):
self.logger = logger
self.app = app
self.version = version
self.account = account
self.container = container
self.obj = obj
self.sconf = sconf
self.storlet_metadata = None
self.storlet_timeout = int(self.sconf['storlet_timeout'])
self.paths = RunTimePaths(account, sconf)
class IterLike(object):
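        '''File-like, iterable wrapper around the file descriptor from which
        the storlet output is read. Reads are guarded by a timeout; on
        timeout the supplied cancel_func is invoked so that the storlet
        invocation can be aborted.
        '''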
def __init__(self, obj_data, timeout, cancel_func):
self.closed = False
self.obj_data = obj_data
self.timeout = timeout
self.cancel_func = cancel_func
def __iter__(self):
return self
def read_with_timeout(self, size):
timeout = Timeout(self.timeout)
try:
chunk = os.read(self.obj_data, size)
except Timeout as t:
if t is timeout:
if self.cancel_func:
self.cancel_func()
self.close()
raise t
except Exception as e:
self.close()
raise e
finally:
timeout.cancel()
return chunk
def next(self, size=1024):
chunk = None
r, w, e = select.select([self.obj_data], [], [], self.timeout)
if len(r) == 0:
self.close()
if self.obj_data in r:
chunk = self.read_with_timeout(size)
if chunk == '':
raise StopIteration('Stopped iterator ex')
else:
return chunk
raise StopIteration('Stopped iterator ex')
def read(self, size=1024):
return self.next(size)
def readline(self, size=-1):
return ''
def readlines(self, sizehint=-1):
pass
def close(self):
if self.closed is True:
return
self.closed = True
os.close(self.obj_data)
def __del__(self):
self.close()
def validateStorletUpload(self, req):
if (self.container == self.sconf['storlet_container']):
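            # a storlet object name is expected to contain both '-' and '.'
            # (e.g. something like mystorlet-1.0.jar)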
if (self.obj.find('-') < 0 or self.obj.find('.') < 0):
return 'Storlet name is incorrect'
ret = self._validate_mandatory_headers(req)
if ret:
return ret
return False
def authorizeStorletExecution(self, req):
res, headers = self.verify_access(req.environ,
self.version,
self.account,
self.sconf['storlet_container'],
req.headers['X-Run-Storlet'])
if not res:
return False
# keep the storlets headers for later use.
self.storlet_metadata = headers
return True
def augmentStorletRequest(self, req):
if self.storlet_metadata:
self._fix_request_headers(req)
def gatewayProxyPutFlow(self, orig_req, container, obj):
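        '''PUT flow on the proxy: wrap the request as a StorletPUTRequest,
        make sure the storlet daemon is running inside the Docker sandbox
        (updating the container from the cache if needed), hand the object
        data to the storlet over the invocation protocol, and return the
        storlet output metadata together with an IterLike over the storlet
        output descriptor.
        '''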
sreq = StorletPUTRequest(self.account, orig_req)
req = sreq._getInitialRequest()
self.idata = self._get_storlet_invocation_data(req)
run_time_sbox = RunTimeSandbox(self.account, self.sconf, self.logger)
docker_updated = self.update_docker_container_from_cache()
run_time_sbox.activate_storlet_daemon(self.idata,
docker_updated)
self._add_system_params(req.params)
# Clean all Storlet stuff from the request headers
# we do not need them anymore, and they
# may interfere with the rest of the execution.
self._clean_storlet_stuff_from_request(req.headers)
req.headers.pop('X-Run-Storlet')
slog_path = self. \
paths.slog_path(self.idata['storlet_main_class'])
storlet_pipe_path = self. \
paths.host_storlet_pipe(self.idata['storlet_main_class'])
sprotocol = StorletInvocationPUTProtocol(sreq,
storlet_pipe_path,
slog_path,
self.storlet_timeout)
out_md, self.data_read_fd = sprotocol.communicate()
self._set_metadata_in_headers(req.headers, out_md)
self._upload_storlet_logs(slog_path)
return out_md, StorletGatewayDocker.IterLike(self.data_read_fd,
self.storlet_timeout,
sprotocol._cancel)
def gatewayProxyGETFlow(self, req, container, obj, orig_resp):
# Flow for running the GET computation on the proxy
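        # The object data is consumed through the original response app_iter
        # (StorletSLORequest), hence the SLO invocation protocol is used.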
sreq = StorletSLORequest(self.account, orig_resp, req.params)
self.idata = self._get_storlet_invocation_data(req)
run_time_sbox = RunTimeSandbox(self.account, self.sconf, self.logger)
docker_updated = self.update_docker_container_from_cache()
run_time_sbox.activate_storlet_daemon(self.idata,
docker_updated)
self._add_system_params(req.params)
slog_path = self. \
paths.slog_path(self.idata['storlet_main_class'])
storlet_pipe_path = self. \
paths.host_storlet_pipe(self.idata['storlet_main_class'])
sprotocol = StorletInvocationSLOProtocol(sreq,
storlet_pipe_path,
slog_path,
self.storlet_timeout)
out_md, self.data_read_fd = sprotocol.communicate()
self._set_metadata_in_headers(orig_resp.headers, out_md)
self._upload_storlet_logs(slog_path)
return out_md, StorletGatewayDocker.IterLike(self.data_read_fd,
self.storlet_timeout,
sprotocol._cancel)
def gatewayObjectGetFlow(self, req, container, obj, orig_resp):
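        '''GET flow on the object node: the storlet reads the object data
        directly from the file descriptor behind the original response,
        and its output is returned as an IterLike together with the
        output metadata.
        '''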
sreq = StorletGETRequest(self.account, orig_resp, req.params)
self.idata = self._get_storlet_invocation_data(req)
run_time_sbox = RunTimeSandbox(self.account, self.sconf, self.logger)
docker_updated = self.update_docker_container_from_cache()
run_time_sbox.activate_storlet_daemon(self.idata,
docker_updated)
self._add_system_params(req.params)
slog_path = self. \
paths.slog_path(self.idata['storlet_main_class'])
storlet_pipe_path = self.paths. \
host_storlet_pipe(self.idata['storlet_main_class'])
sprotocol = StorletInvocationGETProtocol(sreq, storlet_pipe_path,
slog_path,
self.storlet_timeout)
out_md, self.data_read_fd = sprotocol.communicate()
orig_resp = sreq._getInitialRequest()
self._set_metadata_in_headers(orig_resp.headers, out_md)
self._upload_storlet_logs(slog_path)
return out_md, StorletGatewayDocker.IterLike(self.data_read_fd,
self.storlet_timeout,
sprotocol._cancel)
def verify_access(self, env, version, account, container, object):
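        '''Verify access to the given object by issuing a HEAD request for
        it through the application pipeline, based on the original request
        environment. Returns (True, response headers) on a 2xx response
        and (False, []) otherwise.
        '''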
self.logger.info('Verify access to {0}/{1}/{2}'.format(account,
container,
object))
new_env = dict(env)
if 'HTTP_TRANSFER_ENCODING' in new_env.keys():
del new_env['HTTP_TRANSFER_ENCODING']
new_env['REQUEST_METHOD'] = 'HEAD'
new_env['swift.source'] = 'SE'
new_env['PATH_INFO'] = os.path.join('/' + version, account,
container, object)
new_env['RAW_PATH_INFO'] = os.path.join('/' + version, account,
container, object)
storlet_req = Request.blank(new_env['PATH_INFO'], new_env)
resp = storlet_req.get_response(self.app)
if resp.status_int < 300 and resp.status_int >= 200:
return True, resp.headers
return False, []
def _validate_mandatory_headers(self, req):
mandatory_md = None
if self.container in [self.sconf['storlet_container']]:
            self.logger.info('PUT method for storlet container. Sanity check')
mandatory_md = ['X-Object-Meta-Storlet-Language',
'X-Object-Meta-Storlet-Interface-Version',
'X-Object-Meta-Storlet-Dependency',
'X-Object-Meta-Storlet-Object-Metadata',
'X-Object-Meta-Storlet-Main']
elif self.container in [self.sconf['storlet_dependency']]:
            self.logger.info('PUT method for storlet dependency. Sanity check')
mandatory_md = ['X-Object-Meta-Storlet-Dependency-Version']
if mandatory_md is not None:
for md in mandatory_md:
if md not in req.headers:
self.logger.info('Mandatory header ' +
'is missing: {0}'.format(md))
return 'Mandatory header is missing: {0}'.format(md)
return None
def _fix_request_headers(self, req):
# add to request the storlet metadata to be used in case the request
# is forwarded to the data node (GET case)
for key, val in self.storlet_metadata.iteritems():
if key.startswith('X-Object-Meta-Storlet'):
req.headers[key] = val
elif key in ['X-Timestamp', 'Content-Length']:
req.headers['X-Storlet-' + key] = val
def _add_system_params(self, params):
        '''Adds Storlet engine specific parameters to the invocation.
        Currently, this consists only of the execution path of the
        Storlet within the Docker container.
        '''
params['storlet_execution_path'] = self. \
paths.sbox_storlet_exec(self.idata['storlet_main_class'])
def _clean_storlet_stuff_from_request(self, headers):
        # iterate over a copy of the keys, since entries are deleted from
        # headers while iterating
        for key in list(headers):
            if (key.startswith('X-Storlet') or
                    key.startswith('X-Object-Meta-Storlet')):
                del headers[key]
        return headers
def _get_storlet_invocation_data(self, req):
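        '''Collect the invocation data needed later in the flow from the
        request headers: storlet name, main class, dependency list,
        original size and timestamp of the storlet object, the invocation
        scope and the request parameters.
        '''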
data = dict()
data['storlet_name'] = req.headers.get('X-Run-Storlet')
data['generate_log'] = req.headers.get('X-Storlet-Generate-Log', False)
data['storlet_original_timestamp'] = req.headers. \
get('X-Storlet-X-Timestamp')
data['storlet_original_size'] = req.headers. \
get('X-Storlet-Content-Length')
data['storlet_md'] = {'storlet_original_timestamp':
data['storlet_original_timestamp'],
'storlet_original_size':
data['storlet_original_size']}
data['storlet_main_class'] = req.headers. \
get('X-Object-Meta-Storlet-Main')
scope = self.account
data['scope'] = scope
if data['scope'].rfind(':') > 0:
data['scope'] = data['scope'][:data['scope'].rfind(':')]
data['storlet_dependency'] = req.headers. \
get('X-Object-Meta-Storlet-Dependency')
data['request_params'] = req.params
return data
def _set_metadata_in_headers(self, headers, md):
if md:
for key, val in md.iteritems():
headers['X-Object-Meta-%s' % key] = val
def _upload_storlet_logs(self, slog_path):
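        '''If log generation was requested for this invocation, upload the
        storlet log file to the configured log container via the internal
        client, under the name '<storlet name up to the first "-">.log'.
        '''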
if (config_true_value(self.idata['generate_log'])):
logfile = open(slog_path, 'r')
client = ic('/etc/swift/storlet-proxy-server.conf', 'SA', 1)
try:
headers = dict()
headers['CONTENT_TYPE'] = 'text/html'
log_obj_name = '%s.log' % \
self.idata['storlet_name'][:self.idata['storlet_name'].
find('-')]
client.upload_object(logfile, self.account,
self.sconf['storlet_logcontainer'],
log_obj_name, headers)
            finally:
                logfile.close()
def bring_from_cache(self, obj_name, is_storlet):
        '''Auxiliary function that:
        (1) Brings obj_name from Swift, whether this is a
            storlet or a storlet dependency.
        (2) Copies it from the local cache into the Docker container.
        If this is a storlet, also validates that the cache holds the
        most recent copy of the storlet compared to the copy residing in
        Swift.
        Returns whether the Docker container was updated with obj_name
        '''
# Determine the cache we are to work with
# e.g. dependency or storlet
if not is_storlet:
cache_dir = self.paths.get_host_dependency_cache_dir()
swift_source_container = self.paths.storlet_dependency
else:
cache_dir = self.paths.get_host_storlet_cache_dir()
swift_source_container = self.paths.storlet_container
if not os.path.exists(cache_dir):
os.makedirs(cache_dir, 0o755)
# cache_target_path is the actual object we need to deal with
# e.g. a concrete storlet or dependency we need to bring/update
cache_target_path = os.path.join(cache_dir, obj_name)
# Determine if we need to update the cache for cache_target_path
# We default for no
update_cache = False
# If it does not exist in cache, we obviously need to bring
if not os.path.isfile(cache_target_path):
update_cache = True
elif is_storlet:
# The cache_target_path exists, we test if it is up-to-date
# with the metadata we got.
            # Note that this is currently applicable to storlets
            # only, and not to dependencies.
            # This will change when dependencies are handled as well
storlet_md = self.idata['storlet_md']
fstat = os.stat(cache_target_path)
storlet_or_size = long(storlet_md['storlet_original_size'])
storlet_or_time = float(storlet_md['storlet_original_timestamp'])
b_storlet_size_changed = fstat.st_size != storlet_or_size
b_storlet_file_updated = float(fstat.st_mtime) < storlet_or_time
if b_storlet_size_changed or b_storlet_file_updated:
update_cache = True
expected_perm = ''
if update_cache:
# If the cache needs to be updated, then get on with it
# bring the object from Swift using ic
client = ic('/etc/swift/storlet-proxy-server.conf', 'SA', 1)
path = client.make_path(self.account, swift_source_container,
obj_name)
self.logger.debug('Invoking ic on path %s' % path)
resp = client.make_request('GET', path, {'PATH_INFO': path}, [200])
fn = open(cache_target_path, 'w')
fn.write(resp.body)
fn.close()
if not is_storlet:
expected_perm = resp.headers. \
get('X-Object-Meta-Storlet-Dependency-Permissions', '')
if expected_perm != '':
os.chmod(cache_target_path, int(expected_perm, 8))
# The node's local cache is now updated.
# We now verify if we need to update the
# Docker container itself.
# The Docker container needs to be updated if:
# 1. The Docker container does not hold a copy of the object
# 2. The Docker container holds an older version of the object
update_docker = False
docker_storlet_path = self.paths. \
host_storlet(self.idata['storlet_main_class'])
docker_target_path = os.path.join(docker_storlet_path, obj_name)
if not os.path.exists(docker_storlet_path):
os.makedirs(docker_storlet_path, 0o755)
update_docker = True
elif not os.path.isfile(docker_target_path):
update_docker = True
else:
fstat_cached_object = os.stat(cache_target_path)
fstat_docker_object = os.stat(docker_target_path)
b_size_changed = fstat_cached_object.st_size \
!= fstat_docker_object.st_size
            # the cached copy is newer than the copy inside the Docker
            # container, so the latter needs to be refreshed
            b_time_changed = float(fstat_cached_object.st_mtime) > \
                float(fstat_docker_object.st_mtime)
if (b_size_changed or b_time_changed):
update_docker = True
if update_docker:
# need to copy from cache to docker
# copy2 also copies the permissions
shutil.copy2(cache_target_path, docker_target_path)
return update_docker
def update_docker_container_from_cache(self):
'''Iterates over the storlet name and its dependencies appearing
        in the invocation data and makes sure they are brought to the
local cache, and from there to the Docker container.
Uses the bring_from_cache auxiliary function.
Returns True if the Docker container was updated
'''
        # where, at the host side, the storlet containers reside
storlet_path = self.paths.host_storlet_prefix()
if not os.path.exists(storlet_path):
os.makedirs(storlet_path, 0o755)
# Iterate over storlet and dependencies, and make sure
# they are updated within the Docker container.
        # return True if any of them was actually
# updated within the Docker container
docker_updated = False
updated = self.bring_from_cache(self.idata['storlet_name'],
True)
docker_updated = docker_updated or updated
if self.idata['storlet_dependency']:
for dep in self.idata['storlet_dependency'].split(','):
updated = self.bring_from_cache(dep, False)
docker_updated = docker_updated or updated
return docker_updated
def validate_conf(middleware_conf):
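    '''Verify that all keys mandatory for the Docker gateway are present
    in the given middleware configuration; raise otherwise.
    '''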
mandatory = ['storlet_logcontainer', 'lxc_root', 'cache_dir',
'log_dir', 'script_dir', 'storlets_dir', 'pipes_dir',
'docker_repo', 'restart_linux_container_timeout']
for key in mandatory:
        if key not in middleware_conf:
raise Exception("Key {} is missing in configuration".format(key))