swiftonhpss/bin/swiftonhpss-nstool

855 lines
34 KiB
Python
Executable File

#!/usr/bin/env python
#
# Copyright (c) 2015-2016 IBM Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
import stat
import os
import multiprocessing
from pwd import getpwuid
import logging
import argparse
import swiftclient
from swiftclient.exceptions import ClientException
import requests
import getpass
from functools import wraps
from simplejson.scanner import JSONDecodeError
"""
Namespace reconciliation tool for SwiftOnHPSS.
Depends on Keystone for authentication, preferably with the Keystone HPSS
identity backend.
Has not been tested with any other authentication service.
WARNING: Do not run nstool on a regular Swift JBOD disk, or 'fun' things
will happen to your metadata.
LIMITATION: In order to save unnecessary file I/O on HPSS, it is highly
recommended to add 'object_post_as_copy=false' to the configuration of
the proxy server that will handle SwiftOnHPSS calls!
LIMITATION: Only supports the Keystone Identity V2 API
"""
# TODO: pull OpenStack details from env vars
def trace_function(f):
    """
    Decorator that logs entry, arguments, result and exit of `f` at INFO level.

    The wrapped function's return value is passed through unchanged. If `f`
    raises, the exception propagates and neither the 'result' nor the
    'exiting' line is logged.

    :param f: function to trace
    :returns: wrapper with the same name and docstring as `f`
    """
    @wraps(f)
    def wrap(*args, **kwargs):
        # Pass values as logging arguments instead of pre-formatting them, so
        # that nothing is stringified when INFO logging is disabled.
        logging.info('entering %s', f.__name__)
        logging.info('args: %s', args)
        logging.info('kwargs: %s', kwargs)
        result = f(*args, **kwargs)
        logging.info('result: %s', result)
        logging.info('exiting %s', f.__name__)
        return result
    return wrap
def _initialize_account_as_user(nstool, admin_keystone_api, username,
                                account_name):
    """
    Grants `username` the '_member_' and 'admin' roles on one account, scopes
    into that account as the same user, and initializes the account.

    Exits the program with status 1 if a role cannot be granted because the
    tenant, user or role is unknown (KeyError), or if authenticating into the
    account fails.

    :param Reconciler nstool: reconciler used to initialize the account
    :param LightweightKeystoneAPI admin_keystone_api:
        Keystone API client scoped into 'admin'
    :param str username: user to grant the roles to
    :param str account_name: account (tenant) name to initialize
    """
    try:
        for role in ('_member_', 'admin'):
            admin_keystone_api.grant_role_on_tenant(account_name, username,
                                                    role)
    except KeyError:
        sys.exit(1)

    user_keystone_api = \
        LightweightKeystoneAPI(admin_keystone_api.url,
                               admin_keystone_api.username,
                               admin_keystone_api.password,
                               account_name,
                               admin_keystone_api.version)
    # Without a token the Swift calls below cannot succeed, so stop here
    # with a clear message instead of failing later.
    if not user_keystone_api.authenticate():
        print("Authentication failed for account %s." % account_name)
        sys.exit(1)
    nstool.initialize_account(keystone_api=user_keystone_api,
                              target_account=account_name)


@trace_function
def main(program_args):
    """
    Runs the tool: authenticates, initializes the requested account(s), then
    reconciles a single container, a single account, or every account.

    :param argparse.Namespace program_args: parsed command-line arguments
    :returns: nothing
    """
    nstool = Reconciler(program_args)

    # The reconciler keeps a reference to this same namespace, so normalizing
    # the device path here is visible to it as well.
    program_args.device = program_args.device.rstrip('/')

    # Get and check authentication
    password = getpass.getpass(prompt='Keystone password for %s@admin: '
                                      % program_args.username)
    try:
        admin_keystone_api, admin_token =\
            nstool.auth_into_keystone(password, 'admin')
    except ValueError:
        logging.debug("Failed login!")
        print("Authentication failed.")
        sys.exit(1)
    del password

    # Figure out what we're doing.
    target_account, target_container = program_args.account,\
        program_args.container

    # Start doing it.
    # A multiprocessing.Pool is deliberately not created here:
    # multiprocessing does not play nicely with the lazy loading that
    # keystoneclient does, so let's not mess with it for now.
    pool = None

    # Doing this convolution because if we scope into a project as root@admin,
    # and initialize it, the symlink always gets written to 'admin' because
    # SwiftOnHPSS only gets the name of the account currently scoped into,
    # even if it's not the same as the account we're modifying containers in.
    # So we just cheat and make ourselves a member of every project, and
    # then scope in as root into that account to initialize it.
    # TODO: make this use a multiprocessing pool
    if program_args.account == "":
        for account in admin_keystone_api.list_tenants():
            _initialize_account_as_user(nstool, admin_keystone_api,
                                        program_args.username,
                                        account['name'])
    else:
        _initialize_account_as_user(nstool, admin_keystone_api,
                                    program_args.username, target_account)

    # Handle reconciling one or all containers.
    if target_container != "":
        swift_api = nstool.auth_into_swift(keystone_api=admin_keystone_api,
                                           target_account=target_account)
        nstool.reconcile_container(keystone_api=admin_keystone_api,
                                   swift_api=swift_api,
                                   target_account=target_account,
                                   target_container=target_container)
    elif target_account != "":
        nstool.reconcile_account(keystone_api=admin_keystone_api,
                                 target_account=target_account,
                                 pool=pool)
    else:
        # reconcile everything
        nstool.reconcile_all_accounts(keystone_api=admin_keystone_api,
                                      pool=pool)

    if pool:
        # close() must come before join(); join() on a running pool fails.
        pool.close()
        pool.join()

    print("Done")
def _parse_bool(value):
    """
    Converts a command-line string into a boolean.

    :param str value: text such as 'True', 'false', 'yes', 'no', '1' or '0'
    :returns: the corresponding boolean
    :rtype: bool
    :raises argparse.ArgumentTypeError: if the text is not recognized
    """
    normalized = value.strip().lower()
    if normalized in ('true', 't', 'yes', 'y', '1'):
        return True
    if normalized in ('false', 'f', 'no', 'n', '0'):
        return False
    raise argparse.ArgumentTypeError("expected True or False, got %r" % value)


def check_usage():
    """
    Checks the user arguments and parses them

    :returns: argument namespace
    :rtype: argparse.Namespace
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-d", "--device",
                        type=str,
                        help="Swift device path",
                        required=True)
    parser.add_argument('-a', "--account",
                        type=str,
                        help="Account to reconcile",
                        default="")
    parser.add_argument('-c', '--container',
                        type=str,
                        help="Container to reconcile",
                        default="")
    parser.add_argument("-k", "--keystone-url",
                        type=str,
                        help="Keystone url endpoint",
                        required=True)
    # TODO: get this from Keystone
    parser.add_argument("-s", "--storage-url",
                        type=str,
                        help="Storage url endpoint",
                        required=True)
    parser.add_argument("-u", "--username",
                        type=str,
                        help="Admin user (default: root)",
                        default="root")
    parser.add_argument("-p", "--prefix",
                        type=str,
                        help="Auth prefix for account",
                        required=True)
    parser.add_argument("-l", "--logfile",
                        type=str,
                        help="Location of log file",
                        default='/var/log/swift/nstool.log')
    # type=bool would treat every non-empty string (even "False") as True,
    # so the text is parsed explicitly instead.
    parser.add_argument("-g", "--verify-ssl",
                        type=_parse_bool,
                        help="Whether Keystone should verify "
                             "SSL certificate (True or False)",
                        default=True)
    parser.add_argument("-n", "--storage-policy-name",
                        type=str,
                        help="Swift storage policy name for SwiftOnHPSS",
                        required=True)
    parser.add_argument("-v", "--verbose",
                        help="Show debug traces",
                        action="store_true")
    return parser.parse_args()
class Reconciler(object):
    """
    Reconciles Swift metadata with the contents of an HPSS-backed directory
    tree rooted at the configured device path.

    The instance reads these attributes from the argument namespace it is
    given: device, storage_url, prefix, keystone_url, username,
    storage_policy_name and verbose.
    """

    def __init__(self, args):
        """
        :param argparse.Namespace args: parsed command-line arguments
        """
        self._args = args

    def _report(self, msg):
        """
        Prints `msg` only when verbose output was requested.
        """
        if self._args.verbose:
            print(msg)

    @trace_function
    def auth_into_swift(self, keystone_api, target_account):
        """
        Gets a Swift API client object that is authorized to make changes to
        the specified Swift account/tenant by name.

        :param LightweightKeystoneAPI keystone_api:
            Keystone API instance
        :param str target_account:
            Account name from Keystone we want to target
        :returns:
            Swift API instance that is authorized to alter the given account
        """
        keystone_acct = keystone_api.get_tenant(target_account)
        # The account URL is <storage url>/<prefix>_<tenant id>
        account_url = "%s/%s_%s" % (self._args.storage_url,
                                    self._args.prefix,
                                    keystone_acct['id'])
        swift_api = \
            swiftclient.client.Connection(preauthurl=account_url,
                                          preauthtoken=keystone_api.auth_token)
        return swift_api

    def auth_into_keystone(self, admin_password, target_project):
        """
        Returns a Keystone client object and auth token if authentication is
        successful.

        Deliberately not traced, so the password never reaches the log.

        :param str admin_password: Password to authenticate with
        :param str target_project: Project to scope into
        :returns: Keystone API client, and authorization token
        :raises ValueError: if authentication fails
        """
        keystone_api = LightweightKeystoneAPI(url=self._args.keystone_url,
                                              username=self._args.username,
                                              password=admin_password,
                                              tenant_name=target_project,
                                              api_version="v2"
                                              )
        if not keystone_api.authenticate():
            raise ValueError('Could not authenticate into Keystone!')
        return keystone_api, keystone_api.auth_token

    @trace_function
    def list_hpss_containers(self, swift_api, target_account):
        """
        Lists containers in Swift metadata that use the given storage policy
        name, for a given account.

        :param target_account:
            Account to look in. Unused here: the listing is made through
            `swift_api`, which is expected to target the account already.
        :param swiftclient.client.Connection swift_api:
            Swift API client, authenticated for a given account
        :return:
            List of container dictionaries with this property
        """
        hpss_containers = []
        # The first positional argument of get_account() is the listing
        # marker, not an account name, so none is passed here.
        containers = swift_api.get_account(full_listing=True)[1]
        for container in containers:
            headers = swift_api.head_container(container['name'])
            # Compare header names case-insensitively.
            storage_policy = None
            for key, value in headers.items():
                if key.lower() == 'x-storage-policy':
                    storage_policy = value
                    break
            if storage_policy == self._args.storage_policy_name:
                hpss_containers.append(container)
        return hpss_containers

    @trace_function
    def list_accounts(self, keystone_api):
        """
        Convenience function to list all Keystone accounts.

        :param LightweightKeystoneAPI keystone_api: Keystone API client
        :returns: List of accounts in Keystone
        :rtype: List of Keystone tenant names
        """
        return [tenant['name'] for tenant in keystone_api.list_tenants()]

    @trace_function
    def initialize_account(self, keystone_api, target_account):
        """
        Initializes a single account in HPSS, or in other words creates the
        account directory in HPSS.

        This is done by putting a '.deleteme' container, with the configured
        storage policy, and a '.deleteme' object inside it.

        :param LightweightKeystoneAPI keystone_api:
            Keystone API client for root user scoped into target account
        :param str target_account: Account name to initialize
        """
        # TODO: decide if account needs initializing before here
        swift_api = self.auth_into_swift(keystone_api=keystone_api,
                                         target_account=target_account)
        temp_name = ".deleteme"
        print("Initializing account %s..." % target_account)
        swift_api.put_container(temp_name,
                                headers={'X-Storage-Policy':
                                         self._args.storage_policy_name})
        swift_api.put_object(temp_name, '.deleteme', contents=None)

    @trace_function
    def reconcile_all_accounts(self, keystone_api, pool):
        """
        Reconciles all accounts.

        :param LightweightKeystoneAPI keystone_api:
            Keystone API client
        :param multiprocessing.Pool pool: Process pool
        :returns: nothing
        """
        all_accounts = self.list_accounts(keystone_api)
        print("Reconciling all accounts...")
        for account in all_accounts:
            self.reconcile_account(keystone_api, account, pool)

    @trace_function
    def reconcile_account(self, keystone_api, target_account, pool):
        """
        Reconciles all the containers in a single account.

        :param LightweightKeystoneAPI keystone_api:
            Keystone API client
        :param str target_account: Account name with containers to reconcile
        :param multiprocessing.Pool pool: Process pool
        :returns: nothing
        """
        # Reject a missing account before it is used to build any path.
        if target_account is None:
            print("Account '%s' does not exist" % target_account)
            logging.error("Account '%s' does not exist" % target_account)
            return

        account_directory = "%s/%s" % (self._args.device, target_account)
        print("Reconciling account %s" % target_account)
        if not (os.path.isdir(account_directory) or
                os.path.islink(account_directory)):
            print("%s is not a directory" % account_directory)
            return

        swift_api = self.auth_into_swift(keystone_api=keystone_api,
                                         target_account=target_account)
        good_containers = \
            self.clean_account(swift_api, target_account)

        # FIXME: Can we figure out a better way to deal with accounts with a
        # ridiculous number of containers?
        if len(good_containers) > 1000000:
            print("Account has too many containers, exiting...")
            return

        # If we're operating on all containers, spawn more worker processes
        for cont in good_containers:
            if pool:
                pool.apply_async(func=self.reconcile_container,
                                 args=(keystone_api, swift_api, target_account,
                                       cont))
            else:
                self.reconcile_container(keystone_api, swift_api,
                                         target_account, cont)

    @trace_function
    def reconcile_container(self, keystone_api, swift_api, target_account,
                            target_container):
        """
        Entry point for worker processes.
        Makes changes to Swift containers depending on the changes made in HPSS

        :param LightweightKeystoneAPI keystone_api: Keystone API client
        :param swiftclient.Connection swift_api: Swift API client
        :param str target_account: Account name the target container belongs to
        :param str target_container: Container name that needs reconciling
        :returns: nothing
        :raises ClientException: if the container cannot be created
        """
        swift_containers = [container['name'] for container in
                            swift_api.get_account(full_listing=True)[1]]

        # Check if container directory exists
        container_dir = "%s/%s/%s" % (self._args.device, target_account,
                                      target_container)
        if not os.path.isdir(container_dir):
            print("%s is not a directory" % container_dir)
            logging.error("%s is not a directory" % container_dir)
            return

        # Check if container exists or else create it
        if target_container not in swift_containers \
                and target_container != "":
            print("Container %s does not exist, creating" % target_container)
            try:
                swift_api.put_container(
                    target_container,
                    headers={'X-Storage-Policy':
                             self._args.storage_policy_name})
            except ClientException:
                print("Putting container %s went wrong" % target_container)
                logging.exception("Putting container %s went wrong" %
                                  target_container)
                raise

        print("Reconciling container %s/%s"
              % (target_account, target_container))
        # Make sure those objects get added into the Swift metadata DBs
        self.add_objects_from_hpss(swift_api, target_container, container_dir)

        # Clear out objects that exist only in Swift
        self.clean_container(swift_api, target_account, target_container)

        # TODO: make sure we don't clobber permissions already existing!
        # Set up permissions for the container in Swift afterwards
        self.configure_permissions(swift_api, keystone_api, target_account,
                                   target_container)

    def _get_object_names(self, container_dir):
        """
        Walks `container_dir` and returns the path of every file found,
        relative to `container_dir`.

        :param str container_dir: Directory to scan
        :returns: List of relative file paths
        """
        objects = []
        for path, _directories, files in os.walk(container_dir):
            for file_name in files:
                objects.append(os.path.relpath(os.path.join(path, file_name),
                                               container_dir))
        return objects

    @trace_function
    def add_objects_from_hpss(self, swift_api, target_container, container_dir):
        """
        Issues a HEAD request for every file found under the container
        directory, then returns the object listing Swift reports for the
        container.

        :param swiftclient.Connection swift_api: Swift API client
        :param str target_container: Container to add objects to
        :param str container_dir: Container directory to scan for objects in
        :returns: List of objects from Swift metadata in container
        :raises ClientException: if a HEAD or the container GET fails
        """
        # TODO: be smarter about what files we HEAD
        hpss_objects = self._get_object_names(container_dir)
        logging.debug('hpss_objects is %s' % str(hpss_objects))
        for obj in hpss_objects:
            print("Adding object %s..." % obj)
            try:
                swift_api.head_object(target_container, obj)
            except ClientException:
                fail_reason = "Updating object %s in container %s went wrong"\
                              % (obj, target_container)
                print(fail_reason)
                raise

        # Get list of objects from this container
        try:
            container_stats, swift_objects =\
                swift_api.get_container(target_container)
            logging.debug('swift_objects is %s' % str(swift_objects))
        except ClientException:
            print("Get on container %s went wrong" % target_container)
            raise
        return swift_objects

    @trace_function
    def clean_account(self, swift_api, target_account):
        """
        Deletes containers in Swift that do not exist on the HPSS filesystem,
        and returns a list of the containers that exist in HPSS.

        :param swiftclient.Connection swift_api: Swift API client
        :param str target_account: Account of the containers that need cleaning
        :returns: List of containers existing on HPSS mount point
        :raises OSError: if the account directory cannot be listed
        :raises ClientException: if a container or object cannot be deleted
        """
        # Check containers in HPSS
        account_directory = "%s/%s" % (self._args.device, target_account)

        # Check containers in Swift
        swift_containers = [container['name'] for container
                            in swift_api.get_account(full_listing=True)[1]]
        try:
            hpss_containers = os.listdir(account_directory)
        except OSError:
            print("Unable to list files under directory: %s"
                  % account_directory)
            raise

        # Delete containers that only exist in Swift, but not HPSS
        # NOTE(review): containers are not filtered by storage policy here;
        # confirm that containers on other policies can never appear in this
        # listing, since they would be treated as ghosts and deleted.
        ghost_containers = list(set(swift_containers) - set(hpss_containers))
        for target_container in ghost_containers:
            try:
                # The container must be emptied before it can be deleted, so
                # every object is needed rather than one listing page.
                objects = swift_api.get_container(target_container,
                                                  full_listing=True)[1]
                for obj in objects:
                    try:
                        swift_api.delete_object(target_container, obj['name'])
                    except ClientException as e:
                        # An object that is already gone is fine; anything
                        # else is a real failure.
                        if e.http_status != 404:
                            raise
                swift_api.delete_container(target_container)
            except ClientException:
                print("Deleting container '%s' went wrong!" % target_container)
                raise
        return hpss_containers

    @trace_function
    def clean_container(self, swift_api, target_account, target_container):
        """
        Deletes Swift objects that do not exist in HPSS from the container

        :param swiftclient.Connection swift_api: Swift API client
        :param str target_account: Account that target container belongs to
        :param str target_container: Container that needs cleaning
        :returns: nothing
        :raises IOError:
            if a delete fails and the object is still listed afterwards
        """
        container_path = "%s/%s/%s" % (self._args.device, target_account,
                                       target_container)
        hpss_objects = self._get_object_names(container_path)
        swift_objects = [obj['name'] for obj in
                         swift_api.get_container(target_container,
                                                 full_listing=True)[1]]
        swift_only_objects = list(set(swift_objects) - set(hpss_objects))

        # If we have objects that only exist in the Swift metadata, delete those
        # objects.
        for target_obj in swift_only_objects:
            try:
                swift_api.delete_object(target_container, target_obj)
            except ClientException:
                # Re-list on every failure so the check is never made against
                # names remembered from an earlier, stale listing.
                remaining_objects = set(
                    obj['name'] for obj in
                    swift_api.get_container(target_container,
                                            full_listing=True)[1])
                if target_obj in remaining_objects:
                    fail_reason =\
                        "Deleting object %s in container %s went wrong"\
                        % (target_obj, target_container)
                    logging.error(fail_reason)
                    print(fail_reason)
                    raise IOError(fail_reason)

    # TODO: clean up this code
    @trace_function
    def configure_permissions(self, swift_api, keystone_api, target_account,
                              target_container):
        """
        Configuring Swift Container ACLs

        Grants read and/or write access on the container to the Unix owner of
        the container directory, according to the owner read/write bits of
        that directory's mode.

        :param swiftclient.Connection swift_api:
            Swift API client
        :param LightweightKeystoneAPI keystone_api:
            Keystone API client
        :param str target_account: Account that container belongs in
        :param str target_container: Container name
        :returns: nothing
        :raises KeyError: if the owning uid has no passwd entry
        :raises IOError: if the owning user does not exist in Keystone
        """
        # TODO: figure out how to deal with files that have more restrictive
        # permissions than the container does
        path = "%s/%s/%s" % (self._args.device, target_account,
                             target_container)

        # Obtain unix user permissions for path
        try:
            path_stat = os.stat(path)
            file_user = getpwuid(path_stat.st_uid).pw_name
            mode = path_stat.st_mode
        except KeyError:
            fail_reason = "Cannot find permissions for %s" % path
            print(fail_reason)
            logging.exception(fail_reason)
            raise

        # Check if file user exists in Keystone
        try:
            keystone_users = [user['name'] for user
                              in keystone_api.list_users()]
        except Exception:
            fail_reason = "Couldn't get user list"
            print(fail_reason)
            logging.exception(fail_reason)
            raise
        if file_user not in keystone_users:
            fail_reason = \
                "Cannot configure proper permissions for this path %s " \
                "because user %s does not exist in keystone" \
                % (path, file_user)
            print(fail_reason)
            logging.error(fail_reason)
            raise IOError(fail_reason)

        # Update container ACLs. Swift's ACL headers are X-Container-Read and
        # X-Container-Write.
        acl = "%s:%s" % (target_account, file_user)
        headers = {}
        if mode & stat.S_IRUSR:
            headers["X-Container-Read"] = acl
        if mode & stat.S_IWUSR:
            headers["X-Container-Write"] = acl
        if not headers:
            return
        try:
            swift_api.post_container(target_container, headers)
        except requests.ConnectionError:
            print("Cannot configure ACLs due to metadata header issue")
            logging.exception(
                "Cannot configure ACLs due to metadata header issue")
            raise
# This only exists because the keystoneclient library is so massive that it has
# to have a lazy-loading mechanism that ensures only one of it can be active,
# so we can't have handles to multiple different Keystone scopes simultaneously
class LightweightKeystoneAPI(object):
    """
    Minimal Keystone client built directly on `requests`.

    Each instance holds its own credentials, target tenant and auth token, so
    several instances scoped into different tenants can be used side by side.

    :param str url: Keystone endpoint URL, used as the prefix of every request
    :param str username: user to authenticate as
    :param str password: password for that user
    :param str tenant_name: tenant (project) to scope into
    :param str api_version:
        'v2' selects the Identity v2 request formats; any other value selects
        the v3-style ones
    """

    MEMBER_ROLE_ID = '9fe2ff9ee4384b1894a90878d3e92bab'

    def __init__(self, url, username, password, tenant_name, api_version='v2'):
        self.url = url
        self.username = username
        self.password = password
        self.tenant_name = tenant_name
        # Filled in by authenticate()
        self.auth_token = None
        self.version = api_version
        logging.debug('New LightweightKeystoneAPI instance created for %s@%s,'
                      ' id = %s',
                      self.username, self.tenant_name, hex(id(self)))

    def _get_keystone_response(self, method, url, headers=None, json=None):
        """
        Performs one HTTP request and decodes the JSON response.

        :param method: requests function to call, e.g. requests.get
        :param str url: full request URL
        :param dict headers: request headers
        :param json: request body, sent as JSON
        :returns: tuple of (response headers, decoded JSON body)
        :raises IOError:
            with the HTTP status code as errno if the status is not 2xx, or
            with only a message if the body is not valid JSON
        """
        resp = method(url=url,
                      headers=headers,
                      json=json)
        logging.debug('Response code: %s', resp.status_code)
        logging.debug('Response content: %s', resp.content)
        if not (200 <= resp.status_code <= 299):
            raise IOError(resp.status_code, 'Request was unsuccessful')
        try:
            resp_json = resp.json()
        except ValueError:
            # simplejson's JSONDecodeError derives from ValueError, and the
            # standard json module raises ValueError too, so this catches
            # both decoders.
            logging.error('Malformed response from Keystone')
            raise IOError('Malformed response from Keystone')
        logging.debug('response json: %s', resp_json)
        return resp.headers, resp_json

    @trace_function
    def authenticate(self):
        """
        Obtains an auth token, unless this instance already has one.

        :returns: True if a token is available, False if the request failed
        :rtype: bool
        """
        if self.auth_token:
            return True

        if self.version == 'v2':
            url = '%s/tokens' % self.url
            token_req = {'auth': {'tenantName': self.tenant_name,
                                  'passwordCredentials': {
                                      'username': self.username,
                                      'password': self.password
                                  }}}
        else:
            url = '%s/auth/tokens' % self.url
            token_req = {'auth': {'identity':
                                  {'methods': ['password'],
                                   'password': {
                                       'user': {
                                           'name': self.username,
                                           'password': self.password,
                                           'domain': {'id': 'default'}
                                       }
                                   }
                                   },
                                  'scope': {
                                      'project': {
                                          'name': self.tenant_name,
                                          'domain': {
                                              'id': 'default'
                                          }
                                      }
                                  }
                                  }
                         }
        try:
            resp_headers, resp_json =\
                self._get_keystone_response(requests.post,
                                            url,
                                            None,
                                            token_req)
        except IOError:
            return False

        # v2 returns the token in the body; otherwise it is read from the
        # X-Subject-Token response header.
        if self.version == 'v2':
            self.auth_token = resp_json['access']['token']['id']
        else:
            self.auth_token = resp_headers['X-Subject-Token']
        logging.debug('New auth token: %s', self.auth_token)
        return True

    @trace_function
    def list_users(self):
        """
        :returns: the 'users' list from Keystone's user listing
        """
        headers = {'X-Auth-Token': self.auth_token}
        resp_headers, resp_json = \
            self._get_keystone_response(requests.get,
                                        '%s/users' % self.url,
                                        headers)
        return resp_json['users']

    @trace_function
    def list_tenants(self):
        """
        :returns: list of tenants (v2) or projects (otherwise)
        """
        if self.version == 'v2':
            url = '%s/tenants' % self.url
        else:
            url = '%s/projects' % self.url
        resp_headers, resp_json = \
            self._get_keystone_response(requests.get,
                                        url,
                                        {'X-Auth-Token': self.auth_token})
        if self.version == 'v2':
            return resp_json['tenants']
        else:
            return resp_json['projects']

    @trace_function
    def list_roles(self):
        """
        :returns: the 'roles' list from Keystone's role listing
        """
        if self.version == 'v2':
            url = '%s/OS-KSADM/roles' % self.url
        else:
            url = '%s/roles' % self.url
        resp_headers, resp_json =\
            self._get_keystone_response(requests.get,
                                        url,
                                        {'X-Auth-Token':
                                         self.auth_token})
        return resp_json['roles']

    @trace_function
    def get_role(self, role_name):
        """
        :param str role_name: name of the role to look up
        :returns: the 'role' entry from Keystone's response
        :raises KeyError: if no role has that name
        """
        role_id = self._get_id_for_role_name(role_name)
        if self.version == 'v2':
            # Same collection that list_roles() reads, plus the role id.
            url = '%s/OS-KSADM/roles/%s' % (self.url, role_id)
        else:
            url = '%s/roles/%s' % (self.url, role_id)
        resp_headers, resp_json = \
            self._get_keystone_response(requests.get,
                                        url,
                                        {'X-Auth-Token':
                                         self.auth_token})
        return resp_json['role']

    @trace_function
    # TODO: write v3 version of this
    def get_tenant(self, target_tenant):
        """
        Looks up one tenant by name using the v2 '/tenants?name=' query.

        :param str target_tenant: tenant name
        :returns: the 'tenant' entry from Keystone's response
        """
        url = '%s/tenants?name=%s' % (self.url, target_tenant)
        headers = {'X-Auth-Token': self.auth_token,
                   'name': target_tenant}
        logging.debug('url: %s', url)
        logging.debug('headers: %s', headers)
        resp_headers, resp_json = self._get_keystone_response(requests.get,
                                                              url,
                                                              headers)
        return resp_json['tenant']

    @trace_function
    def _get_id_for_user_name(self, user_name):
        """
        :param str user_name: user name to look up
        :returns: id of the first user with that name
        :raises KeyError: if no user has that name
        """
        # Accept either key: other code in this file reads 'name' from the
        # same listing.
        users = [user['id'] for user in self.list_users()
                 if user.get('username', user.get('name')) == user_name]
        if not users:
            raise KeyError('%s is not a user' % user_name)
        return users[0]

    @trace_function
    def _get_id_for_tenant_name(self, tenant_name):
        """
        :param str tenant_name: tenant name to look up
        :returns: id of the first tenant with that name
        :raises KeyError: if no tenant has that name
        """
        tenants = [tenant['id'] for tenant in self.list_tenants()
                   if tenant['name'] == tenant_name]
        if not tenants:
            raise KeyError('%s is not a tenant' % tenant_name)
        return tenants[0]

    @trace_function
    def _get_id_for_role_name(self, role_name):
        """
        :param str role_name: role name to look up
        :returns: id of the first role with that name
        :raises KeyError: if no role has that name
        """
        roles = [role['id'] for role in
                 self.list_roles() if role['name'] == role_name]
        if not roles:
            raise KeyError('%s is not a role' % role_name)
        return roles[0]

    @trace_function
    def grant_role_on_tenant(self, target_project, target_user, target_role):
        """
        Grants a role to a user on a tenant. A 409 response is treated as
        "already granted" and ignored.

        :param str target_project: tenant name
        :param str target_user: user name
        :param str target_role: role name
        :raises KeyError: if the tenant, user or role name is unknown
        :raises IOError: if the request fails with anything other than 409
        """
        try:
            tenant_id = self._get_id_for_tenant_name(target_project)
            user_id = self._get_id_for_user_name(target_user)
            role_id = self._get_id_for_role_name(target_role)
        except KeyError as e:
            # args[0] is the message the lookup helpers raise with.
            print("Could not grant role %s to %s@%s (reason: %s)" %
                  (target_role, target_user, target_project, e.args[0]))
            raise

        if self.version == 'v2':
            url = '%s/tenants/%s/users/%s/roles/OS-KSADM/%s' %\
                  (self.url, tenant_id, user_id, role_id)
        else:
            url = '%s/projects/%s/users/%s/roles/%s' %\
                  (self.url, tenant_id, user_id, role_id)

        try:
            self._get_keystone_response(requests.put, url,
                                        {'X-Auth-Token': self.auth_token})
        except IOError as e:
            # We must've already granted ourselves this role. Carry on.
            if e.errno != 409:
                raise
if __name__ == "__main__":
    # Arguments are parsed before the privilege check below.
    _args = check_usage()

    # Refuse to continue unless running with uid 0.
    if os.getuid() != 0:
        print('swiftonhpss-nstool must be run as root')
        sys.exit(1)

    # Initiating Log File: verbose runs log from DEBUG up, otherwise only
    # errors are recorded.
    log_level = logging.DEBUG if _args.verbose else logging.ERROR
    logging.basicConfig(filename=_args.logfile, level=log_level)

    main(_args)