Use bulk-delete middleware when available

When issuing `delete` commands that would require many individual
deletes (at least twice the number of object-delete threads), check
whether the cluster supports bulk deletes and use that if it's available.

Additionally, a new option is added to the `delete` command:

  * --prefix <prefix>

    Delete all objects that start with <prefix>. This is similar to the
    --prefix option for the `list` command.

Example:

$ swift delete c --prefix obj_prefix/

    ...will delete from container "c" all objects whose name begins with
    "obj_prefix/", such as "obj_prefix/foo" and "obj_prefix/bar".

Change-Id: I6b9504848d6ef562cf4f570bbcd17db4e3da8264
This commit is contained in:
Tim Burke 2015-06-11 14:33:39 -07:00
parent 6ed6c3343f
commit 7a1e192803
8 changed files with 431 additions and 64 deletions

View File

@ -93,6 +93,7 @@ You can specify optional headers with the repeatable cURL-like option
\fBdelete\fR [\fIcommand-options\fR] [\fIcontainer\fR] [\fIobject\fR] [\fIobject\fR] [...]
.RS 4
Deletes everything in the account (with \-\-all), or everything in a container,
or all objects in a container that start with a given string (given by \-\-prefix),
or a list of objects depending on the args given. Segments of manifest objects
will be deleted as well, unless you specify the \-\-leave\-segments option.
For more details and options see swift delete \-\-help.

View File

@ -686,7 +686,7 @@ def head_account(url, token, http_conn=None, service_token=None):
def post_account(url, token, headers, http_conn=None, response_dict=None,
service_token=None):
service_token=None, query_string=None, data=None):
"""
Update an account's metadata.
@ -698,17 +698,23 @@ def post_account(url, token, headers, http_conn=None, response_dict=None,
:param response_dict: an optional dictionary into which to place
the response - status, reason and headers
:param service_token: service auth token
:param query_string: if set will be appended with '?' to generated path
:param data: an optional message body for the request
:raises ClientException: HTTP POST request failed
:returns: resp_headers, body
"""
if http_conn:
parsed, conn = http_conn
else:
parsed, conn = http_connection(url)
method = 'POST'
path = parsed.path
if query_string:
path += '?' + query_string
headers['X-Auth-Token'] = token
if service_token:
headers['X-Service-Token'] = service_token
conn.request(method, parsed.path, '', headers)
conn.request(method, path, data, headers)
resp = conn.getresponse()
body = resp.read()
http_log((url, method,), {'headers': headers}, resp, body)
@ -723,6 +729,10 @@ def post_account(url, token, headers, http_conn=None, response_dict=None,
http_status=resp.status,
http_reason=resp.reason,
http_response_content=body)
resp_headers = {}
for header, value in resp.getheaders():
resp_headers[header.lower()] = value
return resp_headers, body
def get_container(url, token, container, marker=None, limit=None,
@ -1541,9 +1551,11 @@ class Connection(object):
prefix=prefix, end_marker=end_marker,
full_listing=full_listing)
def post_account(self, headers, response_dict=None):
def post_account(self, headers, response_dict=None,
query_string=None, data=None):
"""Wrapper for :func:`post_account`"""
return self._retry(None, post_account, headers,
query_string=query_string, data=data,
response_dict=response_dict)
def head_container(self, container, headers=None):

View File

@ -12,7 +12,9 @@
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import unicode_literals
import logging
import os
from concurrent.futures import as_completed, CancelledError, TimeoutError
@ -41,7 +43,7 @@ from swiftclient.command_helpers import (
)
from swiftclient.utils import (
config_true_value, ReadableToIterable, LengthWrapper, EMPTY_ETAG,
parse_api_response, report_traceback
parse_api_response, report_traceback, n_groups
)
from swiftclient.exceptions import ClientException
from swiftclient.multithreading import MultiThreadingManager
@ -380,6 +382,7 @@ class SwiftService(object):
object_uu_threads=self._options['object_uu_threads'],
container_threads=self._options['container_threads']
)
self.capabilities_cache = {} # Each instance should have its own cache
def __enter__(self):
self.thread_manager.__enter__()
@ -2040,13 +2043,14 @@ class SwiftService(object):
{
'yes_all': False,
'leave_segments': False,
'prefix': None,
}
:returns: A generator for returning the results of the delete
operations. Each result yielded from the generator is either
a 'delete_container', 'delete_object' or 'delete_segment'
dictionary containing the results of an individual delete
operation.
a 'delete_container', 'delete_object', 'delete_segment', or
'bulk_delete' dictionary containing the results of an
individual delete operation.
:raises: ClientException
:raises: SwiftError
@ -2056,19 +2060,24 @@ class SwiftService(object):
else:
options = self._options
rq = Queue()
if container is not None:
if objects is not None:
if options['prefix']:
objects = [obj for obj in objects
if obj.startswith(options['prefix'])]
rq = Queue()
obj_dels = {}
for obj in objects:
obj_del = self.thread_manager.object_dd_pool.submit(
self._delete_object, container, obj, options,
results_queue=rq
)
obj_details = {'container': container, 'object': obj}
obj_dels[obj_del] = obj_details
# Start a thread to watch for upload results
if self._should_bulk_delete(objects):
for obj_slice in n_groups(
objects, self._options['object_dd_threads']):
self._bulk_delete(container, obj_slice, options,
obj_dels)
else:
self._per_item_delete(container, objects, options,
obj_dels, rq)
# Start a thread to watch for delete results
Thread(
target=self._watch_futures, args=(obj_dels, rq)
).start()
@ -2091,6 +2100,8 @@ class SwiftService(object):
else:
if objects:
raise SwiftError('Objects specified without container')
if options['prefix']:
raise SwiftError('Prefix specified without container')
if options['yes_all']:
cancelled = False
containers = []
@ -2114,6 +2125,33 @@ class SwiftService(object):
and not res['success']):
cancelled = True
def _should_bulk_delete(self, objects):
    """
    Decide whether a list of objects should be removed with bulk deletes.

    :param objects: the list of object names that are about to be deleted
    :returns: True only when there are at least twice as many objects as
              the 'object_dd_threads' option and the cluster capabilities
              contain a 'bulk_delete' entry; False otherwise
    """
    if len(objects) >= 2 * self._options['object_dd_threads']:
        try:
            info = self.capabilities()
        except ClientException:
            # Old swift, presumably; assume no bulk middleware
            return False
        # An unsuccessful capabilities result shouldn't actually happen,
        # but treat it as "no bulk delete" just in case.
        if info['success']:
            return 'bulk_delete' in info['capabilities']
    # Not many objects (or no usable capabilities); delete one-by-one
    return False
def _per_item_delete(self, container, objects, options, rdict, rq):
    """
    Submit one delete job per object to the object delete pool.

    :param container: name of the container holding the objects
    :param objects: iterable of object names to delete
    :param options: delete options handed through to each delete job
    :param rdict: dictionary that is updated in place, mapping each
                  submitted future to its {'container', 'object'} details
    :param rq: results queue handed to each job as ``results_queue``
    """
    for name in objects:
        details = {'container': container, 'object': name}
        pool = self.thread_manager.object_dd_pool
        future = pool.submit(
            self._delete_object, container, name, options,
            results_queue=rq)
        rdict[future] = details
@staticmethod
def _delete_segment(conn, container, obj, results_queue=None):
results_dict = {}
@ -2242,18 +2280,20 @@ class SwiftService(object):
def _delete_container(self, container, options):
try:
for part in self.list(container=container):
if part["success"]:
objs = [o['name'] for o in part['listing']]
for part in self.list(container=container, options=options):
if not part["success"]:
o_dels = self.delete(
container=container, objects=objs, options=options
)
for res in o_dels:
yield res
else:
raise part["error"]
for res in self.delete(
container=container,
objects=[o['name'] for o in part['listing']],
options=options):
yield res
if options['prefix']:
# We're only deleting a subset of objects within the container
return
con_del = self.thread_manager.container_pool.submit(
self._delete_empty_container, container
)
@ -2274,9 +2314,55 @@ class SwiftService(object):
yield con_del_res
# Bulk methods
#
def _bulk_delete(self, container, objects, options, rdict):
    """
    Submit a single bulk-delete job for a group of objects.

    Nothing is submitted when ``objects`` is empty.

    :param container: name of the container holding the objects
    :param objects: the group of object names to delete in one request
    :param options: delete options handed through to the bulk job
    :param rdict: dictionary that is updated in place, mapping the
                  submitted future to its {'container', 'objects'} details
    """
    if not objects:
        return
    pool = self.thread_manager.object_dd_pool
    future = pool.submit(self._bulkdelete, container, objects, options)
    rdict[future] = {'container': container, 'objects': objects}
@staticmethod
def _bulkdelete(conn, container, objects, options):
    """
    Delete a group of objects from one container with a single account
    POST that carries the ``bulk-delete`` query string.

    :param conn: connection used to issue the request; it must provide
                 ``post_account`` and an ``attempts`` attribute
                 (presumably injected by the thread pool -- confirm
                 against the pool implementation)
    :param container: name of the container holding the objects
    :param objects: list of object names to delete
    :param options: delete options; not consulted by this method
    :returns: a 'bulk_delete' result dictionary containing 'action',
              'container', 'objects', 'success', 'attempts' and
              'response_dict', plus 'result' (the parsed response body)
              on success or 'error' on failure. 'success' only means
              that a response body was received; callers should inspect
              'result' for per-object errors.
    """
    results_dict = {}
    # Built before the try block so that the exception handler and the
    # final update can always rely on it being bound.
    res = {'container': container, 'objects': objects}
    try:
        req_headers = {
            'Accept': 'application/json',
            'Content-Type': 'text/plain',
        }
        # The request body is one URL-quoted "/<container>/<object>"
        # path per line.
        paths = [quote(('/%s/%s' % (container, obj)).encode('utf-8'))
                 for obj in objects]
        resp_headers, body = conn.post_account(
            headers=req_headers,
            query_string='bulk-delete',
            data=b''.join(path.encode('utf-8') + b'\n' for path in paths),
            response_dict=results_dict)
        if body:
            res.update({'success': True,
                        'result': parse_api_response(resp_headers, body)})
        else:
            res.update({
                'success': False,
                'error': SwiftError(
                    'No content received on account POST. '
                    'Is the bulk operations middleware enabled?')})
    except Exception as e:
        # Any failure is reported through the result dictionary rather
        # than raised, so one failed group does not abort the others.
        res.update({'success': False, 'error': e})

    res.update({
        'action': 'bulk_delete',
        'attempts': conn.attempts,
        'response_dict': results_dict
    })

    return res
# Capabilities related methods
#
def capabilities(self, url=None):
def capabilities(self, url=None, refresh_cache=False):
"""
List the cluster capabilities.
@ -2285,30 +2371,29 @@ class SwiftService(object):
:returns: A dictionary containing the capabilities of the cluster.
:raises: ClientException
:raises: SwiftError
"""
if not refresh_cache and url in self.capabilities_cache:
return self.capabilities_cache[url]
res = {
'action': 'capabilities'
'action': 'capabilities',
'timestamp': time(),
}
try:
cap = self.thread_manager.container_pool.submit(
self._get_capabilities, url
)
capabilities = get_future_result(cap)
cap = self.thread_manager.container_pool.submit(
self._get_capabilities, url
)
capabilities = get_future_result(cap)
res.update({
'success': True,
'capabilities': capabilities
})
if url is not None:
res.update({
'success': True,
'capabilities': capabilities
'url': url
})
if url is not None:
res.update({
'url': url
})
except ClientException as err:
if err.http_status != 404:
raise err
raise SwiftError('Account not found', exc=err)
self.capabilities_cache[url] = res
return res
@staticmethod

View File

@ -23,7 +23,8 @@ import socket
from optparse import OptionParser, OptionGroup, SUPPRESS_HELP
from os import environ, walk, _exit as os_exit
from os.path import isfile, isdir, join
from six import text_type
from six import text_type, PY2
from six.moves.urllib.parse import unquote
from sys import argv as sys_argv, exit, stderr
from time import gmtime, strftime
@ -81,6 +82,9 @@ def st_delete(parser, args, output_manager):
parser.add_option(
'-a', '--all', action='store_true', dest='yes_all',
default=False, help='Delete all containers and objects.')
parser.add_option(
'-p', '--prefix', dest='prefix',
help='Only delete items beginning with the <prefix>.')
parser.add_option(
'', '--leave-segments', action='store_true',
dest='leave_segments', default=False,
@ -128,25 +132,55 @@ def st_delete(parser, args, output_manager):
o = r.get('object', '')
a = r.get('attempts')
if r['success']:
if options.verbose:
a = ' [after {0} attempts]'.format(a) if a > 1 else ''
if r['action'] == 'bulk_delete':
if r['success']:
objs = r.get('objects', [])
for o, err in r.get('result', {}).get('Errors', []):
# o will be of the form quote("/<cont>/<obj>")
o = unquote(o)
if PY2:
# In PY3, unquote(unicode) uses utf-8 like we
# want, but PY2 uses latin-1
o = o.encode('latin-1').decode('utf-8')
output_manager.error('Error Deleting: {0}: {1}'
.format(o[1:], err))
try:
objs.remove(o[len(c) + 2:])
except ValueError:
# shouldn't happen, but ignoring it won't hurt
pass
if r['action'] == 'delete_object':
for o in objs:
if options.yes_all:
p = '{0}/{1}'.format(c, o)
else:
p = o
elif r['action'] == 'delete_segment':
p = '{0}/{1}'.format(c, o)
elif r['action'] == 'delete_container':
p = c
output_manager.print_msg('{0}{1}'.format(p, a))
output_manager.print_msg('{0}{1}'.format(p, a))
else:
for o in r.get('objects', []):
output_manager.error('Error Deleting: {0}/{1}: {2}'
.format(c, o, r['error']))
else:
p = '{0}/{1}'.format(c, o) if o else c
output_manager.error('Error Deleting: {0}: {1}'
.format(p, r['error']))
if r['success']:
if options.verbose:
a = (' [after {0} attempts]'.format(a)
if a > 1 else '')
if r['action'] == 'delete_object':
if options.yes_all:
p = '{0}/{1}'.format(c, o)
else:
p = o
elif r['action'] == 'delete_segment':
p = '{0}/{1}'.format(c, o)
elif r['action'] == 'delete_container':
p = c
output_manager.print_msg('{0}{1}'.format(p, a))
else:
p = '{0}/{1}'.format(c, o) if o else c
output_manager.error('Error Deleting: {0}: {1}'
.format(p, r['error']))
except SwiftError as err:
output_manager.error(err.value)

View File

@ -264,3 +264,13 @@ def iter_wrapper(iterable):
# causing the server to close the connection
continue
yield chunk
def n_at_a_time(seq, n):
    """
    Yield consecutive slices of ``seq`` holding at most ``n`` items each.

    :param seq: a sliceable sequence that supports ``len()``
    :param n: maximum number of items per slice; a positive integer
    :returns: a generator of slices of ``seq``; every slice holds ``n``
              items except possibly the last, which holds the remainder
    :raises ValueError: on first iteration if ``n`` is zero, because
                        ``range`` rejects a zero step
    """
    for i in range(0, len(seq), n):
        yield seq[i:i + n]


def n_groups(seq, n):
    """
    Split ``seq`` into at most ``n`` consecutive slices of equal size
    (the last slice may be shorter).

    The slice size is ``len(seq) / n`` rounded up, so fewer than ``n``
    slices may be produced; an empty ``seq`` produces no slices at all.

    :param seq: a sliceable sequence that supports ``len()``
    :param n: maximum number of slices; a positive integer
    :returns: a generator of slices of ``seq``
    :raises ZeroDivisionError: if ``n`` is zero and ``seq`` is not empty
    """
    if len(seq) == 0:
        # The rounded-up size would be 0 here, and a zero step makes
        # range() raise ValueError; any positive size yields nothing.
        return n_at_a_time(seq, 1)
    items_per_group = ((len(seq) - 1) // n) + 1
    return n_at_a_time(seq, items_per_group)

View File

@ -693,25 +693,147 @@ class TestShell(testtools.TestCase):
'x-object-meta-mtime': mock.ANY},
response_dict={})
@mock.patch.object(swiftclient.service.SwiftService, '_should_bulk_delete',
lambda *a: False)
@mock.patch('swiftclient.service.Connection')
def test_delete_account(self, connection):
connection.return_value.get_account.side_effect = [
[None, [{'name': 'container'}]],
[None, [{'name': 'container'}, {'name': 'container2'}]],
[None, [{'name': 'empty_container'}]],
[None, []],
]
connection.return_value.get_container.side_effect = [
[None, [{'name': 'object'}, {'name': 'obj\xe9ct2'}]],
[None, []],
[None, [{'name': 'object'}]],
[None, []],
[None, []],
]
connection.return_value.attempts = 0
argv = ["", "delete", "--all"]
connection.return_value.head_object.return_value = {}
swiftclient.shell.main(argv)
connection.return_value.delete_container.assert_called_with(
'container', response_dict={})
connection.return_value.delete_object.assert_called_with(
'container', 'object', query_string=None, response_dict={})
self.assertEqual(
connection.return_value.delete_object.mock_calls, [
mock.call('container', 'object', query_string=None,
response_dict={}),
mock.call('container', 'obj\xe9ct2', query_string=None,
response_dict={}),
mock.call('container2', 'object', query_string=None,
response_dict={})])
self.assertEqual(
connection.return_value.delete_container.mock_calls, [
mock.call('container', response_dict={}),
mock.call('container2', response_dict={}),
mock.call('empty_container', response_dict={})])
@mock.patch.object(swiftclient.service.SwiftService, '_should_bulk_delete',
                   lambda *a: True)
@mock.patch('swiftclient.service.Connection')
def test_delete_bulk_account(self, connection):
    # `delete --all` with bulk deletes forced on and two object threads:
    # the three objects of 'container' are expected in two bulk-delete
    # POSTs (two names, then one), 'container2' in a third, and every
    # container is then deleted individually.
    conn = connection.return_value
    conn.attempts = 0
    conn.get_account.side_effect = [
        [None, [{'name': 'container'}, {'name': 'container2'}]],
        [None, [{'name': 'empty_container'}]],
        [None, []],
    ]
    conn.get_container.side_effect = [
        [None, [{'name': 'object'}, {'name': 'obj\xe9ct2'},
                {'name': 'object3'}]],
        [None, []],
        [None, [{'name': 'object'}]],
        [None, []],
        [None, []],
    ]
    conn.post_account.return_value = {}, (
        b'{"Number Not Found": 0, "Response Status": "200 OK", '
        b'"Errors": [], "Number Deleted": 1, "Response Body": ""}')

    swiftclient.shell.main(
        ["", "delete", "--all", "--object-threads", "2"])

    payloads = (
        b'/container/object\n/container/obj%C3%A9ct2\n',
        b'/container/object3\n',
        b'/container2/object\n',
    )
    expected_posts = [
        mock.call(query_string='bulk-delete',
                  data=payload,
                  headers={'Content-Type': 'text/plain',
                           'Accept': 'application/json'},
                  response_dict={})
        for payload in payloads]
    self.assertEqual(conn.post_account.mock_calls, expected_posts)

    expected_container_deletes = [
        mock.call(name, response_dict={})
        for name in ('container', 'container2', 'empty_container')]
    self.assertEqual(conn.delete_container.mock_calls,
                     expected_container_deletes)
@mock.patch('swiftclient.service.Connection')
def test_delete_bulk_account_with_capabilities(self, connection):
    # `delete --all` with one object thread against a cluster whose
    # capabilities advertise 'bulk_delete': each container's four objects
    # are expected in a single bulk-delete POST, and the capabilities are
    # expected to be fetched only once.
    conn = connection.return_value
    conn.attempts = 0
    conn.get_capabilities.return_value = {
        'bulk_delete': {
            'max_deletes_per_request': 10000,
            'max_failed_deletes': 1000,
        },
    }
    conn.get_account.side_effect = [
        [None, [{'name': 'container'}]],
        [None, [{'name': 'container2'}]],
        [None, [{'name': 'empty_container'}]],
        [None, []],
    ]
    conn.get_container.side_effect = [
        [None, [{'name': 'object'}, {'name': 'obj\xe9ct2'},
                {'name': 'z_object'}, {'name': 'z_obj\xe9ct2'}]],
        [None, []],
        [None, [{'name': 'object'}, {'name': 'obj\xe9ct2'},
                {'name': 'z_object'}, {'name': 'z_obj\xe9ct2'}]],
        [None, []],
        [None, []],
    ]
    conn.post_account.return_value = {}, (
        b'{"Number Not Found": 0, "Response Status": "200 OK", '
        b'"Errors": [], "Number Deleted": 1, "Response Body": ""}')

    swiftclient.shell.main(
        ["", "delete", "--all", "--object-threads", "1"])

    first_payload = (b'/container/object\n'
                     b'/container/obj%C3%A9ct2\n'
                     b'/container/z_object\n'
                     b'/container/z_obj%C3%A9ct2\n')
    second_payload = (b'/container2/object\n'
                      b'/container2/obj%C3%A9ct2\n'
                      b'/container2/z_object\n'
                      b'/container2/z_obj%C3%A9ct2\n')
    expected_posts = [
        mock.call(query_string='bulk-delete',
                  data=payload,
                  headers={'Content-Type': 'text/plain',
                           'Accept': 'application/json'},
                  response_dict={})
        for payload in (first_payload, second_payload)]
    self.assertEqual(conn.post_account.mock_calls, expected_posts)

    expected_container_deletes = [
        mock.call(name, response_dict={})
        for name in ('container', 'container2', 'empty_container')]
    self.assertEqual(conn.delete_container.mock_calls,
                     expected_container_deletes)

    # only one /info request
    self.assertEqual(conn.get_capabilities.mock_calls, [mock.call(None)])
@mock.patch.object(swiftclient.service.SwiftService, '_should_bulk_delete',
lambda *a: False)
@mock.patch('swiftclient.service.Connection')
def test_delete_container(self, connection):
connection.return_value.get_container.side_effect = [
@ -727,6 +849,28 @@ class TestShell(testtools.TestCase):
connection.return_value.delete_object.assert_called_with(
'container', 'object', query_string=None, response_dict={})
@mock.patch.object(swiftclient.service.SwiftService, '_should_bulk_delete',
                   lambda *a: True)
@mock.patch('swiftclient.service.Connection')
def test_delete_bulk_container(self, connection):
    # Deleting a whole container with bulk deletes forced on: the listed
    # object is expected in a bulk-delete POST, followed by a delete of
    # the container itself.
    conn = connection.return_value
    conn.attempts = 0
    conn.get_container.side_effect = [
        [None, [{'name': 'object'}]],
        [None, []],
    ]
    conn.post_account.return_value = {}, (
        b'{"Number Not Found": 0, "Response Status": "200 OK", '
        b'"Errors": [], "Number Deleted": 1, "Response Body": ""}')

    swiftclient.shell.main(["", "delete", "container"])

    bulk_headers = {'Content-Type': 'text/plain',
                    'Accept': 'application/json'}
    conn.post_account.assert_called_with(
        query_string='bulk-delete',
        data=b'/container/object\n',
        headers=bulk_headers,
        response_dict={})
    conn.delete_container.assert_called_with(
        'container', response_dict={})
def test_delete_verbose_output_utf8(self):
container = 't\u00e9st_c'
base_argv = ['', '--verbose', 'delete']
@ -759,8 +903,10 @@ class TestShell(testtools.TestCase):
self.assertTrue(out.out.find(
't\u00e9st_c [after 2 attempts]') >= 0, out)
@mock.patch.object(swiftclient.service.SwiftService, '_should_bulk_delete',
lambda *a: False)
@mock.patch('swiftclient.service.Connection')
def test_delete_object(self, connection):
def test_delete_per_object(self, connection):
argv = ["", "delete", "container", "object"]
connection.return_value.head_object.return_value = {}
connection.return_value.attempts = 0
@ -768,6 +914,22 @@ class TestShell(testtools.TestCase):
connection.return_value.delete_object.assert_called_with(
'container', 'object', query_string=None, response_dict={})
@mock.patch.object(swiftclient.service.SwiftService, '_should_bulk_delete',
                   lambda *a: True)
@mock.patch('swiftclient.service.Connection')
def test_delete_bulk_object(self, connection):
    # Deleting one named object with bulk deletes forced on: the object
    # is expected in a bulk-delete POST.
    conn = connection.return_value
    conn.attempts = 0
    conn.post_account.return_value = {}, (
        b'{"Number Not Found": 0, "Response Status": "200 OK", '
        b'"Errors": [], "Number Deleted": 1, "Response Body": ""}')

    swiftclient.shell.main(["", "delete", "container", "object"])

    bulk_headers = {'Content-Type': 'text/plain',
                    'Accept': 'application/json'}
    conn.post_account.assert_called_with(
        query_string='bulk-delete',
        data=b'/container/object\n',
        headers=bulk_headers,
        response_dict={})
def test_delete_verbose_output(self):
del_obj_res = {'success': True, 'response_dict': {}, 'attempts': 2,
'container': 't\xe9st_c', 'action': 'delete_object',

View File

@ -596,6 +596,40 @@ class TestHeadAccount(MockHttpTest):
self.assertEqual(e.__str__()[-89:], new_body)
class TestPostAccount(MockHttpTest):
    # Covers the module-level post_account() call using the fake HTTP
    # connection supplied by MockHttpTest.

    def test_ok(self):
        # A 200 response: the response headers come back keyed in lower
        # case together with the body, and the query string and request
        # body show up in the recorded request.
        account_url = 'http://www.tests.com/path/to/account'
        c.http_connection = self.fake_http_connection(
            200, headers={'X-Account-Meta-Color': 'blue'}, body='foo')

        resp_headers, body = c.post_account(
            account_url, 'asdf', {'x-account-meta-shape': 'square'},
            query_string='bar=baz', data='some data')

        self.assertEqual('blue', resp_headers.get('x-account-meta-color'))
        self.assertEqual('foo', body)
        expected_headers = {'x-auth-token': 'asdf',
                            'x-account-meta-shape': 'square'}
        self.assertRequests([
            ('POST', account_url + '?bar=baz', 'some data',
             expected_headers),
        ])

    def test_server_error(self):
        # A 500 response raises ClientException carrying the status and
        # the response body; no request body is sent.
        error_body = 'c' * 65
        c.http_connection = self.fake_http_connection(500, body=error_body)

        exc = self.assertRaises(c.ClientException, c.post_account,
                                'http://www.tests.com', 'asdf', {})

        self.assertEqual(exc.http_response_content, error_body)
        self.assertEqual(exc.http_status, 500)
        self.assertRequests([
            ('POST', 'http://www.tests.com', None,
             {'x-auth-token': 'asdf'}),
        ])
        # TODO: this is a fairly brittle test of the __repr__ on the
        # ClientException which should probably be in a targeted test
        expected_tail = "[first 60 chars of response] " + error_body[0:60]
        self.assertEqual(exc.__str__()[-89:], expected_tail)
class TestGetContainer(MockHttpTest):
def test_no_content(self):
@ -1976,7 +2010,8 @@ class TestResponseDict(MockHttpTest):
"""
Verify handling of optional response_dict argument.
"""
calls = [('post_container', 'c', {}),
calls = [('post_account', {}),
('post_container', 'c', {}),
('put_container', 'c'),
('delete_container', 'c'),
('post_object', 'c', 'o', {}),

View File

@ -290,3 +290,31 @@ class TestLengthWrapper(testtools.TestCase):
self.assertEqual(segment_length, len(read_data))
self.assertEqual(s, read_data)
self.assertEqual(md5(s).hexdigest(), data.get_md5sum())
class TestGroupers(testtools.TestCase):
    # Checks the slice sizes produced by the n_at_a_time() and n_groups()
    # helpers for a 100-item range.

    def test_n_at_a_time(self):
        # Each case is (slice size, expected slice lengths).
        cases = [
            (9, [9] * 11 + [1]),
            (10, [10] * 10),
            (11, [11] * 9 + [1]),
            (12, [12] * 8 + [4]),
        ]
        for size, expected_lengths in cases:
            slices = list(u.n_at_a_time(range(100), size))
            self.assertEqual(expected_lengths,
                             [len(piece) for piece in slices])

    def test_n_groups(self):
        # Each case is (requested group count, expected slice lengths).
        cases = [
            (9, [12] * 8 + [4]),
            (10, [10] * 10),
            (11, [10] * 10),
            (12, [9] * 11 + [1]),
        ]
        for count, expected_lengths in cases:
            slices = list(u.n_groups(range(100), count))
            self.assertEqual(expected_lengths,
                             [len(piece) for piece in slices])