swift/test/probe/test_object_expirer.py

374 lines
16 KiB
Python

#!/usr/bin/python -u
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import errno
import os
import random
import time
import uuid
import unittest
from swift.common.internal_client import InternalClient, UnexpectedResponse
from swift.common.manager import Manager
from swift.common.utils import Timestamp
from test.probe.common import ReplProbeTest, ENABLED_POLICIES
from test.probe.brain import BrainSplitter
from swiftclient import client
class TestObjectExpirer(ReplProbeTest):
def setUp(self):
self.expirer = Manager(['object-expirer'])
self.expirer.start()
err = self.expirer.stop()
if err:
raise unittest.SkipTest('Unable to verify object-expirer service')
conf_files = []
for server in self.expirer.servers:
conf_files.extend(server.conf_files())
conf_file = conf_files[0]
self.client = InternalClient(conf_file, 'probe-test', 3)
super(TestObjectExpirer, self).setUp()
self.container_name = 'container-%s' % uuid.uuid4()
self.object_name = 'object-%s' % uuid.uuid4()
self.brain = BrainSplitter(self.url, self.token, self.container_name,
self.object_name)
def _check_obj_in_container_listing(self):
for obj in self.client.iter_objects(self.account,
self.container_name):
if self.object_name == obj['name']:
return True
return False
@unittest.skipIf(len(ENABLED_POLICIES) < 2, "Need more than one policy")
def test_expirer_object_split_brain(self):
old_policy = random.choice(ENABLED_POLICIES)
wrong_policy = random.choice([p for p in ENABLED_POLICIES
if p != old_policy])
# create an expiring object and a container with the wrong policy
self.brain.stop_primary_half()
self.brain.put_container(int(old_policy))
self.brain.put_object(headers={'X-Delete-After': 2})
# get the object timestamp
metadata = self.client.get_object_metadata(
self.account, self.container_name, self.object_name,
headers={'X-Backend-Storage-Policy-Index': int(old_policy)})
create_timestamp = Timestamp(metadata['x-timestamp'])
self.brain.start_primary_half()
# get the expiring object updates in their queue, while we have all
# the servers up
Manager(['object-updater']).once()
self.brain.stop_handoff_half()
self.brain.put_container(int(wrong_policy))
# don't start handoff servers, only wrong policy is available
# make sure auto-created containers get in the account listing
Manager(['container-updater']).once()
# this guy should no-op since it's unable to expire the object
self.expirer.once()
self.brain.start_handoff_half()
self.get_to_final_state()
# validate object is expired
found_in_policy = None
metadata = self.client.get_object_metadata(
self.account, self.container_name, self.object_name,
acceptable_statuses=(4,),
headers={'X-Backend-Storage-Policy-Index': int(old_policy)})
self.assertIn('x-backend-timestamp', metadata)
self.assertEqual(Timestamp(metadata['x-backend-timestamp']),
create_timestamp)
# but it is still in the listing
self.assertTrue(self._check_obj_in_container_listing(),
msg='Did not find listing for %s' % self.object_name)
# clear proxy cache
client.post_container(self.url, self.token, self.container_name, {})
# run the expirer again after replication
self.expirer.once()
# object is not in the listing
self.assertFalse(self._check_obj_in_container_listing(),
msg='Found listing for %s' % self.object_name)
# and validate object is tombstoned
found_in_policy = None
for policy in ENABLED_POLICIES:
metadata = self.client.get_object_metadata(
self.account, self.container_name, self.object_name,
acceptable_statuses=(4,),
headers={'X-Backend-Storage-Policy-Index': int(policy)})
if 'x-backend-timestamp' in metadata:
if found_in_policy:
self.fail('found object in %s and also %s' %
(found_in_policy, policy))
found_in_policy = policy
self.assertIn('x-backend-timestamp', metadata)
self.assertGreater(Timestamp(metadata['x-backend-timestamp']),
create_timestamp)
def test_expirer_doesnt_make_async_pendings(self):
# The object expirer cleans up its own queue. The inner loop
# basically looks like this:
#
# for obj in stuff_to_delete:
# delete_the_object(obj)
# remove_the_queue_entry(obj)
#
# By default, upon receipt of a DELETE request for an expiring
# object, the object servers will create async_pending records to
# clean the expirer queue. Since the expirer cleans its own queue,
# this is unnecessary. The expirer can make requests in such a way
# tha the object server does not write out any async pendings; this
# test asserts that this is the case.
def gather_async_pendings(onodes):
async_pendings = []
for onode in onodes:
device_dir = self.device_dir('', onode)
for ap_pol_dir in os.listdir(device_dir):
if not ap_pol_dir.startswith('async_pending'):
# skip 'objects', 'containers', etc.
continue
async_pending_dir = os.path.join(device_dir, ap_pol_dir)
try:
ap_dirs = os.listdir(async_pending_dir)
except OSError as err:
if err.errno == errno.ENOENT:
pass
else:
raise
else:
for ap_dir in ap_dirs:
ap_dir_fullpath = os.path.join(
async_pending_dir, ap_dir)
async_pendings.extend([
os.path.join(ap_dir_fullpath, ent)
for ent in os.listdir(ap_dir_fullpath)])
return async_pendings
# Make an expiring object in each policy
for policy in ENABLED_POLICIES:
container_name = "expirer-test-%d" % policy.idx
container_headers = {'X-Storage-Policy': policy.name}
client.put_container(self.url, self.token, container_name,
headers=container_headers)
now = time.time()
delete_at = int(now + 2.0)
client.put_object(
self.url, self.token, container_name, "some-object",
headers={'X-Delete-At': str(delete_at),
'X-Timestamp': Timestamp(now).normal},
contents='dontcare')
time.sleep(2.0)
# make sure auto-created expirer-queue containers get in the account
# listing so the expirer can find them
Manager(['container-updater']).once()
# Make sure there's no async_pendings anywhere. Probe tests only run
# on single-node installs anyway, so this set should be small enough
# that an exhaustive check doesn't take too long.
all_obj_nodes = {}
for policy in ENABLED_POLICIES:
for dev in policy.object_ring.devs:
all_obj_nodes[dev['device']] = dev
pendings_before = gather_async_pendings(all_obj_nodes.values())
# expire the objects
Manager(['object-expirer']).once()
pendings_after = gather_async_pendings(all_obj_nodes.values())
self.assertEqual(pendings_after, pendings_before)
def test_expirer_object_should_not_be_expired(self):
# Current object-expirer checks the correctness via x-if-delete-at
# header that it can be deleted by expirer. If there are objects
# either which doesn't have x-delete-at header as metadata or which
# has different x-delete-at value from x-if-delete-at value,
# object-expirer's delete will fail as 412 PreconditionFailed.
# However, if some of the objects are in handoff nodes, the expirer
# can put the tombstone with the timestamp as same as x-delete-at and
# the object consistency will be resolved as the newer timestamp will
# be winner (in particular, overwritten case w/o x-delete-at). This
# test asserts such a situation that, at least, the overwriten object
# which have larger timestamp than the original expirered date should
# be safe.
def put_object(headers):
# use internal client to PUT objects so that X-Timestamp in headers
# is effective
headers['Content-Length'] = '0'
path = self.client.make_path(
self.account, self.container_name, self.object_name)
try:
self.client.make_request('PUT', path, headers, (2,))
except UnexpectedResponse as e:
self.fail(
'Expected 201 for PUT object but got %s' % e.resp.status)
obj_brain = BrainSplitter(self.url, self.token, self.container_name,
self.object_name, 'object', self.policy)
# T(obj_created) < T(obj_deleted with x-delete-at) < T(obj_recreated)
# < T(expirer_executed)
# Recreated obj should be appeared in any split brain case
obj_brain.put_container()
# T(obj_deleted with x-delete-at)
# object-server accepts req only if X-Delete-At is later than 'now'
# so here, T(obj_created) < T(obj_deleted with x-delete-at)
now = time.time()
delete_at = int(now + 2.0)
recreate_at = delete_at + 1.0
put_object(headers={'X-Delete-At': str(delete_at),
'X-Timestamp': Timestamp(now).normal})
# some object servers stopped to make a situation that the
# object-expirer can put tombstone in the primary nodes.
obj_brain.stop_primary_half()
# increment the X-Timestamp explicitly
# (will be T(obj_deleted with x-delete-at) < T(obj_recreated))
put_object(headers={'X-Object-Meta-Expired': 'False',
'X-Timestamp': Timestamp(recreate_at).normal})
# make sure auto-created containers get in the account listing
Manager(['container-updater']).once()
# sanity, the newer object is still there
try:
metadata = self.client.get_object_metadata(
self.account, self.container_name, self.object_name)
except UnexpectedResponse as e:
self.fail(
'Expected 200 for HEAD object but got %s' % e.resp.status)
self.assertIn('x-object-meta-expired', metadata)
# some object servers recovered
obj_brain.start_primary_half()
# sleep until after recreated_at
while time.time() <= recreate_at:
time.sleep(0.1)
# Now, expirer runs at the time after obj is recreated
self.expirer.once()
# verify that original object was deleted by expirer
obj_brain.stop_handoff_half()
try:
metadata = self.client.get_object_metadata(
self.account, self.container_name, self.object_name,
acceptable_statuses=(4,))
except UnexpectedResponse as e:
self.fail(
'Expected 404 for HEAD object but got %s' % e.resp.status)
obj_brain.start_handoff_half()
# and inconsistent state of objects is recovered by replicator
Manager(['object-replicator']).once()
# check if you can get recreated object
try:
metadata = self.client.get_object_metadata(
self.account, self.container_name, self.object_name)
except UnexpectedResponse as e:
self.fail(
'Expected 200 for HEAD object but got %s' % e.resp.status)
self.assertIn('x-object-meta-expired', metadata)
def _test_expirer_delete_outdated_object_version(self, object_exists):
# This test simulates a case where the expirer tries to delete
# an outdated version of an object.
# One case is where the expirer gets a 404, whereas the newest version
# of the object is offline.
# Another case is where the expirer gets a 412, since the old version
# of the object mismatches the expiration time sent by the expirer.
# In any of these cases, the expirer should retry deleting the object
# later, for as long as a reclaim age has not passed.
obj_brain = BrainSplitter(self.url, self.token, self.container_name,
self.object_name, 'object', self.policy)
obj_brain.put_container()
if object_exists:
obj_brain.put_object()
# currently, the object either doesn't exist, or does not have
# an expiration
# stop primary servers and put a newer version of the object, this
# time with an expiration. only the handoff servers will have
# the new version
obj_brain.stop_primary_half()
now = time.time()
delete_at = int(now + 2.0)
obj_brain.put_object({'X-Delete-At': str(delete_at)})
# make sure auto-created containers get in the account listing
Manager(['container-updater']).once()
# update object record in the container listing
Manager(['container-replicator']).once()
# take handoff servers down, and bring up the outdated primary servers
obj_brain.start_primary_half()
obj_brain.stop_handoff_half()
# wait until object expiration time
while time.time() <= delete_at:
time.sleep(0.1)
# run expirer against the outdated servers. it should fail since
# the outdated version does not match the expiration time
self.expirer.once()
# bring all servers up, and run replicator to update servers
obj_brain.start_handoff_half()
Manager(['object-replicator']).once()
# verify the deletion has failed by checking the container listing
self.assertTrue(self._check_obj_in_container_listing(),
msg='Did not find listing for %s' % self.object_name)
# run expirer again, delete should now succeed
self.expirer.once()
# verify the deletion by checking the container listing
self.assertFalse(self._check_obj_in_container_listing(),
msg='Found listing for %s' % self.object_name)
def test_expirer_delete_returns_outdated_404(self):
self._test_expirer_delete_outdated_object_version(object_exists=False)
def test_expirer_delete_returns_outdated_412(self):
self._test_expirer_delete_outdated_object_version(object_exists=True)
if __name__ == "__main__":
unittest.main()