object-storage: remove stat of directories

Gathering the list of all the files in a directory tree was painfully slow
when there were as many directories as files, because we also stat'd every
directory. We only did that so a result cached in memcache could be checked
against the directory mtimes to detect when it had changed. However, there
was no way for a memcache object to be passed down to this code. We have
therefore removed the memcache support entirely, and with it the stat of the
directories.

See BZ 911448 (https://bugzilla.redhat.com/show_bug.cgi?id=911448).

Change-Id: I2625f82eca36c31eeffa84dc76ef7f3b48aafec5
Signed-off-by: Mohammed Junaid <junaid@redhat.com>
Reviewed-on: http://review.gluster.org/5252
Reviewed-by: Peter Portante <pportant@redhat.com>
Tested-by: Peter Portante <pportant@redhat.com>
This commit is contained in:
Mohammed Junaid 2013-06-27 13:13:34 +05:30 committed by Peter Portante
parent c7ce3beec1
commit cc97abca04
2 changed files with 24 additions and 301 deletions

View File

@ -53,11 +53,6 @@ DEFAULT_UID = -1
DEFAULT_GID = -1
PICKLE_PROTOCOL = 2
CHUNK_SIZE = 65536
MEMCACHE_KEY_PREFIX = 'gluster.swift.'
MEMCACHE_ACCOUNT_DETAILS_KEY_PREFIX = MEMCACHE_KEY_PREFIX + \
'account.details.'
MEMCACHE_CONTAINER_DETAILS_KEY_PREFIX = MEMCACHE_KEY_PREFIX + \
'container.details.'
def read_metadata(path):
@ -292,27 +287,12 @@ def _get_container_details_from_fs(cont_path):
return ContainerDetails(bytes_used, object_count, obj_list, dir_list)
def get_container_details(cont_path, memcache=None):
def get_container_details(cont_path):
"""
Return object_list, object_count and bytes_used.
"""
mkey = ''
if memcache:
mkey = MEMCACHE_CONTAINER_DETAILS_KEY_PREFIX + cont_path
cd = memcache.get(mkey)
if cd:
if not cd.dir_list:
cd = None
else:
for (path, mtime) in cd.dir_list:
if mtime != do_stat(path).st_mtime:
cd = None
else:
cd = None
if not cd:
cd = _get_container_details_from_fs(cont_path)
if memcache:
memcache.set(mkey, cd)
cd = _get_container_details_from_fs(cont_path)
return cd.obj_list, cd.object_count, cd.bytes_used
@ -330,12 +310,14 @@ class AccountDetails(object):
self.container_list = container_list
def _get_account_details_from_fs(acc_path, acc_stats):
def _get_account_details_from_fs(acc_path):
"""
Return container_list and container_count.
"""
container_list = []
container_count = 0
if not acc_stats:
acc_stats = do_stat(acc_path)
acc_stats = do_stat(acc_path)
is_dir = (acc_stats.st_mode & 0040000) != 0
if is_dir:
for name in do_listdir(acc_path):
@ -349,29 +331,12 @@ def _get_account_details_from_fs(acc_path, acc_stats):
return AccountDetails(acc_stats.st_mtime, container_count, container_list)
def get_account_details(acc_path, memcache=None):
def get_account_details(acc_path):
"""
Return container_list and container_count.
"""
acc_stats = None
mkey = ''
if memcache:
mkey = MEMCACHE_ACCOUNT_DETAILS_KEY_PREFIX + acc_path
ad = memcache.get(mkey)
if ad:
# FIXME: Do we really need to stat the file? If we are object
# only, then we can track the other Swift HTTP APIs that would
# modify the account and invalidate the cached entry there. If we
# are not object only, are we even called on this path?
acc_stats = do_stat(acc_path)
if ad.mtime != acc_stats.st_mtime:
ad = None
else:
ad = None
if not ad:
ad = _get_account_details_from_fs(acc_path, acc_stats)
if memcache:
memcache.set(mkey, ad)
ad = _get_account_details_from_fs(acc_path)
return ad.container_list, ad.container_count
@ -422,12 +387,11 @@ def _add_timestamp(metadata_i):
return metadata
def get_container_metadata(cont_path, memcache=None):
def get_container_metadata(cont_path):
objects = []
object_count = 0
bytes_used = 0
objects, object_count, bytes_used = get_container_details(cont_path,
memcache)
objects, object_count, bytes_used = get_container_details(cont_path)
metadata = {X_TYPE: CONTAINER,
X_TIMESTAMP: normalize_timestamp(
os_path.getctime(cont_path)),
@ -438,10 +402,10 @@ def get_container_metadata(cont_path, memcache=None):
return _add_timestamp(metadata)
def get_account_metadata(acc_path, memcache=None):
def get_account_metadata(acc_path):
containers = []
container_count = 0
containers, container_count = get_account_details(acc_path, memcache)
containers, container_count = get_account_details(acc_path)
metadata = {X_TYPE: ACCOUNT,
X_TIMESTAMP: normalize_timestamp(
os_path.getctime(acc_path)),
@ -470,13 +434,13 @@ def create_object_metadata(obj_path):
return restore_metadata(obj_path, metadata)
def create_container_metadata(cont_path, memcache=None):
metadata = get_container_metadata(cont_path, memcache)
def create_container_metadata(cont_path):
metadata = get_container_metadata(cont_path)
return restore_metadata(cont_path, metadata)
def create_account_metadata(acc_path, memcache=None):
metadata = get_account_metadata(acc_path, memcache)
def create_account_metadata(acc_path):
metadata = get_account_metadata(acc_path)
return restore_metadata(acc_path, metadata)

View File

@ -422,7 +422,7 @@ class TestUtils(unittest.TestCase):
os.rmdir(td)
def test_get_container_metadata(self):
def _mock_get_container_details(path, memcache=None):
def _mock_get_container_details(path):
o_list = [ 'a', 'b', 'c' ]
o_count = 3
b_used = 47
@ -445,7 +445,7 @@ class TestUtils(unittest.TestCase):
os.rmdir(td)
def test_get_account_metadata(self):
def _mock_get_account_details(path, memcache=None):
def _mock_get_account_details(path):
c_list = [ '123', 'abc' ]
c_count = 2
return c_list, c_count
@ -520,247 +520,6 @@ class TestUtils(unittest.TestCase):
finally:
os.rmdir(td)
def test_container_details_uncached(self):
the_path = "/tmp/bar"
def mock_get_container_details_from_fs(cont_path):
bu = 5
oc = 1
ol = ['foo',]
dl = [('a',100),]
return utils.ContainerDetails(bu, oc, ol, dl)
orig_gcdff = utils._get_container_details_from_fs
utils._get_container_details_from_fs = mock_get_container_details_from_fs
try:
retval = utils.get_container_details(the_path)
cd = mock_get_container_details_from_fs(the_path)
assert retval == (cd.obj_list, cd.object_count, cd.bytes_used)
finally:
utils._get_container_details_from_fs = orig_gcdff
def test_container_details_cached_hit(self):
mc = SimMemcache()
the_path = "/tmp/bar"
def mock_get_container_details_from_fs(cont_path, bu_p=5):
bu = bu_p
oc = 1
ol = ['foo',]
dl = [('a',100),]
return utils.ContainerDetails(bu, oc, ol, dl)
def mock_do_stat(path):
class MockStat(object):
def __init__(self, mtime):
self.st_mtime = mtime
return MockStat(100)
cd = mock_get_container_details_from_fs(the_path, bu_p=6)
mc.set(utils.MEMCACHE_CONTAINER_DETAILS_KEY_PREFIX + the_path, cd)
orig_gcdff = utils._get_container_details_from_fs
utils._get_container_details_from_fs = mock_get_container_details_from_fs
orig_ds = utils.do_stat
utils.do_stat = mock_do_stat
try:
retval = utils.get_container_details(the_path, memcache=mc)
# If it did not properly use memcache, the default mocked version
# of get details from fs would return 5 bytes used instead of the
# 6 we specified above.
cd = mock_get_container_details_from_fs(the_path, bu_p=6)
assert retval == (cd.obj_list, cd.object_count, cd.bytes_used)
finally:
utils._get_container_details_from_fs = orig_gcdff
utils.do_stat = orig_ds
def test_container_details_cached_miss_key(self):
mc = SimMemcache()
the_path = "/tmp/bar"
def mock_get_container_details_from_fs(cont_path, bu_p=5):
bu = bu_p
oc = 1
ol = ['foo',]
dl = [('a',100),]
return utils.ContainerDetails(bu, oc, ol, dl)
def mock_do_stat(path):
# Be sure we don't miss due to mtimes not matching
self.fail("do_stat should not have been called")
cd = mock_get_container_details_from_fs(the_path + "u", bu_p=6)
mc.set(utils.MEMCACHE_CONTAINER_DETAILS_KEY_PREFIX + the_path + "u", cd)
orig_gcdff = utils._get_container_details_from_fs
utils._get_container_details_from_fs = mock_get_container_details_from_fs
orig_ds = utils.do_stat
utils.do_stat = mock_do_stat
try:
retval = utils.get_container_details(the_path, memcache=mc)
cd = mock_get_container_details_from_fs(the_path)
assert retval == (cd.obj_list, cd.object_count, cd.bytes_used)
mkey = utils.MEMCACHE_CONTAINER_DETAILS_KEY_PREFIX + the_path
assert mkey in mc._d
finally:
utils._get_container_details_from_fs = orig_gcdff
utils.do_stat = orig_ds
def test_container_details_cached_miss_dir_list(self):
mc = SimMemcache()
the_path = "/tmp/bar"
def mock_get_container_details_from_fs(cont_path, bu_p=5):
bu = bu_p
oc = 1
ol = ['foo',]
dl = []
return utils.ContainerDetails(bu, oc, ol, dl)
def mock_do_stat(path):
# Be sure we don't miss due to mtimes not matching
self.fail("do_stat should not have been called")
cd = mock_get_container_details_from_fs(the_path, bu_p=6)
mc.set(utils.MEMCACHE_CONTAINER_DETAILS_KEY_PREFIX + the_path, cd)
orig_gcdff = utils._get_container_details_from_fs
utils._get_container_details_from_fs = mock_get_container_details_from_fs
orig_ds = utils.do_stat
utils.do_stat = mock_do_stat
try:
retval = utils.get_container_details(the_path, memcache=mc)
cd = mock_get_container_details_from_fs(the_path)
assert retval == (cd.obj_list, cd.object_count, cd.bytes_used)
mkey = utils.MEMCACHE_CONTAINER_DETAILS_KEY_PREFIX + the_path
assert mkey in mc._d
assert 5 == mc._d[mkey].bytes_used
finally:
utils._get_container_details_from_fs = orig_gcdff
utils.do_stat = orig_ds
def test_container_details_cached_miss_mtime(self):
mc = SimMemcache()
the_path = "/tmp/bar"
def mock_get_container_details_from_fs(cont_path, bu_p=5):
bu = bu_p
oc = 1
ol = ['foo',]
dl = [('a',100),]
return utils.ContainerDetails(bu, oc, ol, dl)
def mock_do_stat(path):
# Be sure we miss due to mtimes not matching
class MockStat(object):
def __init__(self, mtime):
self.st_mtime = mtime
return MockStat(200)
cd = mock_get_container_details_from_fs(the_path, bu_p=6)
mc.set(utils.MEMCACHE_CONTAINER_DETAILS_KEY_PREFIX + the_path, cd)
orig_gcdff = utils._get_container_details_from_fs
utils._get_container_details_from_fs = mock_get_container_details_from_fs
orig_ds = utils.do_stat
utils.do_stat = mock_do_stat
try:
retval = utils.get_container_details(the_path, memcache=mc)
cd = mock_get_container_details_from_fs(the_path)
assert retval == (cd.obj_list, cd.object_count, cd.bytes_used)
mkey = utils.MEMCACHE_CONTAINER_DETAILS_KEY_PREFIX + the_path
assert mkey in mc._d
assert 5 == mc._d[mkey].bytes_used
finally:
utils._get_container_details_from_fs = orig_gcdff
utils.do_stat = orig_ds
def test_account_details_uncached(self):
the_path = "/tmp/bar"
def mock_get_account_details_from_fs(acc_path, acc_stats):
mt = 100
cc = 2
cl = ['a', 'b']
return utils.AccountDetails(mt, cc, cl)
orig_gcdff = utils._get_account_details_from_fs
utils._get_account_details_from_fs = mock_get_account_details_from_fs
try:
retval = utils.get_account_details(the_path)
ad = mock_get_account_details_from_fs(the_path, None)
assert retval == (ad.container_list, ad.container_count)
finally:
utils._get_account_details_from_fs = orig_gcdff
def test_account_details_cached_hit(self):
mc = SimMemcache()
the_path = "/tmp/bar"
def mock_get_account_details_from_fs(acc_path, acc_stats):
mt = 100
cc = 2
cl = ['a', 'b']
return utils.AccountDetails(mt, cc, cl)
def mock_do_stat(path):
class MockStat(object):
def __init__(self, mtime):
self.st_mtime = mtime
return MockStat(100)
ad = mock_get_account_details_from_fs(the_path, None)
ad.container_list = ['x', 'y']
mc.set(utils.MEMCACHE_ACCOUNT_DETAILS_KEY_PREFIX + the_path, ad)
orig_gcdff = utils._get_account_details_from_fs
orig_ds = utils.do_stat
utils._get_account_details_from_fs = mock_get_account_details_from_fs
utils.do_stat = mock_do_stat
try:
retval = utils.get_account_details(the_path, memcache=mc)
assert retval == (ad.container_list, ad.container_count)
wrong_ad = mock_get_account_details_from_fs(the_path, None)
assert wrong_ad != ad
finally:
utils._get_account_details_from_fs = orig_gcdff
utils.do_stat = orig_ds
def test_account_details_cached_miss(self):
mc = SimMemcache()
the_path = "/tmp/bar"
def mock_get_account_details_from_fs(acc_path, acc_stats):
mt = 100
cc = 2
cl = ['a', 'b']
return utils.AccountDetails(mt, cc, cl)
def mock_do_stat(path):
class MockStat(object):
def __init__(self, mtime):
self.st_mtime = mtime
return MockStat(100)
ad = mock_get_account_details_from_fs(the_path, None)
ad.container_list = ['x', 'y']
mc.set(utils.MEMCACHE_ACCOUNT_DETAILS_KEY_PREFIX + the_path + 'u', ad)
orig_gcdff = utils._get_account_details_from_fs
orig_ds = utils.do_stat
utils._get_account_details_from_fs = mock_get_account_details_from_fs
utils.do_stat = mock_do_stat
try:
retval = utils.get_account_details(the_path, memcache=mc)
correct_ad = mock_get_account_details_from_fs(the_path, None)
assert retval == (correct_ad.container_list, correct_ad.container_count)
assert correct_ad != ad
finally:
utils._get_account_details_from_fs = orig_gcdff
utils.do_stat = orig_ds
def test_account_details_cached_miss_mtime(self):
mc = SimMemcache()
the_path = "/tmp/bar"
def mock_get_account_details_from_fs(acc_path, acc_stats):
mt = 100
cc = 2
cl = ['a', 'b']
return utils.AccountDetails(mt, cc, cl)
def mock_do_stat(path):
class MockStat(object):
def __init__(self, mtime):
self.st_mtime = mtime
return MockStat(100)
ad = mock_get_account_details_from_fs(the_path, None)
ad.container_list = ['x', 'y']
ad.mtime = 200
mc.set(utils.MEMCACHE_ACCOUNT_DETAILS_KEY_PREFIX + the_path, ad)
orig_gcdff = utils._get_account_details_from_fs
orig_ds = utils.do_stat
utils._get_account_details_from_fs = mock_get_account_details_from_fs
utils.do_stat = mock_do_stat
try:
retval = utils.get_account_details(the_path, memcache=mc)
correct_ad = mock_get_account_details_from_fs(the_path, None)
assert retval == (correct_ad.container_list, correct_ad.container_count)
assert correct_ad != ad
finally:
utils._get_account_details_from_fs = orig_gcdff
utils.do_stat = orig_ds
def test_get_account_details_from_fs(self):
orig_cwd = os.getcwd()
td = tempfile.mkdtemp()
@ -769,7 +528,7 @@ class TestUtils(unittest.TestCase):
os.chdir(td)
tf.extractall()
ad = utils._get_account_details_from_fs(td, None)
ad = utils._get_account_details_from_fs(td)
assert ad.mtime == os.path.getmtime(td)
assert ad.container_count == 3
assert set(ad.container_list) == set(['c1', 'c2', 'c3'])
@ -887,14 +646,14 @@ class TestUtils(unittest.TestCase):
def test_get_account_details_from_fs_notadir_w_stats(self):
tf = tempfile.NamedTemporaryFile()
ad = utils._get_account_details_from_fs(tf.name, os.stat(tf.name))
ad = utils._get_account_details_from_fs(tf.name)
assert ad.mtime == os.path.getmtime(tf.name)
assert ad.container_count == 0
assert ad.container_list == []
def test_get_account_details_from_fs_notadir(self):
tf = tempfile.NamedTemporaryFile()
ad = utils._get_account_details_from_fs(tf.name, None)
ad = utils._get_account_details_from_fs(tf.name)
assert ad.mtime == os.path.getmtime(tf.name)
assert ad.container_count == 0
assert ad.container_list == []