Open object only if it's going to be read

Open()ing an object is necessarry only in two cases:
* Serving a GET request
* Recalculating etag when metadata is stale
  (can be triggered by any type of request)

This change ensures that for requests other than GET, a file is not
opened if the metadata is valid (size and etag accurate). Note that
if metadata is stale, the file is still opened and read to compute etag.
This patch does not change the behaviour of triggering metadata
validation and regeneration for non-GET requests.

This is a port of following change:
http://review.gluster.org/#/c/13684/

Change-Id: I54caf2242dfe94c1feb1060abfba971f862587fa
Signed-off-by: Prashanth Pai <ppai@redhat.com>
This commit is contained in:
Prashanth Pai 2016-05-19 12:01:17 +05:30
parent 87de09c9ec
commit e8d31d9e9a
2 changed files with 89 additions and 7 deletions

View File

@ -651,11 +651,13 @@ class DiskFile(object):
raise DiskFileNotExist
raise
try:
self._stat = do_fstat(self._fd)
if not self._stat:
self._stat = do_fstat(self._fd)
self._is_dir = stat.S_ISDIR(self._stat.st_mode)
obj_size = self._stat.st_size
self._metadata = read_metadata(self._fd)
if not self._metadata:
self._metadata = read_metadata(self._fd)
if not validate_object(self._metadata, self._stat):
self._metadata = create_object_metadata(self._fd, self._stat,
self._metadata)
@ -768,12 +770,43 @@ class DiskFile(object):
This method is invoked by Swift code in POST, PUT, HEAD and DELETE path
metadata = disk_file.read_metadata()
The operations performed here is very similar to those made in open().
This is to avoid opening and closing of file (two syscalls over
network). IOW, this optimization addresses the case where the fd
returned by open() isn't going to be used i.e the file is not read (GET
or metadata recalculation)
:returns: metadata dictionary for an object
:raises DiskFileError: this implementation will raise the same
errors as the `open()` method.
"""
with self.open():
return self.get_metadata()
try:
self._metadata = read_metadata(self._data_file)
except (OSError, IOError) as err:
if err.errno in (errno.ENOENT, errno.ESTALE):
raise DiskFileNotExist
raise err
if self._metadata and self._is_object_expired(self._metadata):
raise DiskFileExpired(metadata=self._metadata)
try:
self._stat = do_stat(self._data_file)
self._is_dir = stat.S_ISDIR(self._stat.st_mode)
except (OSError, IOError) as err:
if err.errno in (errno.ENOENT, errno.ESTALE):
raise DiskFileNotExist
raise err
if not validate_object(self._metadata, self._stat):
# Metadata is stale/invalid. So open the object for reading
# to update Etag and other metadata.
with self.open():
return self.get_metadata()
else:
# Metadata is valid. Don't have to open the file.
self._filter_metadata()
return self._metadata
def reader(self, iter_hook=None, keep_cache=False):
"""
@ -1096,12 +1129,12 @@ class DiskFile(object):
errors as the `create()` method.
"""
try:
metadata = read_metadata(self._data_file)
metadata = self._metadata or read_metadata(self._data_file)
except (IOError, OSError) as err:
if err.errno != errno.ENOENT:
if err.errno not in (errno.ESTALE, errno.ENOENT):
raise
else:
if metadata[X_TIMESTAMP] >= timestamp:
if metadata and metadata[X_TIMESTAMP] >= timestamp:
return
self._threadpool.run_in_thread(self._unlinkold)

View File

@ -211,6 +211,55 @@ class TestDiskFile(unittest.TestCase):
assert gdf._disk_file_open is True
assert gdf._disk_file_open is False
def test_read_metadata_optimize_open_close(self):
the_path = os.path.join(self.td, "vol0", "ufo47", "bar")
the_file = os.path.join(the_path, "z")
os.makedirs(the_path)
with open(the_file, "wb") as fd:
fd.write("1234")
init_md = {
'X-Type': 'Object',
'X-Object-Type': 'file',
'Content-Length': 4,
'ETag': md5("1234").hexdigest(),
'X-Timestamp': normalize_timestamp(os.stat(the_file).st_ctime),
'Content-Type': 'application/octet-stream'}
_metadata[_mapit(the_file)] = init_md
gdf = self._get_diskfile("vol0", "p57", "ufo47", "bar", "z")
assert gdf._obj == "z"
assert gdf._fd is None
assert gdf._disk_file_open is False
assert gdf._metadata is None
assert not gdf._is_dir
# Case 1
# Ensure that reading metadata for non-GET requests
# does not incur opening and closing the file when
# metadata is NOT stale.
mock_open = Mock()
mock_close = Mock()
with mock.patch("swiftonfile.swift.obj.diskfile.do_open", mock_open):
with mock.patch("swiftonfile.swift.obj.diskfile.do_close",
mock_close):
md = gdf.read_metadata()
self.assertEqual(md, init_md)
self.assertFalse(mock_open.called)
self.assertFalse(mock_close.called)
# Case 2
# Ensure that reading metadata for non-GET requests
# still opens and reads the file when metadata is stale
with open(the_file, "a") as fd:
# Append to the existing file to make the stored metadata
# invalid/stale.
fd.write("5678")
md = gdf.read_metadata()
# Check that the stale metadata is recalculated to account for
# change in file content
self.assertNotEqual(md, init_md)
self.assertEqual(md['Content-Length'], 8)
self.assertEqual(md['ETag'], md5("12345678").hexdigest())
def test_open_and_close(self):
mock_close = Mock()