From 4252ff6cc4a319bdef8602c5692db509968c6658 Mon Sep 17 00:00:00 2001 From: Prashanth Pai Date: Thu, 19 May 2016 11:37:44 +0530 Subject: [PATCH] Remove redundant syscalls in POST path During process of POST requests which updates object metadata (xattrs), the following (ordered) sequence of syscalls were being made twice: open(), fstat(), fgetxattr(), close() Intuitively, one may assume that a getxattr() and setxattr() is enough to fulfil the POST request as it is only supposed to update metadata. But this isn't the case. The above series of syscalls is made first during disk_file.open(). This will trigger an update of all stale metadata (outdated size/etag) and the result is retained in a diskfile class attribute named 'self._metadata' Instead of using this pre-fetched metadata, the POST path was internally invoking disk_file.open() again in disk_file.write_metadata(). This is redundant and serves no purpose. self._metadata was being erased during the context manager cleanup of disk_file.open() This change is simple and does the following: * Don't erase fetched metadata during context manager exit of open() * Use a different internal variable to detect and raise DiskFileNotOpen * Re-use self._metadata if available in disk_file.write_metadata() Here's comparing syscalls made (POST path) with and without this fix: https://bugzilla.redhat.com/show_bug.cgi?id=1314171#c4 This is a port of this change: http://review.gluster.org/#/c/13668/ Change-Id: I74e6f849f4415f3816bb367e1900924a76bf19c1 Signed-off-by: Prashanth Pai --- swiftonfile/swift/obj/diskfile.py | 23 +++++++++++++++++++---- test/unit/obj/test_diskfile.py | 12 ++++++++++++ 2 files changed, 31 insertions(+), 4 deletions(-) diff --git a/swiftonfile/swift/obj/diskfile.py b/swiftonfile/swift/obj/diskfile.py index ad5404e..334c248 100644 --- a/swiftonfile/swift/obj/diskfile.py +++ b/swiftonfile/swift/obj/diskfile.py @@ -602,6 +602,7 @@ class DiskFile(object): self._put_datadir = self._container_path self._data_file = os.path.join(self._put_datadir, self._obj) + self._disk_file_open = False @property def timestamp(self): @@ -678,6 +679,7 @@ class DiskFile(object): # Re-raise the original exception after fd has been closed raise + self._disk_file_open = True return self def _is_object_expired(self, metadata): @@ -711,7 +713,7 @@ class DiskFile(object): previously invoked the :func:`swift.obj.diskfile.DiskFile.open` method. """ - if self._metadata is None: + if not self._disk_file_open: raise DiskFileNotOpen() return self @@ -731,18 +733,23 @@ class DiskFile(object): the REST API *before* the object has actually been read. It is the responsibility of the implementation to properly handle that. """ + self._disk_file_open = False self._close_fd() def get_metadata(self): """ Provide the metadata for a previously opened object as a dictionary. + This is invoked by Swift code in the GET path as follows: + with disk_file.open(): + metadata = disk_file.get_metadata() + :returns: object's metadata dictionary :raises DiskFileNotOpen: if the :func:`swift.obj.diskfile.DiskFile.open` method was not previously invoked """ - if self._metadata is None: + if not self._disk_file_open: raise DiskFileNotOpen() return self._metadata @@ -751,6 +758,9 @@ class DiskFile(object): Return the metadata for an object without requiring the caller to open the object first. + This method is invoked by Swift code in POST, PUT, HEAD and DELETE path + metadata = disk_file.read_metadata() + :returns: metadata dictionary for an object :raises DiskFileError: this implementation will raise the same errors as the `open()` method. @@ -772,7 +782,7 @@ class DiskFile(object): OS buffer cache :returns: a :class:`swift.obj.diskfile.DiskFileReader` object """ - if self._metadata is None: + if not self._disk_file_open: raise DiskFileNotOpen() dr = DiskFileReader( self._fd, self._threadpool, self._mgr.disk_chunk_size, @@ -965,6 +975,8 @@ class DiskFile(object): Write a block of metadata to an object without requiring the caller to open the object first. + This method is only called in the POST path. + :param metadata: dictionary of metadata to be associated with the object :raises DiskFileError: this implementation will raise the same @@ -983,7 +995,10 @@ class DiskFile(object): metadata. If there are any, it should be appended to the metadata obj the user is trying to write. """ - orig_metadata = self.read_metadata() + # If metadata has been previously fetched, use that. + # Stale metadata (outdated size/etag) would've been updated when + # metadata is fetched for the first time. + orig_metadata = self._metadata or read_metadata(self._data_file) sys_keys = [X_CONTENT_TYPE, X_ETAG, 'name', X_CONTENT_LENGTH, X_OBJECT_TYPE, X_TYPE] diff --git a/test/unit/obj/test_diskfile.py b/test/unit/obj/test_diskfile.py index 658a68a..bc38790 100644 --- a/test/unit/obj/test_diskfile.py +++ b/test/unit/obj/test_diskfile.py @@ -200,6 +200,7 @@ class TestDiskFile(unittest.TestCase): gdf = self._get_diskfile("vol0", "p57", "ufo47", "bar", "z") assert gdf._obj == "z" assert gdf._fd is None + assert gdf._disk_file_open is False assert gdf._metadata is None assert not gdf._is_dir with gdf.open(): @@ -207,6 +208,8 @@ class TestDiskFile(unittest.TestCase): assert not gdf._is_dir assert gdf._fd is not None assert gdf._metadata == exp_md + assert gdf._disk_file_open is True + assert gdf._disk_file_open is False def test_open_and_close(self): mock_close = Mock() @@ -239,12 +242,15 @@ class TestDiskFile(unittest.TestCase): assert gdf._obj == "z" assert gdf._fd is None assert gdf._metadata is None + assert gdf._disk_file_open is False assert not gdf._is_dir with gdf.open(): assert not gdf._is_dir assert gdf._data_file == the_file assert gdf._fd is not None assert gdf._metadata == exp_md, "%r != %r" % (gdf._metadata, exp_md) + assert gdf._disk_file_open is True + assert gdf._disk_file_open is False def test_open_invalid_existing_metadata(self): the_path = os.path.join(self.td, "vol0", "ufo47", "bar") @@ -262,9 +268,12 @@ class TestDiskFile(unittest.TestCase): assert gdf._obj == "z" assert not gdf._is_dir assert gdf._fd is None + assert gdf._disk_file_open is False with gdf.open(): assert gdf._data_file == the_file assert gdf._metadata != inv_md + assert gdf._disk_file_open is True + assert gdf._disk_file_open is False def test_open_isdir(self): the_path = os.path.join(self.td, "vol0", "ufo47", "bar") @@ -284,9 +293,12 @@ class TestDiskFile(unittest.TestCase): gdf = self._get_diskfile("vol0", "p57", "ufo47", "bar", "d") assert gdf._obj == "d" assert gdf._is_dir is False + assert gdf._disk_file_open is False with gdf.open(): assert gdf._is_dir assert gdf._data_file == the_dir + assert gdf._disk_file_open is True + assert gdf._disk_file_open is False def _create_and_get_diskfile(self, dev, par, acc, con, obj, fsize=256): # FIXME: assumes account === volume