Detect file change when file size remains same

There was a corner case where Etag returned would be incorrect. This
happens when the object is changed from file interface but with object
size remaining the same.

This change introduces an additional metadata that stores the mtime of
object during PUT. This stored mtime is compared with actual mtime
during GET to determine if the file has changed or not. Etag is
recalculated if the file has changed.

The scope of this fix in addressing the above mentioned corner case is
limited to new objects only. Also, refactoring the code further by
moving some methods from utils.py to classes in diskfile.py should
prevent some redundant (f)stat syscalls. These minor optimizations
will be addressed in a separate change.

Change-Id: If724697ef2b17a3c569b60bd81ebf98dab283da6
Signed-off-by: Prashanth Pai <ppai@redhat.com>
This commit is contained in:
Prashanth Pai 2015-10-07 19:41:12 +05:30
parent 3083d14aff
commit da53693c9b
5 changed files with 40 additions and 11 deletions

View File

@ -34,6 +34,7 @@ X_TIMESTAMP = 'X-Timestamp'
X_TYPE = 'X-Type'
X_ETAG = 'ETag'
X_OBJECT_TYPE = 'X-Object-Type'
X_MTIME = 'X-Object-PUT-Mtime'
DIR_TYPE = 'application/directory'
METADATA_KEY = 'user.swift.metadata'
MAX_XATTR_SIZE = 65536
@ -164,7 +165,7 @@ def clean_metadata(path_or_fd):
key += 1
def validate_object(metadata, stat=None):
def validate_object(metadata, statinfo=None):
if not metadata:
return False
@ -176,11 +177,17 @@ def validate_object(metadata, stat=None):
X_OBJECT_TYPE not in metadata.keys():
return False
if stat and (int(metadata[X_CONTENT_LENGTH]) != stat.st_size):
# File length has changed.
# TODO: Handle case where file content has changed but the length
# remains the same.
return False
if statinfo and stat.S_ISREG(statinfo.st_mode):
# File length has changed
if int(metadata[X_CONTENT_LENGTH]) != statinfo.st_size:
return False
# File might have changed with length being the same.
if X_MTIME in metadata and \
normalize_timestamp(metadata[X_MTIME]) != \
normalize_timestamp(statinfo.st_mtime):
return False
if metadata[X_TYPE] == OBJECT:
return True
@ -255,6 +262,7 @@ def get_object_metadata(obj_path_or_fd):
X_CONTENT_TYPE: DIR_TYPE if is_dir else FILE_TYPE,
X_OBJECT_TYPE: DIR_NON_OBJECT if is_dir else FILE,
X_CONTENT_LENGTH: 0 if is_dir else stats.st_size,
X_MTIME: 0 if is_dir else normalize_timestamp(stats.st_mtime),
X_ETAG: md5().hexdigest() if is_dir else _get_etag(obj_path_or_fd)}
return metadata

View File

@ -44,7 +44,7 @@ from swiftonfile.swift.common.utils import read_metadata, write_metadata, \
from swiftonfile.swift.common.utils import X_CONTENT_TYPE, \
X_TIMESTAMP, X_TYPE, X_OBJECT_TYPE, FILE, OBJECT, DIR_TYPE, \
FILE_TYPE, DEFAULT_UID, DEFAULT_GID, DIR_NON_OBJECT, DIR_OBJECT, \
X_ETAG, X_CONTENT_LENGTH
X_ETAG, X_CONTENT_LENGTH, X_MTIME
from swift.obj.diskfile import DiskFileManager as SwiftDiskFileManager
from swift.obj.diskfile import get_async_dir
@ -169,7 +169,7 @@ def make_directory(full_path, uid, gid, metadata=None):
return True, metadata
def _adjust_metadata(metadata):
def _adjust_metadata(fd, metadata):
# Fix up the metadata to ensure it has a proper value for the
# Content-Type metadata, as well as an X_TYPE and X_OBJECT_TYPE
# metadata values.
@ -189,6 +189,12 @@ def _adjust_metadata(metadata):
else:
metadata[X_OBJECT_TYPE] = FILE
# stat.st_mtime does not change after last write(). We set this to later
# detect if the object was changed from filesystem interface (non Swift)
statinfo = do_fstat(fd)
if stat.S_ISREG(statinfo.st_mode):
metadata[X_MTIME] = normalize_timestamp(statinfo.st_mtime)
metadata[X_TYPE] = OBJECT
return metadata
@ -385,7 +391,7 @@ class DiskFileWriter(object):
name
"""
assert self._tmppath is not None
metadata = _adjust_metadata(metadata)
metadata = _adjust_metadata(self._fd, metadata)
df = self._disk_file
if dir_is_object(metadata):

View File

@ -226,3 +226,17 @@ class TestSwiftOnFile(Base):
self.assert_status(200)
self.assertEqual(new_data_hash, metadata['etag'])
self.assertEqual(len(new_data), int(metadata['content_length']))
# Modify the file but let the length remain same
new_data = "I am Antman"
new_data_hash = hashlib.md5(new_data).hexdigest()
with open(file_path, 'w') as f:
f.write(new_data)
# Make sure GET works
self.assertEqual(new_data, object_item.read())
self.assert_status(200)
# Check Etag and content-length is right
metadata = object_item.info()
self.assert_status(200)
self.assertEqual(new_data_hash, metadata['etag'])
self.assertEqual(len(new_data), int(metadata['content_length']))

View File

@ -469,9 +469,9 @@ class TestUtils(unittest.TestCase):
utils.X_CONTENT_LENGTH: '12345',
utils.X_TYPE: utils.OBJECT,
utils.X_OBJECT_TYPE: 'na'}
fake_stat = Mock(st_size=12346)
fake_stat = Mock(st_size=12346, st_mode=33188)
self.assertFalse(utils.validate_object(md, fake_stat))
fake_stat = Mock(st_size=12345)
fake_stat = Mock(st_size=12345, st_mode=33188)
self.assertTrue(utils.validate_object(md, fake_stat))
def test_write_pickle(self):

View File

@ -188,6 +188,7 @@ class TestDiskFile(unittest.TestCase):
'Content-Length': 4,
'ETag': etag,
'X-Timestamp': ts,
'X-Object-PUT-Mtime': normalize_timestamp(stats.st_mtime),
'Content-Type': 'application/octet-stream'}
gdf = self._get_diskfile("vol0", "p57", "ufo47", "bar", "z")
assert gdf._obj == "z"