Refactor read_metadata() method

This change:
* Simplifies read_metadata() method.
* Validates pickle header before attempting to unpickle.

Change-Id: I08d4187f7f4cc963d095b2cd2bcee3039a7dc858
Signed-off-by: Prashanth Pai <ppai@redhat.com>
This commit is contained in:
Prashanth Pai 2015-10-21 15:46:27 +05:30
parent eb9abdc934
commit fbc07d3ad9
2 changed files with 62 additions and 59 deletions

View File

@ -63,68 +63,72 @@ def normalize_timestamp(timestamp):
return "%016.05f" % (float(timestamp)) return "%016.05f" % (float(timestamp))
def serialize_metadata(metadata):
return pickle.dumps(metadata, PICKLE_PROTOCOL)
def deserialize_metadata(metastr):
"""
Returns dict populated with metadata if deserializing is successful.
Returns empty dict if deserialzing fails.
"""
if metastr.startswith('\x80\x02}') and metastr.endswith('.'):
# Assert that the metadata was indeed pickled before attempting
# to unpickle. This only *reduces* risk of malicious shell code
# being executed. However, it does NOT fix anything.
try:
return pickle.loads(metastr)
except (pickle.UnpicklingError, EOFError, AttributeError,
IndexError, ImportError, AssertionError):
return {}
else:
return {}
def read_metadata(path_or_fd): def read_metadata(path_or_fd):
""" """
Helper function to read the pickled metadata from a File/Directory. Helper function to read the serialized metadata from a File/Directory.
:param path_or_fd: File/Directory path or fd from which to read metadata. :param path_or_fd: File/Directory path or fd from which to read metadata.
:returns: dictionary of metadata :returns: dictionary of metadata
""" """
metadata = None metastr = ''
metadata_s = ''
key = 0 key = 0
while metadata is None: try:
try: while True:
metadata_s += do_getxattr(path_or_fd, metastr += do_getxattr(path_or_fd, '%s%s' %
'%s%s' % (METADATA_KEY, (key or ''))) (METADATA_KEY, (key or '')))
except IOError as err: key += 1
if err.errno == errno.ENODATA: if len(metastr) < MAX_XATTR_SIZE:
if key > 0: # Prevent further getxattr calls
# No errors reading the xattr keys, but since we have not break
# been able to find enough chunks to get a successful except IOError as err:
# unpickle operation, we consider the metadata lost, and if err.errno != errno.ENODATA:
# drop the existing data so that the internal state can be raise
# recreated.
clean_metadata(path_or_fd) if not metastr:
# We either could not find any metadata key, or we could find return {}
# some keys, but were not successful in performing the
# unpickling (missing keys perhaps)? Either way, just report metadata = deserialize_metadata(metastr)
# to the caller we have no metadata. if not metadata:
metadata = {} # Empty dict i.e deserializing of metadata has failed, probably
else: # because it is invalid or incomplete or corrupt
# Note that we don't touch the keys on errors fetching the clean_metadata(path_or_fd)
# data since it could be a transient state.
raise SwiftOnFileSystemIOError( assert isinstance(metadata, dict)
err.errno, '%s, getxattr("%s", %s)' % (err.strerror,
path_or_fd, key))
else:
try:
# If this key provides all or the remaining part of the pickle
# data, we don't need to keep searching for more keys. This
# means if we only need to store data in N xattr key/value
# pair, we only need to invoke xattr get N times. With large
# keys sizes we are shooting for N = 1.
metadata = pickle.loads(metadata_s)
assert isinstance(metadata, dict)
except (EOFError, pickle.UnpicklingError):
# We still are not able recognize this existing data collected
# as a pickled object. Make sure we loop around to try to get
# more from another xattr key.
metadata = None
key += 1
return metadata return metadata
def write_metadata(path_or_fd, metadata): def write_metadata(path_or_fd, metadata):
""" """
Helper function to write pickled metadata for a File/Directory. Helper function to write serialized metadata for a File/Directory.
:param path_or_fd: File/Directory path or fd to write the metadata :param path_or_fd: File/Directory path or fd to write the metadata
:param metadata: dictionary of metadata write :param metadata: dictionary of metadata write
""" """
assert isinstance(metadata, dict) assert isinstance(metadata, dict)
metastr = pickle.dumps(metadata, PICKLE_PROTOCOL) metastr = serialize_metadata(metadata)
key = 0 key = 0
while metastr: while metastr:
try: try:

View File

@ -22,11 +22,11 @@ import xattr
import cPickle as pickle import cPickle as pickle
import tempfile import tempfile
import hashlib import hashlib
import tarfile
import shutil import shutil
from collections import defaultdict from collections import defaultdict
from mock import patch, Mock from mock import patch, Mock
from swiftonfile.swift.common import utils from swiftonfile.swift.common import utils
from swiftonfile.swift.common.utils import deserialize_metadata, serialize_metadata
from swiftonfile.swift.common.exceptions import SwiftOnFileSystemOSError, \ from swiftonfile.swift.common.exceptions import SwiftOnFileSystemOSError, \
SwiftOnFileSystemIOError SwiftOnFileSystemIOError
from swift.common.exceptions import DiskFileNoSpace from swift.common.exceptions import DiskFileNoSpace
@ -154,7 +154,7 @@ class TestUtils(unittest.TestCase):
xkey = _xkey(path, utils.METADATA_KEY) xkey = _xkey(path, utils.METADATA_KEY)
assert len(_xattrs.keys()) == 1 assert len(_xattrs.keys()) == 1
assert xkey in _xattrs assert xkey in _xattrs
assert orig_d == pickle.loads(_xattrs[xkey]) assert orig_d == deserialize_metadata(_xattrs[xkey])
assert _xattr_op_cnt['set'] == 1 assert _xattr_op_cnt['set'] == 1
def test_write_metadata_err(self): def test_write_metadata_err(self):
@ -205,13 +205,13 @@ class TestUtils(unittest.TestCase):
assert xkey in _xattrs assert xkey in _xattrs
assert len(_xattrs[xkey]) <= utils.MAX_XATTR_SIZE assert len(_xattrs[xkey]) <= utils.MAX_XATTR_SIZE
payload += _xattrs[xkey] payload += _xattrs[xkey]
assert orig_d == pickle.loads(payload) assert orig_d == deserialize_metadata(payload)
assert _xattr_op_cnt['set'] == 3, "%r" % _xattr_op_cnt assert _xattr_op_cnt['set'] == 3, "%r" % _xattr_op_cnt
def test_clean_metadata(self): def test_clean_metadata(self):
path = "/tmp/foo/c" path = "/tmp/foo/c"
expected_d = {'a': 'y' * 150000} expected_d = {'a': 'y' * 150000}
expected_p = pickle.dumps(expected_d, utils.PICKLE_PROTOCOL) expected_p = serialize_metadata(expected_d)
for i in range(0, 3): for i in range(0, 3):
xkey = _xkey(path, "%s%s" % (utils.METADATA_KEY, i or '')) xkey = _xkey(path, "%s%s" % (utils.METADATA_KEY, i or ''))
_xattrs[xkey] = expected_p[:utils.MAX_XATTR_SIZE] _xattrs[xkey] = expected_p[:utils.MAX_XATTR_SIZE]
@ -223,7 +223,7 @@ class TestUtils(unittest.TestCase):
def test_clean_metadata_err(self): def test_clean_metadata_err(self):
path = "/tmp/foo/c" path = "/tmp/foo/c"
xkey = _xkey(path, utils.METADATA_KEY) xkey = _xkey(path, utils.METADATA_KEY)
_xattrs[xkey] = pickle.dumps({'a': 'y'}, utils.PICKLE_PROTOCOL) _xattrs[xkey] = serialize_metadata({'a': 'y'})
_xattr_rem_err[xkey] = errno.EOPNOTSUPP _xattr_rem_err[xkey] = errno.EOPNOTSUPP
try: try:
utils.clean_metadata(path) utils.clean_metadata(path)
@ -237,7 +237,7 @@ class TestUtils(unittest.TestCase):
path = "/tmp/foo/r" path = "/tmp/foo/r"
expected_d = {'a': 'y'} expected_d = {'a': 'y'}
xkey = _xkey(path, utils.METADATA_KEY) xkey = _xkey(path, utils.METADATA_KEY)
_xattrs[xkey] = pickle.dumps(expected_d, utils.PICKLE_PROTOCOL) _xattrs[xkey] = serialize_metadata(expected_d)
res_d = utils.read_metadata(path) res_d = utils.read_metadata(path)
assert res_d == expected_d, "Expected %r, result %r" % (expected_d, res_d) assert res_d == expected_d, "Expected %r, result %r" % (expected_d, res_d)
assert _xattr_op_cnt['get'] == 1, "%r" % _xattr_op_cnt assert _xattr_op_cnt['get'] == 1, "%r" % _xattr_op_cnt
@ -252,7 +252,7 @@ class TestUtils(unittest.TestCase):
path = "/tmp/foo/r" path = "/tmp/foo/r"
expected_d = {'a': 'y'} expected_d = {'a': 'y'}
xkey = _xkey(path, utils.METADATA_KEY) xkey = _xkey(path, utils.METADATA_KEY)
_xattrs[xkey] = pickle.dumps(expected_d, utils.PICKLE_PROTOCOL) _xattrs[xkey] = serialize_metadata(expected_d)
_xattr_get_err[xkey] = errno.EOPNOTSUPP _xattr_get_err[xkey] = errno.EOPNOTSUPP
try: try:
utils.read_metadata(path) utils.read_metadata(path)
@ -265,7 +265,7 @@ class TestUtils(unittest.TestCase):
def test_read_metadata_multiple(self): def test_read_metadata_multiple(self):
path = "/tmp/foo/r" path = "/tmp/foo/r"
expected_d = {'a': 'y' * 150000} expected_d = {'a': 'y' * 150000}
expected_p = pickle.dumps(expected_d, utils.PICKLE_PROTOCOL) expected_p = serialize_metadata(expected_d)
for i in range(0, 3): for i in range(0, 3):
xkey = _xkey(path, "%s%s" % (utils.METADATA_KEY, i or '')) xkey = _xkey(path, "%s%s" % (utils.METADATA_KEY, i or ''))
_xattrs[xkey] = expected_p[:utils.MAX_XATTR_SIZE] _xattrs[xkey] = expected_p[:utils.MAX_XATTR_SIZE]
@ -273,12 +273,12 @@ class TestUtils(unittest.TestCase):
assert not expected_p assert not expected_p
res_d = utils.read_metadata(path) res_d = utils.read_metadata(path)
assert res_d == expected_d, "Expected %r, result %r" % (expected_d, res_d) assert res_d == expected_d, "Expected %r, result %r" % (expected_d, res_d)
assert _xattr_op_cnt['get'] == 3, "%r" % _xattr_op_cnt assert _xattr_op_cnt['get'] == 4, "%r" % _xattr_op_cnt
def test_read_metadata_multiple_one_missing(self): def test_read_metadata_multiple_one_missing(self):
path = "/tmp/foo/r" path = "/tmp/foo/r"
expected_d = {'a': 'y' * 150000} expected_d = {'a': 'y' * 150000}
expected_p = pickle.dumps(expected_d, utils.PICKLE_PROTOCOL) expected_p = serialize_metadata(expected_d)
for i in range(0, 2): for i in range(0, 2):
xkey = _xkey(path, "%s%s" % (utils.METADATA_KEY, i or '')) xkey = _xkey(path, "%s%s" % (utils.METADATA_KEY, i or ''))
_xattrs[xkey] = expected_p[:utils.MAX_XATTR_SIZE] _xattrs[xkey] = expected_p[:utils.MAX_XATTR_SIZE]
@ -287,7 +287,6 @@ class TestUtils(unittest.TestCase):
res_d = utils.read_metadata(path) res_d = utils.read_metadata(path)
assert res_d == {} assert res_d == {}
assert _xattr_op_cnt['get'] == 3, "%r" % _xattr_op_cnt assert _xattr_op_cnt['get'] == 3, "%r" % _xattr_op_cnt
assert len(_xattrs.keys()) == 0, "Expected 0 keys, found %d" % len(_xattrs.keys())
def test_restore_metadata_none(self): def test_restore_metadata_none(self):
# No initial metadata # No initial metadata
@ -303,7 +302,7 @@ class TestUtils(unittest.TestCase):
path = "/tmp/foo/i" path = "/tmp/foo/i"
initial_d = {'a': 'z'} initial_d = {'a': 'z'}
xkey = _xkey(path, utils.METADATA_KEY) xkey = _xkey(path, utils.METADATA_KEY)
_xattrs[xkey] = pickle.dumps(initial_d, utils.PICKLE_PROTOCOL) _xattrs[xkey] = serialize_metadata(initial_d)
res_d = utils.restore_metadata(path, {'b': 'y'}) res_d = utils.restore_metadata(path, {'b': 'y'})
expected_d = {'a': 'z', 'b': 'y'} expected_d = {'a': 'z', 'b': 'y'}
assert res_d == expected_d, "Expected %r, result %r" % (expected_d, res_d) assert res_d == expected_d, "Expected %r, result %r" % (expected_d, res_d)
@ -315,7 +314,7 @@ class TestUtils(unittest.TestCase):
path = "/tmp/foo/i" path = "/tmp/foo/i"
initial_d = {'a': 'z'} initial_d = {'a': 'z'}
xkey = _xkey(path, utils.METADATA_KEY) xkey = _xkey(path, utils.METADATA_KEY)
_xattrs[xkey] = pickle.dumps(initial_d, utils.PICKLE_PROTOCOL) _xattrs[xkey] = serialize_metadata(initial_d)
res_d = utils.restore_metadata(path, {}) res_d = utils.restore_metadata(path, {})
expected_d = {'a': 'z'} expected_d = {'a': 'z'}
assert res_d == expected_d, "Expected %r, result %r" % (expected_d, res_d) assert res_d == expected_d, "Expected %r, result %r" % (expected_d, res_d)
@ -398,7 +397,7 @@ class TestUtils(unittest.TestCase):
assert xkey in _xattrs assert xkey in _xattrs
assert _xattr_op_cnt['get'] == 1 assert _xattr_op_cnt['get'] == 1
assert _xattr_op_cnt['set'] == 1 assert _xattr_op_cnt['set'] == 1
md = pickle.loads(_xattrs[xkey]) md = deserialize_metadata(_xattrs[xkey])
assert r_md == md assert r_md == md
for key in self.obj_keys: for key in self.obj_keys:
@ -420,7 +419,7 @@ class TestUtils(unittest.TestCase):
assert xkey in _xattrs assert xkey in _xattrs
assert _xattr_op_cnt['get'] == 1 assert _xattr_op_cnt['get'] == 1
assert _xattr_op_cnt['set'] == 1 assert _xattr_op_cnt['set'] == 1
md = pickle.loads(_xattrs[xkey]) md = deserialize_metadata(_xattrs[xkey])
assert r_md == md assert r_md == md
for key in self.obj_keys: for key in self.obj_keys: