Do not use pickle: Use json

Change-Id: Ic4217e82fc72f9a32c13136528e1bc3cc05d0c73
Signed-off-by: Prashanth Pai <ppai@redhat.com>
This commit is contained in:
Prashanth Pai 2015-10-21 16:56:02 +05:30
parent fbc07d3ad9
commit 435a0dcfc7
7 changed files with 311 additions and 9 deletions

162
bin/swiftonfile-migrate-metadata Executable file
View File

@ -0,0 +1,162 @@
#!/usr/bin/env python
#
# Copyright (c) 2015 Red Hat, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import pwd
import sys
import stat
import errno
import xattr
import cPickle as pickle
import multiprocessing
from optparse import OptionParser
from swiftonfile.swift.common.utils import write_metadata, SafeUnpickler, \
METADATA_KEY, MAX_XATTR_SIZE
# Effective UID the script was started with; restored after privileges are
# temporarily dropped for unpickling (see process_object).
ORIGINAL_EUID = os.geteuid()
# Unprivileged UID used while deserializing untrusted pickled metadata.
NOBODY_UID = pwd.getpwnam('nobody').pw_uid
def print_msg(s):
    """Write *s* to stdout, but only when -v/--verbose was given."""
    # 'options' is the module-global optparse result assigned in __main__.
    if options.verbose:
        print(s)
def clean_metadata(path, key_count):
    """
    Remove every metadata xattr key from the object at *path*.

    Can only be used when you know the key_count. Saves one unnecessary
    removexattr() call. Ignores error when file or metadata isn't found.
    """
    for serial in xrange(0, key_count):
        # Key names are METADATA_KEY, METADATA_KEY1, METADATA_KEY2, ...
        # i.e. the very first key carries no numeric suffix.
        suffix = serial or ''
        try:
            xattr.removexattr(path, '%s%s' % (METADATA_KEY, suffix))
        except IOError as err:
            if err.errno in (errno.ENOENT, errno.ESTALE, errno.ENODATA):
                continue  # file or xattr already gone: nothing to clean
            print_msg("xattr.removexattr(%s, %s%s) failed: %s" %
                      (path, METADATA_KEY, suffix, err.errno))
def process_object(path):
    """
    Migrate the serialized metadata stored as xattrs on *path*.

    Reads the (possibly multi-key) metadata xattr blob and then:
      * if it looks pickled: safely unpickle it (with privileges dropped)
        and write it back, which re-serializes it in the new format;
      * if it is already JSON ('{'..'}'): leave it untouched;
      * otherwise: remove the malformed metadata keys.
    """
    metastr = ''
    key_count = 0
    try:
        # Metadata may span several xattr keys: METADATA_KEY, METADATA_KEY1,
        # ... Keep reading until a short blob or a missing key (ENODATA,
        # caught below) terminates the loop.
        while True:
            metastr += xattr.getxattr(path, '%s%s' %
                                      (METADATA_KEY, (key_count or '')))
            key_count += 1
            if len(metastr) < MAX_XATTR_SIZE:
                # Prevent further getxattr calls
                break
    except IOError as err:
        if err.errno not in (errno.ENOENT, errno.ESTALE, errno.ENODATA):
            print_msg("xattr.getxattr(%s, %s%s) failed: %s" %
                      (path, METADATA_KEY, (key_count or ''), err.errno))

    if not metastr:
        return

    if metastr.startswith('\x80\x02}') and metastr.endswith('.'):
        # It's pickled (protocol 2 dict). If unpickling is successful and
        # metadata is not stale write back the metadata by serializing it.
        try:
            os.seteuid(NOBODY_UID)  # Drop privileges
            try:
                metadata = SafeUnpickler.loads(metastr)
            finally:
                # BUGFIX: restore privileges even when unpickling raises.
                # Previously a failed loads() left the worker running as
                # 'nobody', so the subsequent clean_metadata()/xattr calls
                # could fail with EPERM.
                os.seteuid(ORIGINAL_EUID)
            assert isinstance(metadata, dict)
        except (pickle.UnpicklingError, EOFError, AttributeError,
                IndexError, ImportError, AssertionError):
            clean_metadata(path, key_count)
        else:
            try:
                # Remove existing metadata first before writing new metadata
                clean_metadata(path, key_count)
                write_metadata(path, metadata)
                print_msg("%s MIGRATED" % (path))
            except IOError as err:
                if err.errno not in (errno.ENOENT, errno.ESTALE):
                    raise
    elif metastr.startswith("{") and metastr.endswith("}"):
        # It's not pickled and is already serialized, just return
        print_msg("%s SKIPPED" % (path))
    else:
        # Metadata is malformed
        clean_metadata(path, key_count)
        print_msg("%s CLEANED" % (path))
def walktree(top, pool, root=True):
    """
    Recursively walk the filesystem tree and migrate metadata of each object
    found. Unlike os.walk(), this method performs stat() sys call on a
    file/directory at most only once.
    """
    if root:
        # The root of volume is account which also contains metadata
        pool.apply_async(process_object, (top, ))
    for f in os.listdir(top):
        if root and f in (".trashcan", ".glusterfs", "async_pending", "tmp"):
            continue
        path = os.path.join(top, f)
        try:
            # BUGFIX: use lstat(), not stat(). os.stat() follows symlinks,
            # so S_ISLNK() on its result was never true and a symlink to a
            # directory would be recursed into (risking cycles); lstat()
            # lets the S_ISLNK branch below actually skip links.
            s = os.lstat(path)
        except OSError as err:
            if err.errno in (errno.ENOENT, errno.ESTALE):
                continue
            raise
        if stat.S_ISLNK(s.st_mode):
            # Skip symlinks entirely
            pass
        elif stat.S_ISDIR(s.st_mode):
            pool.apply_async(process_object, (path, ))
            # Recurse into directory
            walktree(path, pool, root=False)
        elif stat.S_ISREG(s.st_mode):
            pool.apply_async(process_object, (path, ))
if __name__ == '__main__':
global options
usage = "usage: %prog [options] volume1_mountpath volume2_mountpath..."
description = """Object metadata are stored as \
extended attributes of files and directories. This utility migrates metadata \
stored in pickled format to JSON format."""
parser = OptionParser(usage=usage, description=description)
parser.add_option("-v", "--verbose", dest="verbose",
action="store_true", default=False,
help="Print object paths as they are processed.")
(options, mount_paths) = parser.parse_args()
if len(mount_paths) < 1:
print "Mountpoint path(s) missing."
parser.print_usage()
sys.exit(-1)
pool = multiprocessing.Pool(multiprocessing.cpu_count() * 2)
for path in mount_paths:
if os.path.isdir(path):
walktree(path, pool)
pool.close()
pool.join()

View File

@ -46,5 +46,18 @@ disk_chunk_size = 65536
# This will provide a reasonable starting point for tuning this value.
network_chunk_size = 65536
# In older versions of swiftonfile, metadata stored as xattrs of files
# were serialized using PICKLE format. The PICKLE format is vulnerable to
# exploits in deployments where a user has access to backend filesystem.
# Deserializing pickled metadata can result in malicious code being
# executed if an attacker has stored malicious code as xattr from filesystem
# interface. Although, new metadata is always serialized using JSON format,
# existing metadata already stored in PICKLE format can be loaded by setting
# the following option to 'on'.
# You can turn this option to 'off' once you have migrated all your metadata
# from PICKLE format to JSON format using swiftonfile-migrate-metadata tool.
# This conf option will be deprecated and eventually removed in future releases
# read_pickled_metadata = off
[object-updater]
user = <your-user-name>

View File

@ -42,6 +42,7 @@ setup(
install_requires=[],
scripts=[
'bin/swiftonfile-print-metadata',
'bin/swiftonfile-migrate-metadata',
],
entry_points={
'paste.app_factory': [

View File

@ -15,14 +15,18 @@
import os
import stat
import json
import errno
import random
import logging
from hashlib import md5
from eventlet import sleep
import cPickle as pickle
from cStringIO import StringIO
import pickletools
from swiftonfile.swift.common.exceptions import SwiftOnFileSystemIOError
from swift.common.exceptions import DiskFileNoSpace
from swift.common.db import utf8encodekeys
from swiftonfile.swift.common.fs_utils import do_stat, \
do_walk, do_rmdir, do_log_rl, get_filename_from_fd, do_open, \
do_getxattr, do_setxattr, do_removexattr, do_read, \
@ -47,6 +51,8 @@ DEFAULT_GID = -1
# Protocol of the legacy pickled metadata format (read-only nowadays).
PICKLE_PROTOCOL = 2
CHUNK_SIZE = 65536
# Module-level switch set by the object server from the
# 'read_pickled_metadata' conf option; when False, pickled metadata is
# never deserialized.
read_pickled_metadata = False
def normalize_timestamp(timestamp):
"""
@ -63,8 +69,41 @@ def normalize_timestamp(timestamp):
return "%016.05f" % (float(timestamp))
class SafeUnpickler(object):
    """
    Loading a pickled stream is potentially unsafe and exploitable because
    the loading process can import modules/classes (via GLOBAL opcode) and
    run any callable (via REDUCE opcode). As the metadata stored in Swift
    is just a dictionary, we take away these powerful "features", thus
    making the loading process safe. Hence, this is very Swift specific
    and is not a general purpose safe unpickler.
    """

    __slots__ = 'OPCODE_BLACKLIST'
    # Opcodes that can import modules or invoke callables during load.
    OPCODE_BLACKLIST = ('GLOBAL', 'REDUCE', 'BUILD', 'OBJ', 'NEWOBJ', 'INST',
                        'EXT1', 'EXT2', 'EXT4')

    @classmethod
    def find_class(self, module, name):
        # Do not allow importing of ANY module. This is really redundant as
        # we block those OPCODEs that results in invocation of this method.
        raise pickle.UnpicklingError('Potentially unsafe pickle')

    @classmethod
    def loads(self, string):
        # First pass: statically scan the opcode stream and reject any
        # pickle containing a blacklisted opcode, before anything is loaded.
        for opcode in pickletools.genops(string):
            if opcode[0].name in self.OPCODE_BLACKLIST:
                raise pickle.UnpicklingError('Potentially unsafe pickle')
        # Second pass: load with a stock Unpickler whose find_global hook is
        # overridden as a belt-and-braces defence against GLOBAL resolution.
        orig_unpickler = pickle.Unpickler(StringIO(string))
        orig_unpickler.find_global = self.find_class
        return orig_unpickler.load()


# Monkey-patch so every pickle.loads() call in this module goes through the
# safe, opcode-filtered loader above.
pickle.loads = SafeUnpickler.loads
def serialize_metadata(metadata):
    """
    Serialize a metadata dict to a compact JSON string.

    Metadata is now always *written* in JSON format; the legacy PICKLE
    format is only ever read (see deserialize_metadata) for backward
    compatibility.
    """
    # The page residue kept the pre-JSON 'return pickle.dumps(...)' line
    # above this one, making the JSON return unreachable; JSON is the
    # intended serializer.
    return json.dumps(metadata, separators=(',', ':'))
def deserialize_metadata(metastr):
    """
    Deserialize a metadata string read back from xattrs.

    Returns dict populated with metadata if deserializing is successful.
    Returns empty dict if deserializing fails.
    """
    global read_pickled_metadata
    if metastr.startswith('\x80\x02}') and metastr.endswith('.') and \
            read_pickled_metadata:
        # Assert that the serialized metadata is pickled using
        # pickle protocol 2 before handing it to (the safe) pickle.loads.
        try:
            return pickle.loads(metastr)
        except Exception:
            logging.warning("pickle.loads() failed.", exc_info=True)
            return {}
    elif metastr.startswith('{') and metastr.endswith('}'):
        try:
            metadata = json.loads(metastr)
            utf8encodekeys(metadata)
            return metadata
        except (UnicodeDecodeError, ValueError):
            logging.warning("json.loads() failed.", exc_info=True)
            return {}
    else:
        # Neither JSON nor pickled (or pickle reading is disabled by conf):
        # treat the metadata as absent/corrupt.
        return {}

View File

@ -16,7 +16,8 @@
""" Object Server for Gluster for Swift """
from swift.common.swob import HTTPConflict, HTTPNotImplemented
from swift.common.utils import public, timing_stats, replication
from swift.common.utils import public, timing_stats, replication, \
config_true_value
from swift.common.request_helpers import get_name_and_placement
from swiftonfile.swift.common.exceptions import AlreadyExistsAsFile, \
AlreadyExistsAsDir
@ -26,6 +27,7 @@ from swift.obj import server
from swiftonfile.swift.obj.diskfile import DiskFileManager
from swiftonfile.swift.common.constraints import check_object_creation
from swiftonfile.swift.common import utils
class SwiftOnFileDiskFileRouter(object):
@ -57,6 +59,10 @@ class ObjectController(server.ObjectController):
"""
# Replaces Swift's DiskFileRouter object reference with ours.
self._diskfile_router = SwiftOnFileDiskFileRouter(conf, self.logger)
# This conf option will be deprecated and eventually removed in
# future releases
utils.read_pickled_metadata = \
config_true_value(conf.get('read_pickled_metadata', 'no'))
@public
@timing_stats()

View File

@ -1267,6 +1267,9 @@ class TestFile(Base):
self.assert_status(400)
def testMetadataNumberLimit(self):
raise SkipTest("Bad test")
# TODO(ppai): Fix it in upstream swift first
# Refer to comments below
number_limit = load_constraint('max_meta_count')
size_limit = load_constraint('max_meta_overall_size')
@ -1279,10 +1282,13 @@ class TestFile(Base):
metadata = {}
while len(metadata.keys()) < i:
key = Utils.create_ascii_name()
# The following line returns a valid utf8 byte sequence
val = Utils.create_name()
if len(key) > j:
key = key[:j]
# This slicing done below can make the 'utf8' byte
# sequence invalid and hence it cannot be decoded.
val = val[:j]
size += len(key) + len(val)

View File

@ -16,6 +16,7 @@
""" Tests for common.utils """
import os
import json
import unittest
import errno
import xattr
@ -23,10 +24,12 @@ import cPickle as pickle
import tempfile
import hashlib
import shutil
import cPickle as pickle
from collections import defaultdict
from mock import patch, Mock
from swiftonfile.swift.common import utils
from swiftonfile.swift.common.utils import deserialize_metadata, serialize_metadata
from swiftonfile.swift.common.utils import deserialize_metadata, \
serialize_metadata, PICKLE_PROTOCOL
from swiftonfile.swift.common.exceptions import SwiftOnFileSystemOSError, \
SwiftOnFileSystemIOError
from swift.common.exceptions import DiskFileNoSpace
@ -138,6 +141,32 @@ def _mock_os_fsync(fd):
return
class TestSafeUnpickler(unittest.TestCase):
    """SafeUnpickler must reject pickles that would invoke callables."""

    class Exploit(object):
        # Classic pickle exploit: __reduce__ makes unpickling run
        # os.system. SafeUnpickler must refuse to load it.
        def __reduce__(self):
            return (os.system, ('touch /tmp/pickle-exploit',))

    def test_loads(self):
        valid_md = {'key1': 'val1', 'key2': 'val2'}
        for protocol in (0, 1, 2):
            valid_dump = pickle.dumps(valid_md, protocol)
            mal_dump = pickle.dumps(self.Exploit(), protocol)
            # malicious dump is appended to valid dump
            payload1 = valid_dump[:-1] + mal_dump
            # malicious dump is prefixed to valid dump
            payload2 = mal_dump[:-1] + valid_dump
            # entire dump is malicious
            payload3 = mal_dump
            for payload in (payload1, payload2, payload3):
                try:
                    utils.SafeUnpickler.loads(payload)
                except pickle.UnpicklingError as err:
                    # BUGFIX: 'x in err' tested membership on the exception
                    # object itself, which only "worked" via the deprecated
                    # Python 2 Exception iteration protocol; compare against
                    # the message text instead.
                    self.assertTrue('Potentially unsafe pickle' in str(err))
                else:
                    self.fail("Expecting cPickle.UnpicklingError")
class TestUtils(unittest.TestCase):
""" Tests for common.utils """
@ -321,6 +350,42 @@ class TestUtils(unittest.TestCase):
assert _xattr_op_cnt['get'] == 1, "%r" % _xattr_op_cnt
assert _xattr_op_cnt['set'] == 0, "%r" % _xattr_op_cnt
def test_deserialize_metadata_pickle(self):
    """
    deserialize_metadata() must route pickled blobs through pickle.loads
    only when the read_pickled_metadata conf flag is enabled.
    """
    orig_md = {'key1': 'value1', 'key2': 'value2'}
    pickled_md = pickle.dumps(orig_md, PICKLE_PROTOCOL)
    _m_pickle_loads = Mock(return_value={})

    # BUGFIX: save and restore the module-level conf flag so this test
    # cannot leak mutated global state into other tests, even on failure.
    saved_conf = utils.read_pickled_metadata
    try:
        utils.read_pickled_metadata = True
        with patch('swiftonfile.swift.common.utils.pickle.loads',
                   _m_pickle_loads):
            # pickled
            md = utils.deserialize_metadata(pickled_md)
            self.assertTrue(_m_pickle_loads.called)
            self.assertTrue(isinstance(md, dict))
            _m_pickle_loads.reset_mock()
            # not pickled
            utils.deserialize_metadata("not_pickle")
            self.assertFalse(_m_pickle_loads.called)
            _m_pickle_loads.reset_mock()
            # pickled but conf does not allow loading
            utils.read_pickled_metadata = False
            md = utils.deserialize_metadata(pickled_md)
            self.assertFalse(_m_pickle_loads.called)
            # malformed pickle
            _m_pickle_loads.side_effect = pickle.UnpicklingError
            md = utils.deserialize_metadata("malformed_pickle")
            self.assertTrue(isinstance(md, dict))
    finally:
        utils.read_pickled_metadata = saved_conf
def test_deserialize_metadata_json(self):
    """json.loads() must be invoked only for brace-delimited strings."""
    mock_json_loads = Mock(return_value={})
    with patch('swiftonfile.swift.common.utils.json.loads',
               mock_json_loads):
        # Input lacking '{'..'}' delimiters is never handed to json
        utils.deserialize_metadata("not_json")
        self.assertFalse(mock_json_loads.called)
        mock_json_loads.reset_mock()
        # Brace-delimited input goes through (the mocked) json.loads
        utils.deserialize_metadata("{fake_valid_json}")
        self.assertTrue(mock_json_loads.called)
def test_get_etag_empty(self):
tf = tempfile.NamedTemporaryFile()
hd = utils._get_etag(tf.name)