Merge "Use "multihash" for data download validation"

This commit is contained in:
Zuul 2018-09-18 07:58:36 +00:00 committed by Gerrit Code Review
commit a4ea9f0720
8 changed files with 395 additions and 25 deletions

View File

@ -449,6 +449,26 @@ def integrity_iter(iter, checksum):
(md5sum, checksum))
def serious_integrity_iter(iter, hasher, hash_value):
"""Check image data integrity using the Glance "multihash".
:param iter: iterable containing the image data
:param hasher: a hashlib object
:param hash_value: hexdigest of the image data
:raises: IOError if the hashdigest of the data is not hash_value
"""
for chunk in iter:
yield chunk
if isinstance(chunk, six.string_types):
chunk = six.b(chunk)
hasher.update(chunk)
computed = hasher.hexdigest()
if computed != hash_value:
raise IOError(errno.EPIPE,
'Corrupt image download. Hash was %s expected %s' %
(computed, hash_value))
def memoized_property(fn):
attr_name = '_lazy_once_' + fn.__name__

View File

@ -965,7 +965,9 @@ class ShellTestRequests(testutils.TestCase):
self.requests = self.useFixture(rm_fixture.Fixture())
self.requests.get('http://example.com/v2/images/%s/file' % id,
headers=headers, raw=fake)
self.requests.get('http://example.com/v2/images/%s' % id,
headers={'Content-type': 'application/json'},
json=image_show_fixture)
shell = openstack_shell.OpenStackImagesShell()
argstr = ('--os-image-api-version 2 --os-auth-token faketoken '
'--os-image-url http://example.com '

View File

@ -14,6 +14,9 @@
# License for the specific language governing permissions and limitations
# under the License.
import hashlib
UUID = "3fc2ba62-9a02-433e-b565-d493ffc69034"
image_list_fixture = {
@ -65,7 +68,9 @@ image_show_fixture = {
"tags": [],
"updated_at": "2015-07-24T12:18:13Z",
"virtual_size": "null",
"visibility": "shared"
"visibility": "shared",
"os_hash_algo": "sha384",
"os_hash_value": hashlib.sha384(b'DATA').hexdigest()
}
image_create_fixture = {

View File

@ -14,6 +14,7 @@
# under the License.
import errno
import hashlib
import mock
import testtools
@ -193,7 +194,13 @@ data_fixtures = {
'A',
),
},
'/v2/images/66fb18d6-db27-11e1-a1eb-080027cbe205/file': {
'/v2/images/5cc4bebc-db27-11e1-a1eb-080027cbe205': {
'GET': (
{},
{},
),
},
'/v2/images/headeronly-db27-11e1-a1eb-080027cbe205/file': {
'GET': (
{
'content-md5': 'wrong'
@ -201,7 +208,83 @@ data_fixtures = {
'BB',
),
},
'/v2/images/1b1c6366-dd57-11e1-af0f-02163e68b1d8/file': {
'/v2/images/headeronly-db27-11e1-a1eb-080027cbe205': {
'GET': (
{},
{},
),
},
'/v2/images/chkonly-db27-11e1-a1eb-080027cbe205/file': {
'GET': (
{
'content-md5': 'wrong'
},
'BB',
),
},
'/v2/images/chkonly-db27-11e1-a1eb-080027cbe205': {
'GET': (
{},
{
'checksum': 'wrong',
},
),
},
'/v2/images/multihash-db27-11e1-a1eb-080027cbe205/file': {
'GET': (
{
'content-md5': 'wrong'
},
'BB',
),
},
'/v2/images/multihash-db27-11e1-a1eb-080027cbe205': {
'GET': (
{},
{
'checksum': 'wrong',
'os_hash_algo': 'md5',
'os_hash_value': 'junk'
},
),
},
'/v2/images/badalgo-db27-11e1-a1eb-080027cbe205/file': {
'GET': (
{
'content-md5': hashlib.md5(b'BB').hexdigest()
},
'BB',
),
},
'/v2/images/badalgo-db27-11e1-a1eb-080027cbe205': {
'GET': (
{},
{
'checksum': hashlib.md5(b'BB').hexdigest(),
'os_hash_algo': 'not_an_algo',
'os_hash_value': 'whatever'
},
),
},
'/v2/images/bad-multihash-value-good-checksum/file': {
'GET': (
{
'content-md5': hashlib.md5(b'GOODCHECKSUM').hexdigest()
},
'GOODCHECKSUM',
),
},
'/v2/images/bad-multihash-value-good-checksum': {
'GET': (
{},
{
'checksum': hashlib.md5(b'GOODCHECKSUM').hexdigest(),
'os_hash_algo': 'sha512',
'os_hash_value': 'badmultihashvalue'
},
),
},
'/v2/images/headeronly-dd57-11e1-af0f-02163e68b1d8/file': {
'GET': (
{
'content-md5': 'defb99e69a9f1f6e06f15006b1f166ae'
@ -209,6 +292,46 @@ data_fixtures = {
'CCC',
),
},
'/v2/images/headeronly-dd57-11e1-af0f-02163e68b1d8': {
'GET': (
{},
{},
),
},
'/v2/images/chkonly-dd57-11e1-af0f-02163e68b1d8/file': {
'GET': (
{
'content-md5': 'defb99e69a9f1f6e06f15006b1f166ae'
},
'CCC',
),
},
'/v2/images/chkonly-dd57-11e1-af0f-02163e68b1d8': {
'GET': (
{},
{
'checksum': 'defb99e69a9f1f6e06f15006b1f166ae',
},
),
},
'/v2/images/multihash-dd57-11e1-af0f-02163e68b1d8/file': {
'GET': (
{
'content-md5': 'defb99e69a9f1f6e06f15006b1f166ae'
},
'CCC',
),
},
'/v2/images/multihash-dd57-11e1-af0f-02163e68b1d8': {
'GET': (
{},
{
'checksum': 'defb99e69a9f1f6e06f15006b1f166ae',
'os_hash_algo': 'sha384',
'os_hash_value': hashlib.sha384(b'CCC').hexdigest()
},
),
},
'/v2/images/87b634c1-f893-33c9-28a9-e5673c99239a/actions/reactivate': {
'POST': ({}, None)
},
@ -846,12 +969,11 @@ class TestController(testtools.TestCase):
self.assertEqual('A', body)
def test_data_with_wrong_checksum(self):
body = self.controller.data('66fb18d6-db27-11e1-a1eb-080027cbe205',
body = self.controller.data('headeronly-db27-11e1-a1eb-080027cbe205',
do_checksum=False)
body = ''.join([b for b in body])
self.assertEqual('BB', body)
body = self.controller.data('66fb18d6-db27-11e1-a1eb-080027cbe205')
body = self.controller.data('headeronly-db27-11e1-a1eb-080027cbe205')
try:
body = ''.join([b for b in body])
self.fail('data did not raise an error.')
@ -860,15 +982,116 @@ class TestController(testtools.TestCase):
msg = 'was 9d3d9048db16a7eee539e93e3618cbe7 expected wrong'
self.assertIn(msg, str(e))
def test_data_with_checksum(self):
body = self.controller.data('1b1c6366-dd57-11e1-af0f-02163e68b1d8',
body = self.controller.data('chkonly-db27-11e1-a1eb-080027cbe205',
do_checksum=False)
body = ''.join([b for b in body])
self.assertEqual('CCC', body)
self.assertEqual('BB', body)
body = self.controller.data('chkonly-db27-11e1-a1eb-080027cbe205')
try:
body = ''.join([b for b in body])
self.fail('data did not raise an error.')
except IOError as e:
self.assertEqual(errno.EPIPE, e.errno)
msg = 'was 9d3d9048db16a7eee539e93e3618cbe7 expected wrong'
self.assertIn(msg, str(e))
body = self.controller.data('1b1c6366-dd57-11e1-af0f-02163e68b1d8')
body = self.controller.data('multihash-db27-11e1-a1eb-080027cbe205',
do_checksum=False)
body = ''.join([b for b in body])
self.assertEqual('CCC', body)
self.assertEqual('BB', body)
body = self.controller.data('multihash-db27-11e1-a1eb-080027cbe205')
try:
body = ''.join([b for b in body])
self.fail('data did not raise an error.')
except IOError as e:
self.assertEqual(errno.EPIPE, e.errno)
msg = 'was 9d3d9048db16a7eee539e93e3618cbe7 expected junk'
self.assertIn(msg, str(e))
body = self.controller.data('badalgo-db27-11e1-a1eb-080027cbe205',
do_checksum=False)
body = ''.join([b for b in body])
self.assertEqual('BB', body)
try:
body = self.controller.data('badalgo-db27-11e1-a1eb-080027cbe205')
self.fail('bad os_hash_algo did not raise an error.')
except ValueError as e:
msg = 'unsupported hash type not_an_algo'
self.assertIn(msg, str(e))
def test_data_with_checksum(self):
for prefix in ['headeronly', 'chkonly', 'multihash']:
body = self.controller.data(prefix +
'-dd57-11e1-af0f-02163e68b1d8',
do_checksum=False)
body = ''.join([b for b in body])
self.assertEqual('CCC', body)
body = self.controller.data(prefix +
'-dd57-11e1-af0f-02163e68b1d8')
body = ''.join([b for b in body])
self.assertEqual('CCC', body)
def test_data_with_checksum_and_fallback(self):
# make sure the allow_md5_fallback option does not cause any
# incorrect behavior when fallback is not needed
for prefix in ['headeronly', 'chkonly', 'multihash']:
body = self.controller.data(prefix +
'-dd57-11e1-af0f-02163e68b1d8',
do_checksum=False,
allow_md5_fallback=True)
body = ''.join([b for b in body])
self.assertEqual('CCC', body)
body = self.controller.data(prefix +
'-dd57-11e1-af0f-02163e68b1d8',
allow_md5_fallback=True)
body = ''.join([b for b in body])
self.assertEqual('CCC', body)
def test_data_with_bad_hash_algo_and_fallback(self):
# shouldn't matter when do_checksum is False
body = self.controller.data('badalgo-db27-11e1-a1eb-080027cbe205',
do_checksum=False,
allow_md5_fallback=True)
body = ''.join([b for b in body])
self.assertEqual('BB', body)
# default value for do_checksum is True
body = self.controller.data('badalgo-db27-11e1-a1eb-080027cbe205',
allow_md5_fallback=True)
body = ''.join([b for b in body])
self.assertEqual('BB', body)
def test_neg_data_with_bad_hash_value_and_fallback_enabled(self):
# make sure download fails when good hash_algo but bad hash_value
# even when correct checksum is present regardless of
# allow_md5_fallback setting
body = self.controller.data('bad-multihash-value-good-checksum',
allow_md5_fallback=False)
try:
body = ''.join([b for b in body])
self.fail('bad os_hash_value did not raise an error.')
except IOError as e:
self.assertEqual(errno.EPIPE, e.errno)
msg = 'expected badmultihashvalue'
self.assertIn(msg, str(e))
body = self.controller.data('bad-multihash-value-good-checksum',
allow_md5_fallback=True)
try:
body = ''.join([b for b in body])
self.fail('bad os_hash_value did not raise an error.')
except IOError as e:
self.assertEqual(errno.EPIPE, e.errno)
msg = 'expected badmultihashvalue'
self.assertIn(msg, str(e))
# download should succeed when do_checksum is off, though
body = self.controller.data('bad-multihash-value-good-checksum',
do_checksum=False)
body = ''.join([b for b in body])
self.assertEqual('GOODCHECKSUM', body)
def test_image_import(self):
uri = 'http://example.com/image.qcow'
@ -883,7 +1106,7 @@ class TestController(testtools.TestCase):
def test_download_no_data(self):
resp = utils.FakeResponse(headers={}, status_code=204)
self.controller.controller.http_client.get = mock.Mock(
return_value=(resp, None))
return_value=(resp, {}))
self.controller.data('image_id')
def test_download_forbidden(self):

View File

@ -1729,7 +1729,8 @@ class ShellV2Test(testtools.TestCase):
def test_image_download(self):
args = self._make_args(
{'id': 'IMG-01', 'file': 'test', 'progress': True})
{'id': 'IMG-01', 'file': 'test', 'progress': True,
'allow_md5_fallback': False})
with mock.patch.object(self.gc.images, 'data') as mocked_data, \
mock.patch.object(utils, '_extract_request_id'):
@ -1737,14 +1738,27 @@ class ShellV2Test(testtools.TestCase):
[c for c in 'abcdef'])
test_shell.do_image_download(self.gc, args)
mocked_data.assert_called_once_with('IMG-01')
mocked_data.assert_called_once_with('IMG-01',
allow_md5_fallback=False)
# check that non-default value is being passed correctly
args.allow_md5_fallback = True
with mock.patch.object(self.gc.images, 'data') as mocked_data, \
mock.patch.object(utils, '_extract_request_id'):
mocked_data.return_value = utils.RequestIdProxy(
[c for c in 'abcdef'])
test_shell.do_image_download(self.gc, args)
mocked_data.assert_called_once_with('IMG-01',
allow_md5_fallback=True)
@mock.patch.object(utils, 'exit')
@mock.patch('sys.stdout', autospec=True)
def test_image_download_no_file_arg(self, mocked_stdout,
mocked_utils_exit):
# Indicate that no file name was given as command line argument
args = self._make_args({'id': '1234', 'file': None, 'progress': False})
args = self._make_args({'id': '1234', 'file': None, 'progress': False,
'allow_md5_fallback': False})
# Indicate that no file is specified for output redirection
mocked_stdout.isatty = lambda: True
test_shell.do_image_download(self.gc, args)
@ -1835,7 +1849,8 @@ class ShellV2Test(testtools.TestCase):
def test_do_image_download_with_forbidden_id(self, mocked_print_err,
mocked_stdout):
args = self._make_args({'id': 'IMG-01', 'file': None,
'progress': False})
'progress': False,
'allow_md5_fallback': False})
mocked_stdout.isatty = lambda: False
with mock.patch.object(self.gc.images, 'data') as mocked_data:
mocked_data.side_effect = exc.HTTPForbidden
@ -1852,7 +1867,8 @@ class ShellV2Test(testtools.TestCase):
@mock.patch.object(utils, 'print_err')
def test_do_image_download_with_500(self, mocked_print_err, mocked_stdout):
args = self._make_args({'id': 'IMG-01', 'file': None,
'progress': False})
'progress': False,
'allow_md5_fallback': False})
mocked_stdout.isatty = lambda: False
with mock.patch.object(self.gc.images, 'data') as mocked_data:
mocked_data.side_effect = exc.HTTPInternalServerError

View File

@ -13,6 +13,7 @@
# License for the specific language governing permissions and limitations
# under the License.
import hashlib
import json
from oslo_utils import encodeutils
from requests import codes
@ -197,13 +198,39 @@ class Controller(object):
return self._get(image_id)
@utils.add_req_id_to_object()
def data(self, image_id, do_checksum=True):
def data(self, image_id, do_checksum=True, allow_md5_fallback=False):
"""Retrieve data of an image.
:param image_id: ID of the image to download.
:param do_checksum: Enable/disable checksum validation.
:returns: An iterable body or None
When do_checksum is enabled, validation proceeds as follows:
1. if the image has a 'os_hash_value' property, the algorithm
specified in the image's 'os_hash_algo' property will be used
to validate against the 'os_hash_value' value. If the
specified hash algorithm is not available AND allow_md5_fallback
is True, then continue to step #2
2. else if the image has a checksum property, MD5 is used to
validate against the 'checksum' value
3. else if the download response has a 'content-md5' header, MD5
is used to validate against the header value
4. if none of 1-3 obtain, the data is **not validated** (this is
compatible with legacy behavior)
:param image_id: ID of the image to download
:param do_checksum: Enable/disable checksum validation
:param allow_md5_fallback:
Use the MD5 checksum for validation if the algorithm specified by
the image's 'os_hash_algo' property is not available
:returns: An iterable body or ``None``
"""
if do_checksum:
# doing this first to prevent race condition if image record
# is deleted during the image download
url = '/v2/images/%s' % image_id
resp, image_meta = self.http_client.get(url)
meta_checksum = image_meta.get('checksum', None)
meta_hash_value = image_meta.get('os_hash_value', None)
meta_hash_algo = image_meta.get('os_hash_algo', None)
url = '/v2/images/%s/file' % image_id
resp, body = self.http_client.get(url)
if resp.status_code == codes.no_content:
@ -212,8 +239,32 @@ class Controller(object):
checksum = resp.headers.get('content-md5', None)
content_length = int(resp.headers.get('content-length', 0))
if do_checksum and checksum is not None:
body = utils.integrity_iter(body, checksum)
check_md5sum = do_checksum
if do_checksum and meta_hash_value is not None:
try:
hasher = hashlib.new(str(meta_hash_algo))
body = utils.serious_integrity_iter(body,
hasher,
meta_hash_value)
check_md5sum = False
except ValueError as ve:
if (str(ve).startswith('unsupported hash type') and
allow_md5_fallback):
check_md5sum = True
else:
raise
if do_checksum and check_md5sum:
if meta_checksum is not None:
body = utils.integrity_iter(body, meta_checksum)
elif checksum is not None:
body = utils.integrity_iter(body, checksum)
else:
# NOTE(rosmaita): this preserves legacy behavior to return the
# image data when checksumming is requested but there's no
# 'content-md5' header in the response. Just want to make it
# clear that we're doing this on purpose.
pass
return utils.IterableWithLength(body, content_length), resp

View File

@ -490,6 +490,17 @@ def do_stores_info(gc, args):
utils.print_dict(stores_info)
@utils.arg('--allow-md5-fallback', action='store_true',
default=utils.env('OS_IMAGE_ALLOW_MD5_FALLBACK', default=False),
help=_('If os_hash_algo and os_hash_value properties are available '
'on the image, they will be used to validate the downloaded '
'image data. If the indicated secure hash algorithm is not '
'available on the client, the download will fail. Use this '
'flag to indicate that in such a case the legacy MD5 image '
'checksum should be used to validate the downloaded data. '
'You can also set the enviroment variable '
'OS_IMAGE_ALLOW_MD5_FALLBACK to any value to activate this '
'option.'))
@utils.arg('--file', metavar='<FILE>',
help=_('Local file to save downloaded image data to. '
'If this is not specified and there is no redirection '
@ -506,7 +517,8 @@ def do_image_download(gc, args):
utils.exit(msg)
try:
body = gc.images.data(args.id)
body = gc.images.data(args.id,
allow_md5_fallback=args.allow_md5_fallback)
except (exc.HTTPForbidden, exc.HTTPException) as e:
msg = "Unable to download image '%s'. (%s)" % (args.id, e)
utils.exit(msg)

View File

@ -0,0 +1,41 @@
---
features:
- |
This release adds verification of image data downloads using the Glance
"multihash" feature introduced in the OpenStack Rocky release. When
the ``os_hash_value`` is populated on an image, the glanceclient will
verify this value by computing the hexdigest of the downloaded data
using the algorithm specified by the image's ``os_hash_algo`` property.
Because the secure hash algorithm specified is determined by the cloud
provider, it is possible that the ``os_hash_algo`` may identify an
algorithm not available in the version of the Python ``hashlib`` library
used by the client. In such a case the download will fail due to an
unsupported hash type. In the event this occurs, a new option,
``--allow-md5-fallback``, is introduced to the ``image-download`` command.
When present, this option will allow the glanceclient to use the legacy
MD5 checksum to verify the downloaded data if the secure hash algorithm
specified by the ``os_hash_algo`` image property is not supported.
Note that the fallback is *not* used in the case where the algorithm is
supported but the hexdigest of the downloaded data does not match the
``os_hash_value``. In that case the download fails regardless of whether
the option is present or not.
Whether using the ``--allow-md5-fallback`` option is a good idea depends
upon the user's expectations for the verification. MD5 is an insecure
hashing algorithm, so if you are interested in making sure that the
downloaded image data has not been replaced by a datastream carefully
crafted to have the same MD5 checksum, then you should not use the
fallback. If, however, you are using Glance in a trusted environment
and your interest is simply to verify that no bits have flipped during
the data transfer, the MD5 fallback is sufficient for that purpose. That
being said, it is our recommendation that the multihash should be used
whenever possible.
security:
- |
This release of the glanceclient uses the Glance "multihash" feature,
introduced in Rocky, to use a secure hashing algorithm to verify the
integrity of downloaded data. Legacy images without the "multihash"
image properties (``os_hash_algo`` and ``os_hash_value``) are verified
using the MD5 ``checksum`` image property.