Truncate encoded instance sys meta to 255 or less

In nova/utils.py, the function get_system_metadata_from_image converts image metadata into instance system metadata. Since Nova's DB column for system metadata is limited to 255 characters, the image properties are truncated to 255 characters. However, a Glance image may have non-English unicode property values that are longer than 255 bytes once they are encoded. We need to truncate the encoded property value to 255 or fewer bytes to ensure that the DB insert operation succeeds. The same encoding and truncation issue was fixed for the instance fault table under change id I62fa2830b22e367eb9486d09d3c8818a18ebd20d. This change refactors that fix into a utility function and uses it to fix both problems. Change-Id: Ie1051777dd09a2fb91afbadad3728956f83452cf Closes-Bug: #1425657 Related-Bug: #1389102
2015-03-04 20:17:08 -06:00 · 2015-03-04 20:17:08 -06:00 · 8ec6a73fef
parent c2aec045ae
commit 8ec6a73fef
4 changed files with 44 additions and 37 deletions
--- a/nova/compute/utils.py
+++ b/nova/compute/utils.py
@ -21,7 +21,6 @@ import traceback
 import netifaces
 from oslo_config import cfg
 from oslo_log import log
-from oslo_utils import encodeutils

 from nova import block_device
 from nova.compute import power_state
@ -64,20 +63,7 @@ def exception_to_dict(fault):
    # NOTE(dripton) The message field in the database is limited to 255 chars.
    # MySQL silently truncates overly long messages, but PostgreSQL throws an
    # error if we don't truncate it.
-    b_message = encodeutils.safe_encode(message)[:255]
-
-    # NOTE(chaochin) UTF-8 character byte size varies from 1 to 6. If
-    # truncating a long byte string to 255, the last character may be
-    # cut in the middle, so that UnicodeDecodeError will occur when
-    # converting it back to unicode.
-    decode_ok = False
-    while not decode_ok:
-        try:
-            u_message = encodeutils.safe_decode(b_message)
-            decode_ok = True
-        except UnicodeDecodeError:
-            b_message = b_message[:-1]
-
+    u_message = utils.safe_truncate(message, 255)
    fault_dict = dict(exception=fault)
    fault_dict["message"] = u_message
    fault_dict["code"] = code
--- a/nova/tests/unit/compute/test_compute_utils.py
+++ b/nova/tests/unit/compute/test_compute_utils.py
@ -23,7 +23,6 @@ import uuid
 import mock
 from oslo_config import cfg
 from oslo_serialization import jsonutils
-from oslo_utils import encodeutils
 from oslo_utils import importutils
 import six
 import testtools
@ -835,26 +834,6 @@ class ComputeUtilsGetRebootTypes(test.NoDBTestCase):


 class ComputeUtilsTestCase(test.NoDBTestCase):
-    def test_exception_to_dict_with_long_message_3_bytes(self):
-        # Generate Chinese byte string whose length is 300. This Chinese UTF-8
-        # character occupies 3 bytes. After truncating, the byte string length
-        # should be 255.
-        msg = encodeutils.safe_decode('\xe8\xb5\xb5' * 100)
-        exc = exception.NovaException(message=msg)
-        fault_dict = compute_utils.exception_to_dict(exc)
-        byte_message = encodeutils.safe_encode(fault_dict["message"])
-        self.assertEqual(255, len(byte_message))
-
-    def test_exception_to_dict_with_long_message_2_bytes(self):
-        # Generate Russian byte string whose length is 300. This Russian UTF-8
-        # character occupies 2 bytes. After truncating, the byte string length
-        # should be 254.
-        msg = encodeutils.safe_decode('\xd0\x92' * 150)
-        exc = exception.NovaException(message=msg)
-        fault_dict = compute_utils.exception_to_dict(exc)
-        byte_message = encodeutils.safe_encode(fault_dict["message"])
-        self.assertEqual(254, len(byte_message))
-
    @mock.patch('netifaces.interfaces')
    def test_get_machine_ips_value_error(self, mock_interfaces):
        # Tests that the utility method does not explode if netifaces raises
--- a/nova/tests/unit/test_utils.py
+++ b/nova/tests/unit/test_utils.py
@ -28,6 +28,7 @@ from mox3 import mox
 import netaddr
 from oslo_concurrency import processutils
 from oslo_config import cfg
+from oslo_utils import encodeutils
 from oslo_utils import timeutils

 import nova
@ -992,3 +993,23 @@ class ResourceFilterTestCase(test.NoDBTestCase):
        # Make sure bug #1365887 is fixed
        i1['metadata']['key3'] = 'a'
        self._assert_filtering(rl, {'value': 'banana'}, [])
+
+
+class SafeTruncateTestCase(test.NoDBTestCase):
+    def test_exception_to_dict_with_long_message_3_bytes(self):
+        # Build a 100-character Chinese message; each character takes 3
+        # bytes in UTF-8 (300 bytes in total). 255 is a multiple of 3, so
+        # the truncated message should encode to exactly 255 bytes.
+        msg = encodeutils.safe_decode('\xe8\xb5\xb5' * 100)
+        truncated_msg = utils.safe_truncate(msg, 255)
+        byte_message = encodeutils.safe_encode(truncated_msg)
+        self.assertEqual(255, len(byte_message))
+
+    def test_exception_to_dict_with_long_message_2_bytes(self):
+        # Build a 150-character Russian message; each character takes 2
+        # bytes in UTF-8 (300 bytes in total). 255 is odd, so the split
+        # character is dropped and the result should encode to 254 bytes.
+        msg = encodeutils.safe_decode('\xd0\x92' * 150)
+        truncated_msg = utils.safe_truncate(msg, 255)
+        byte_message = encodeutils.safe_encode(truncated_msg)
+        self.assertEqual(254, len(byte_message))
--- a/nova/utils.py
+++ b/nova/utils.py
@ -41,6 +41,7 @@ from oslo_concurrency import processutils
 from oslo_config import cfg
 from oslo_log import log as logging
 import oslo_messaging as messaging
+from oslo_utils import encodeutils
 from oslo_utils import excutils
 from oslo_utils import importutils
 from oslo_utils import timeutils
@ -1026,7 +1027,7 @@ def get_system_metadata_from_image(image_meta, flavor=None):
    prefix_format = SM_IMAGE_PROP_PREFIX + '%s'

    for key, value in image_meta.get('properties', {}).iteritems():
-        new_value = unicode(value)[:255]
+        new_value = safe_truncate(unicode(value), 255)
        system_meta[prefix_format % key] = new_value

    for key in SM_INHERITABLE_KEYS:
@ -1193,3 +1194,23 @@ def filter_and_format_resource_metadata(resource_type, resource_list,
                             '%s_id' % resource_type: _get_id(res)})

    return formatted_metadata_list
+
+
+def safe_truncate(value, length):
+    """Truncate ``value`` so its encoded form is at most ``length`` bytes,
+    never splitting a multi-byte character; returns the decoded text.
+    """
+    b_value = encodeutils.safe_encode(value)[:length]

+    # NOTE(chaochin) An encoded character can span several bytes. Slicing
+    # the byte string at ``length`` may cut the last character in the
+    # middle, so that UnicodeDecodeError will occur when converting it
+    # back to unicode; drop trailing bytes until the decode succeeds.
+    decode_ok = False
+    while not decode_ok:
+        try:
+            u_value = encodeutils.safe_decode(b_value)
+            decode_ok = True
+        except UnicodeDecodeError:
+            b_value = b_value[:-1]
+    return u_value