Ensure and verify utf-8 support

Added proper handling of unicode messages through explicit
use of unicode and UTF-8 encoding for Python 2.
For Python 3 it was sufficient to ensure that the message
is properly UTF-8 encoded. The results of that change can be examined
inside Kibana (or using link [2]).

To prevent any regression, a set of unicode messages has been
added. It contains:

* 'Unicode is evil...' sentence translated to several languages
* boundary conditions taken from the UTF-8 stress test [1]
* some funnier examples just because programming is fun

Extra:
* unified setting up API test case

[1]: http://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt
[2]: https://pasteboard.co/x9gQqicR.png

Story: 2001084
Task: 4734

(cherry-picked from commit 21493ac9cf)
Depends-On: I5cc2ab2fd28ce8f82a9983ffb5ebb03a834d64ff
Change-Id: I36e3c2f1ea5788ecb19089089d84924b7d6666bf
This commit is contained in:
Tomasz Trębski 2017-06-21 13:56:26 +02:00
parent 0d04377055
commit 20ed6513ee
11 changed files with 308 additions and 71 deletions

View File

@ -13,9 +13,9 @@
# License for the specific language governing permissions and limitations
# under the License.
import falcon
import time
import falcon
from monasca_common.kafka import producer
from monasca_common.rest import utils as rest_utils
from oslo_config import cfg
@ -143,8 +143,7 @@ class LogPublisher(object):
sent_counter = len(send_messages)
except Exception as ex:
LOG.error('Failure in publishing messages to kafka')
LOG.exception(ex)
LOG.exception('Failure in publishing messages to kafka')
raise ex
finally:
self._after_publish(sent_counter, num_of_msgs)
@ -167,7 +166,22 @@ class LogPublisher(object):
"""
if not self._is_message_valid(message):
raise InvalidMessageException()
return self._truncate(message)
truncated = self._truncate(message)
proper = self._ensure_type_bytes(truncated)
return proper
def _ensure_type_bytes(self, message):
"""Ensures that message will have proper type.
Kafka client expects that messages being
posted have certain data type (:py:func:`six.binary_type`).
This method ensures by the means of encoding that such type
will always be a case regardless if codebase runs under
:py:data:`six.PY2` or :py:data:`six.PY3`
"""
message = message.encode('utf-8')
return message
def _truncate(self, envelope):
"""Truncates the message if needed.
@ -183,8 +197,8 @@ class LogPublisher(object):
:rtype: str
"""
msg_str = rest_utils.as_json(envelope)
envelope_size = ((len(bytearray(msg_str, 'utf-8')) +
msg_str = model.serialize_envelope(envelope)
envelope_size = ((len(bytearray(msg_str, 'utf-8', 'replace')) +
_TIMESTAMP_KEY_SIZE +
_KAFKA_META_DATA_SIZE)
if msg_str is not None else -1)
@ -231,7 +245,7 @@ class LogPublisher(object):
LOG.debug('Sent %d messages to topic %s', num_of_msg, topic)
except Exception as ex:
raise falcon.HTTPServiceUnavailable('Service unavailable',
ex.message, 60)
str(ex), 60)
@staticmethod
def _is_message_valid(message):

View File

@ -13,6 +13,27 @@
# under the License.
from oslo_utils import timeutils
import six
from monasca_common.rest import utils as rest_utils
def serialize_envelope(envelope):
    """Serialize an envelope into its JSON text form.

    The envelope is dumped with ``ensure_ascii=False``, so non-ASCII
    characters are not escaped. On Python 2 every two-backslash sequence
    of the dump is expanded to four backslashes and the result is decoded
    from UTF-8, replacing undecodable bytes. On Python 3 the dump is
    returned as it is.

    :param envelope: envelope to serialize
    :return: json representation of the envelope
    :rtype: six.text_type
    """
    dumped = rest_utils.as_json(envelope, ensure_ascii=False)
    if not six.PY2:
        return dumped

    escaped = dumped.replace(r'\\', r'\\\\')
    return unicode(escaped, encoding='utf-8', errors='replace')
class LogEnvelopeException(Exception):

View File

@ -1,5 +1,6 @@
# coding=utf-8
# Copyright 2015 kornicameister@gmail.com
# Copyright 2015 FUJITSU LIMITED
# Copyright 2015-2017 FUJITSU LIMITED
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
@ -12,9 +13,18 @@
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import codecs
import random
import string
import falcon
from falcon import testing
import mock
from oslo_config import fixture as oo_cfg
from oslo_context import fixture as oo_ctx
from oslotest import base as os_test
import six
from monasca_log_api.api.core import request
@ -43,3 +53,89 @@ class MockedAPI(falcon.API):
middleware=None,
router=None
)
def generate_unique_message(size):
    """Return a random string of ``size`` characters.

    Each character is drawn independently from the ASCII letters
    (both cases) plus the space character.

    :param size: number of characters to generate
    :return: generated text, empty when ``size`` is 0
    """
    alphabet = string.ascii_letters + ' '
    picked = [random.choice(alphabet) for _ in range(size)]
    return ''.join(picked)
def _hex_to_unicode(hex_raw):
hex_raw = six.b(hex_raw.replace(' ', ''))
hex_str_raw = codecs.getdecoder('hex')(hex_raw)[0]
hex_str = hex_str_raw.decode('utf-8', 'replace')
return hex_str
# NOTE(trebskit) => http://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt
UNICODE_MESSAGES = [
# Unicode is evil...
{'case': 'arabic', 'input': 'يونيكود هو الشر'},
{'case': 'polish', 'input': 'Unicode to zło'},
{'case': 'greek', 'input': 'Unicode είναι κακό'},
{'case': 'portuguese', 'input': 'Unicode é malvado'},
{'case': 'lao', 'input': 'unicode ເປັນຄວາມຊົ່ວຮ້າຍ'},
{'case': 'german', 'input': 'Unicode ist böse'},
{'case': 'japanese', 'input': 'ユニコードは悪です'},
{'case': 'russian', 'input': 'Unicode - зло'},
{'case': 'urdu', 'input': 'یونیسیڈ برائی ہے'},
{'case': 'weird', 'input': '🆄🅽🅸🅲🅾🅳🅴 🅸🆂 🅴🆅🅸🅻...'}, # funky, huh ?
# conditions from link above
# 2.3 Other boundary conditions
{'case': 'stress_2_3_1', 'input': _hex_to_unicode('ed 9f bf')},
{'case': 'stress_2_3_2', 'input': _hex_to_unicode('ee 80 80')},
{'case': 'stress_2_3_3', 'input': _hex_to_unicode('ef bf bd')},
{'case': 'stress_2_3_4', 'input': _hex_to_unicode('f4 8f bf bf')},
{'case': 'stress_2_3_5', 'input': _hex_to_unicode('f4 90 80 80')},
# 3.5 Impossible bytes
{'case': 'stress_3_5_1', 'input': _hex_to_unicode('fe')},
{'case': 'stress_3_5_2', 'input': _hex_to_unicode('ff')},
{'case': 'stress_3_5_3', 'input': _hex_to_unicode('fe fe ff ff')},
# 4.1 Examples of an overlong ASCII character
{'case': 'stress_4_1_1', 'input': _hex_to_unicode('c0 af')},
{'case': 'stress_4_1_2', 'input': _hex_to_unicode('e0 80 af')},
{'case': 'stress_4_1_3', 'input': _hex_to_unicode('f0 80 80 af')},
{'case': 'stress_4_1_4', 'input': _hex_to_unicode('f8 80 80 80 af')},
{'case': 'stress_4_1_5', 'input': _hex_to_unicode('fc 80 80 80 80 af')},
# 4.2 Maximum overlong sequences
{'case': 'stress_4_2_1', 'input': _hex_to_unicode('c1 bf')},
{'case': 'stress_4_2_2', 'input': _hex_to_unicode('e0 9f bf')},
{'case': 'stress_4_2_3', 'input': _hex_to_unicode('f0 8f bf bf')},
{'case': 'stress_4_2_4', 'input': _hex_to_unicode('f8 87 bf bf bf')},
{'case': 'stress_4_2_5', 'input': _hex_to_unicode('fc 83 bf bf bf bf')},
# 4.3 Overlong representation of the NUL character
{'case': 'stress_4_3_1', 'input': _hex_to_unicode('c0 80')},
{'case': 'stress_4_3_2', 'input': _hex_to_unicode('e0 80 80')},
{'case': 'stress_4_3_3', 'input': _hex_to_unicode('f0 80 80 80')},
{'case': 'stress_4_3_4', 'input': _hex_to_unicode('f8 80 80 80 80')},
{'case': 'stress_4_3_5', 'input': _hex_to_unicode('fc 80 80 80 80 80')},
# and some cheesy example from polish novel 'Pan Tadeusz'
{'case': 'mr_t', 'input': 'Hajże na Soplicę!'},
# it won't be complete without that one
{'case': 'mr_b', 'input': 'Grzegorz Brzęczyszczykiewicz, '
'Chrząszczyżewoszyce, powiat Łękołody'},
# great success, christmas time
{'case': 'olaf', 'input': ''}
]
class DisableStatsdMixin(object):
    """Test mixin that replaces ``monascastatsd.Connection`` with a mock.

    It has to be mixed into a ``unittest.TestCase`` subclass, because it
    relies on the test case's ``setUp`` and ``addCleanup``.
    """

    def setUp(self):
        """Start the statsd patch and schedule its removal."""
        super(DisableStatsdMixin, self).setUp()
        self.statsd_patch = mock.patch('monascastatsd.Connection')
        self.statsd_check = self.statsd_patch.start()
        # A started patcher stays active until it is stopped; without this
        # cleanup the mock would leak into every test that runs afterwards.
        self.addCleanup(self.statsd_patch.stop)
class BaseTestCase(DisableStatsdMixin, os_test.BaseTestCase):
    """Base test case combining ``DisableStatsdMixin`` with oslotest's base."""
class BaseApiTestCase(BaseTestCase, testing.TestBase):
    """Base test case for API tests built on falcon's ``testing.TestBase``."""

    # Application class for the tests; presumably consumed by
    # testing.TestBase when it builds ``self.api`` -- confirm against falcon.
    api_class = MockedAPI

    def before(self):
        """Keep the result of ``mock_config`` on the test as ``self.conf``."""
        self.conf = mock_config(self)

View File

@ -13,7 +13,6 @@
# under the License.
import falcon
from falcon import testing
import mock
import simplejson as json
@ -24,9 +23,9 @@ from monasca_log_api.tests import base
ENDPOINT = '/healthcheck'
class TestHealthChecks(testing.TestBase):
class TestApiHealthChecks(base.BaseApiTestCase):
def before(self):
self.conf = base.mock_config(self)
super(TestApiHealthChecks, self).before()
self.resource = healthchecks.HealthChecks()
self.api.add_route(
ENDPOINT,

View File

@ -16,31 +16,22 @@
import copy
import datetime
import random
import string
import ujson
import unittest
import mock
from oslotest import base as os_test
from oslo_log import log
import six
from monasca_log_api.reference.common import log_publisher
from monasca_log_api.reference.common import model
from monasca_log_api.tests import base
LOG = log.getLogger(__name__)
EPOCH_START = datetime.datetime(1970, 1, 1)
def _generate_unique_message(size):
letters = string.ascii_lowercase
def rand(amount, space=True):
space = ' ' if space else ''
return ''.join((random.choice(letters + space) for _ in range(amount)))
return rand(size)
class TestSendMessage(os_test.BaseTestCase):
class TestSendMessage(base.BaseTestCase):
def setUp(self):
self.conf = base.mock_config(self)
@ -127,7 +118,7 @@ class TestSendMessage(os_test.BaseTestCase):
msg = model.Envelope(
log={
'message': 1,
'message': '1',
'application_type': application_type,
'dimensions': {
dimension_1_name: dimension_1_value,
@ -135,7 +126,7 @@ class TestSendMessage(os_test.BaseTestCase):
}
},
meta={
'tenantId': 1
'tenantId': '1'
}
)
msg['creation_time'] = creation_time
@ -143,7 +134,7 @@ class TestSendMessage(os_test.BaseTestCase):
instance._kafka_publisher.publish.assert_called_once_with(
self.conf.conf.log_publisher.topics[0],
[ujson.dumps(msg)])
[ujson.dumps(msg, ensure_ascii=False).encode('utf-8')])
@mock.patch('monasca_log_api.reference.common.log_publisher.producer'
'.KafkaProducer')
@ -166,7 +157,7 @@ class TestSendMessage(os_test.BaseTestCase):
application_type = 'monasca-log-api'
msg = model.Envelope(
log={
'message': 1,
'message': '1',
'application_type': application_type,
'dimensions': {
dimension_1_name: dimension_1_value,
@ -174,11 +165,11 @@ class TestSendMessage(os_test.BaseTestCase):
}
},
meta={
'tenantId': 1
'tenantId': '1'
}
)
msg['creation_time'] = creation_time
json_msg = ujson.dumps(msg)
json_msg = ujson.dumps(msg, ensure_ascii=False)
instance.send_message(msg)
@ -187,13 +178,50 @@ class TestSendMessage(os_test.BaseTestCase):
for topic in topics:
instance._kafka_publisher.publish.assert_any_call(
topic,
[json_msg])
[json_msg.encode('utf-8')])
@mock.patch('monasca_log_api.reference.common.log_publisher.producer'
            '.KafkaProducer')
def test_should_send_unicode_message(self, kp):
    """Check that every entry of ``base.UNICODE_MESSAGES`` is published.

    For each case an envelope is sent through ``LogPublisher`` and the
    last ``publish`` call is compared with the envelope serialized by
    ``ujson`` (``ensure_ascii=False``).

    :param kp: mock injected in place of ``KafkaProducer``
    """
    instance = log_publisher.LogPublisher()
    instance._kafka_publisher = kp

    for um in base.UNICODE_MESSAGES:
        # Read both entries by key: unpacking ``um.values()`` depends on
        # dict ordering, which is arbitrary before Python 3.7 and could
        # swap the case name with the message.
        case, msg = um['case'], um['input']
        try:
            envelope = model.Envelope(
                log={
                    'message': msg,
                    'application_type': 'test',
                    'dimensions': {
                        'test': 'test_log_publisher',
                        'case': 'test_should_send_unicode_message'
                    }
                },
                meta={
                    'tenantId': 1
                }
            )
            instance.send_message(envelope)

            expected_message = ujson.dumps(envelope, ensure_ascii=False)

            # Only encoded on Python 3; presumably ujson already returns
            # a byte string on Python 2 -- confirm.
            if six.PY3:
                expected_message = expected_message.encode('utf-8')

            instance._kafka_publisher.publish.assert_called_with(
                self.conf.conf.log_publisher.topics[0],
                [expected_message]
            )
        except Exception:
            # Name the failing case before propagating the error.
            LOG.exception('Failed to evaluate unicode case %s', case)
            raise
@mock.patch(
'monasca_log_api.reference.common.log_publisher.producer'
'.KafkaProducer')
class TestTruncation(os_test.BaseTestCase):
class TestTruncation(base.BaseTestCase):
EXTRA_CHARS_SIZE = len(bytearray(ujson.dumps({
'log': {
'message': None
@ -231,7 +259,7 @@ class TestTruncation(os_test.BaseTestCase):
max_message_size=1000,
log_size_factor=0,
truncate_by=0,
gen_fn=_generate_unique_message):
gen_fn=base.generate_unique_message):
log_size = (max_message_size -
TestTruncation.EXTRA_CHARS_SIZE -

View File

@ -14,9 +14,7 @@
# under the License.
import falcon
from falcon import testing
import mock
import unittest
from monasca_log_api.api import exceptions as log_api_exceptions
from monasca_log_api.api import headers
@ -31,7 +29,7 @@ def _init_resource(test):
return resource
class TestLogsVersion(unittest.TestCase):
class TestLogsVersion(base.BaseApiTestCase):
@mock.patch('monasca_log_api.reference.common.log_publisher.LogPublisher')
@mock.patch('monasca_log_api.reference.v2.common.service.LogCreator')
def test_should_return_v2_as_version(self, _, __):
@ -39,7 +37,7 @@ class TestLogsVersion(unittest.TestCase):
self.assertEqual('v2.0', logs_resource.version)
class TestLogs(testing.TestBase):
class TestLogs(base.BaseApiTestCase):
api_class = base.MockedAPI

View File

@ -1,4 +1,4 @@
# Copyright 2016 FUJITSU LIMITED
# Copyright 2016-2017 FUJITSU LIMITED
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
@ -12,11 +12,7 @@
# License for the specific language governing permissions and limitations
# under the License.
import random
import string
import unittest
from falcon import testing
import falcon
import mock
import ujson as json
@ -36,23 +32,24 @@ def _init_resource(test):
return resource
def _generate_unique_message(size):
letters = string.ascii_lowercase
def rand(amount, space=True):
space = ' ' if space else ''
return ''.join((random.choice(letters + space) for _ in range(amount)))
return rand(size)
def _generate_v3_payload(log_count):
v3_logs = [{
'message': _generate_unique_message(100),
'dimensions': {
'hostname': 'host_%d' % it,
'component': 'component_%d' % it,
'service': 'service_%d' % it
def _generate_v3_payload(log_count=None, messages=None):
if not log_count and messages:
log_count = len(messages)
v3_logs = [{
'message': messages[it],
'dimensions': {
'hostname': 'host_%d' % it,
'component': 'component_%d' % it,
'service': 'service_%d' % it
}
} for it in range(log_count)]
else:
v3_logs = [{
'message': base.generate_unique_message(100),
'dimensions': {
'hostname': 'host_%d' % it,
'component': 'component_%d' % it,
'service': 'service_%d' % it
}
} for it in range(log_count)]
v3_body = {
@ -65,7 +62,7 @@ def _generate_v3_payload(log_count):
return v3_body, v3_logs
class TestLogsVersion(unittest.TestCase):
class TestApiLogsVersion(base.BaseApiTestCase):
@mock.patch('monasca_log_api.reference.v3.common.'
'bulk_processor.BulkProcessor')
@ -77,9 +74,7 @@ class TestLogsVersion(unittest.TestCase):
@mock.patch('monasca_log_api.reference.common.log_publisher.producer.'
'KafkaProducer')
@mock.patch('monasca_log_api.monitoring.client.monascastatsd.Connection')
class TestLogsMonitoring(testing.TestBase):
api_class = base.MockedAPI
class TestApiLogsMonitoring(base.BaseApiTestCase):
def test_monitor_bulk_rejected(self, __, _):
res = _init_resource(self)
@ -204,3 +199,27 @@ class TestLogsMonitoring(testing.TestBase):
self.assertEqual(1, size_gauge.call_count)
self.assertEqual(content_length,
size_gauge.mock_calls[0][2]['value'])
class TestUnicodeLogs(base.BaseApiTestCase):
    """API-level test posting all of ``base.UNICODE_MESSAGES`` at once."""

    @mock.patch('monasca_log_api.reference.common.log_publisher.producer.'
                'KafkaProducer')
    def test_should_send_unicode_messages(self, _):
        """POST the unicode messages to ``/logs`` and expect HTTP 204."""
        _init_resource(self)

        messages = [m['input'] for m in base.UNICODE_MESSAGES]
        v3_body, _ = _generate_v3_payload(messages=messages)
        payload = json.dumps(v3_body, ensure_ascii=False)

        # Content-Length counts bytes, not characters. For non-ASCII text
        # the two differ, so measure the UTF-8 encoded form. A payload that
        # is already a byte string (Python 2) is measured directly.
        if isinstance(payload, bytes):
            content_length = len(payload)
        else:
            content_length = len(payload.encode('utf-8'))

        self.simulate_request(
            '/logs',
            method='POST',
            headers={
                headers.X_ROLES.name: logs_api.MONITORING_DELEGATE_ROLE,
                'Content-Type': 'application/json',
                'Content-Length': str(content_length)
            },
            body=payload
        )
        self.assertEqual(falcon.HTTP_204, self.srmock.status)

View File

@ -12,7 +12,6 @@
# License for the specific language governing permissions and limitations
# under the License.
from falcon import testing
import mock
import ujson as json
@ -23,9 +22,7 @@ from monasca_log_api.reference.v3 import logs as v3_logs
from monasca_log_api.tests import base
class SameV2V3Output(testing.TestBase):
api_class = base.MockedAPI
class TestApiSameV2V3Output(base.BaseApiTestCase):
# noinspection PyProtectedMember
@mock.patch('monasca_log_api.reference.common.'

View File

@ -13,22 +13,23 @@
# under the License.
import falcon
from falcon import testing
import ujson as json
from monasca_log_api.reference import versions
from monasca_log_api.tests import base
def _get_versioned_url(version_id):
return '/version/%s' % version_id
class TestVersions(testing.TestBase):
class TestApiVersions(base.BaseApiTestCase):
def __init__(self, *args, **kwargs):
self.versions = None
super(TestVersions, self).__init__(*args, **kwargs)
super(TestApiVersions, self).__init__(*args, **kwargs)
def before(self):
super(TestApiVersions, self).before()
self.versions = versions.Versions()
self.api.add_route("/version/", self.versions)
self.api.add_route("/version/{version_id}", self.versions)

View File

@ -0,0 +1,63 @@
# Copyright 2017 FUJITSU LIMITED
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
from tempest import test
from tempest.lib.common.utils import test_utils
from monasca_log_api.tests import base as api_base
from monasca_log_api_tempest.tests import base
_API_VERSION = 'v3'
_RETRY_COUNT = 15
_RETRY_WAIT = 2
_UNICODE_CASES = api_base.UNICODE_MESSAGES
class TestUnicodeV3(base.BaseLogsTestCase):
    """Tempest test sending the shared unicode cases through the v3 API."""

    def _run_and_wait(self, key, data,
                      content_type='application/json',
                      headers=None):
        """Send one log and verify that exactly one match can be found.

        :param key: text used to search for the log message
        :param data: log payload handed to ``base._get_data``
        :param content_type: content type of the request
        :param headers: extra headers handed to ``base._get_headers``
        :return: result of ``search_messages`` for ``key``
        """
        headers = base._get_headers(headers, content_type)

        def wait():
            # Reads ``headers`` at call time, i.e. the value assigned
            # last in the enclosing method.
            return self.logs_search_client.count_search_messages(key,
                                                                 headers) > 0

        # The message must not be present before it is sent.
        found = self.logs_search_client.count_search_messages(key, headers)
        self.assertEqual(0, found,
                         'Find log message in elasticsearch: {0}'.format(key))

        headers = base._get_headers(headers, content_type)
        data = base._get_data(data, content_type, version=_API_VERSION)

        client = self.logs_clients[_API_VERSION]
        response, _ = client.send_single_log(data, headers)
        self.assertEqual(204, response.status)

        # Poll until the message shows up; presumably the arguments are
        # (callable, total duration, sleep interval) -- confirm against
        # tempest's test_utils.
        test_utils.call_until_true(wait, _RETRY_COUNT * _RETRY_WAIT,
                                   _RETRY_WAIT)
        response = self.logs_search_client.search_messages(key, headers)
        self.assertEqual(1, len(response))

        return response

    @test.attr(type="gate")
    def test_unicode_message(self):
        """Send every unicode case, tagged with its name in a header."""
        for m in _UNICODE_CASES:
            # Read both entries by key: unpacking ``m.values()`` depends on
            # dict ordering, which is arbitrary before Python 3.7 and could
            # swap the case name with the message.
            case, msg = m['case'], m['input']
            self._run_and_wait(*base.generate_small_message(msg), headers={
                'LA-Unicode-Case': case
            })

View File

@ -22,6 +22,7 @@ deps = -r{toxinidir}/requirements.txt
-r{toxinidir}/test-requirements.txt
commands =
find ./ -type f -name '*.pyc' -delete
rm -Rf .testrepository/times.dbm
[testenv:py27]
basepython = python2.7