optimization: Skip post-validation for rendered document cache hit

This patch set adds logic to optimize performance around rendered
documents caching further by implementing logic to skip over
post-rendering validation when cache hits occur. This works because:

* if the result set isn't cached, then post-validation is performed;
  if the post-validation fails the just-cached result set is
  invalidated
* if the result set is cached, then post-validation for that
  result set has already been performed successfully, so it
  can be safely skipped over

It was discovered via profiler anaylsis that document validation
accounts for an appreciable amount of run time, so optimizing
around this makes a noticeable difference.

Change-Id: I18344ea750cf1028a377028f80ecdd931d9672c0
This commit is contained in:
Felipe Monteiro 2018-08-27 00:31:34 +01:00
parent 7dddbd56d4
commit 18ae85a229
7 changed files with 140 additions and 62 deletions

View File

@ -149,11 +149,10 @@ def get_rendered_docs(revision_id, **filters):
documents = document_wrapper.DocumentDict.from_list(data)
encryption_sources = _resolve_encrypted_data(documents)
try:
rendered_documents = engine.render(
return engine.render(
revision_id,
documents,
encryption_sources=encryption_sources)
return rendered_documents
except (errors.BarbicanClientException,
errors.BarbicanServerException,
errors.InvalidDocumentLayer,

View File

@ -17,11 +17,11 @@ from oslo_log import log as logging
import six
from deckhand.common import utils
from deckhand.common import validation_message as vm
from deckhand.control import base as api_base
from deckhand.control import common
from deckhand.control.views import document as document_view
from deckhand.db.sqlalchemy import api as db_api
from deckhand import engine
from deckhand.engine import document_validation
from deckhand import errors
from deckhand import policy
@ -107,7 +107,18 @@ class RenderedDocumentsResource(api_base.BaseResource):
if include_encrypted:
filters['metadata.storagePolicy'].append('encrypted')
rendered_documents = common.get_rendered_docs(revision_id, **filters)
rendered_documents, cache_hit = common.get_rendered_docs(
revision_id, **filters)
# If the rendered documents result set is cached, then post-validation
# for that result set has already been performed successfully, so it
# can be safely skipped over as an optimization.
if not cache_hit:
data_schemas = db_api.revision_documents_get(
schema=types.DATA_SCHEMA_SCHEMA, deleted=False)
validator = document_validation.DocumentValidation(
rendered_documents, data_schemas, pre_validate=False)
engine.validate_render(revision_id, rendered_documents, validator)
# Filters to be applied post-rendering, because many documents are
# involved in rendering. User filters can only be applied once all
@ -130,50 +141,4 @@ class RenderedDocumentsResource(api_base.BaseResource):
rendered_documents = rendered_documents[:limit]
resp.status = falcon.HTTP_200
self._post_validate(rendered_documents)
resp.body = self.view_builder.list(rendered_documents)
def _post_validate(self, rendered_documents):
# Perform schema validation post-rendering to ensure that rendering
# and substitution didn't break anything.
data_schemas = db_api.revision_documents_get(
schema=types.DATA_SCHEMA_SCHEMA, deleted=False)
doc_validator = document_validation.DocumentValidation(
rendered_documents, data_schemas, pre_validate=False)
try:
validations = doc_validator.validate_all()
except errors.InvalidDocumentFormat as e:
# Post-rendering validation errors likely indicate an internal
# rendering bug, so override the default code to 500.
e.code = 500
LOG.error('Failed to post-validate rendered documents.')
LOG.exception(e.format_message())
raise e
else:
error_list = []
for validation in validations:
if validation['status'] == 'failure':
error_list.extend([
vm.ValidationMessage(
message=error['message'],
name=vm.DOCUMENT_POST_RENDERING_FAILURE,
doc_schema=error['schema'],
doc_name=error['name'],
doc_layer=error['layer'],
diagnostic={
k: v for k, v in error.items() if k in (
'schema_path',
'validation_schema',
'error_section'
)
}
)
for error in validation['errors']
])
if error_list:
raise errors.InvalidDocumentFormat(
error_list=error_list,
reason='Validation'
)

View File

@ -12,6 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from deckhand.engine.render import render
from deckhand.engine.render import * # noqa
__all__ = ('render',)
__all__ = ('render',
'validate_render')

View File

@ -31,19 +31,51 @@ _DOCUMENT_RENDERING_CACHE = _CACHE.get_cache('rendered_documents_cache')
def lookup_by_revision_id(revision_id, documents, **kwargs):
"""Look up rendered documents by ``revision_id``."""
"""Look up rendered documents by ``revision_id``.
:param revision_id: Revision ID for which to render documents. Used as key
in cache.
:type revision_id: int
:param documents: List of raw documents to render.
:type documents: List[dict]
:param kwargs: Kwargs to pass to ``render``.
:returns: Tuple, where first arg is rendered documents and second arg
indicates whether cache was hit.
:rtype: Tuple[dict, boolean]
"""
def do_render():
"""Perform document rendering for the revision."""
document_layering = layering.DocumentLayering(documents, **kwargs)
return document_layering.render()
def contains_revision():
try:
_DOCUMENT_RENDERING_CACHE.get(key=revision_id)
return True
except KeyError:
return False
if CONF.engine.enable_cache:
cache_hit = contains_revision()
return _DOCUMENT_RENDERING_CACHE.get(key=revision_id,
createfunc=do_render)
createfunc=do_render), cache_hit
else:
return do_render()
# The cache is disabled, so this is necessarily false.
return do_render(), False
def invalidate():
"""Invalidate the entire cache."""
_DOCUMENT_RENDERING_CACHE.clear()
def invalidate_one(revision_id):
"""Invalidate single entry in cache.
:param revision_id: Revision to invalidate.
:type revision_id: int
"""
_DOCUMENT_RENDERING_CACHE.remove_value(key=revision_id)

View File

@ -12,9 +12,16 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from deckhand.engine import cache
from oslo_log import log as logging
__all__ = ('render',)
from deckhand.common import validation_message as vm
from deckhand.engine import cache
from deckhand import errors
LOG = logging.getLogger(__name__)
__all__ = ('render',
'validate_render')
def render(revision_id, documents, encryption_sources=None):
@ -43,3 +50,63 @@ def render(revision_id, documents, encryption_sources=None):
documents,
encryption_sources=encryption_sources,
validate=False)
def validate_render(revision_id, rendered_documents, validator):
"""Validate rendered documents using ``validator``.
:param revision_id: Key used for caching rendered documents by.
:type revision_id: int
:param documents: List of rendered documents corresponding to
``revision_id``.
:type documents: List[dict]
:param validator: Validation object used for validating
``rendered_documents``.
:type validator: deckhand.engine.document_validation.DocumentValidation
:raises: InvalidDocumentFormat if validation fails.
"""
# Perform schema validation post-rendering to ensure that rendering
# and substitution didn't break anything.
try:
validations = validator.validate_all()
except errors.InvalidDocumentFormat as e:
# Invalidate cache entry so that future lookups also fail.
cache.invalidate_one(revision_id)
# Post-rendering validation errors likely indicate an internal
# rendering bug, so override the default code to 500.
e.code = 500
LOG.error('Failed to post-validate rendered documents.')
LOG.exception(e.format_message())
raise e
error_list = []
for validation in validations:
if validation['status'] == 'failure':
error_list.extend([
vm.ValidationMessage(
message=error['message'],
name=vm.DOCUMENT_POST_RENDERING_FAILURE,
doc_schema=error['schema'],
doc_name=error['name'],
doc_layer=error['layer'],
diagnostic={
k: v for k, v in error.items() if k in (
'schema_path',
'validation_schema',
'error_section'
)
}
)
for error in validation['errors']
])
if error_list:
# Invalidate cache entry so that future lookups also fail.
cache.invalidate_one(revision_id)
raise errors.InvalidDocumentFormat(
error_list=error_list,
reason='Validation',
)

View File

@ -292,5 +292,5 @@ def _format_diff_result(dr):
def _rendered_doc(revision_id):
"""Provides rendered document by given revision id."""
filters = {'deleted': False}
rendered_documents = common.get_rendered_docs(revision_id, **filters)
rendered_documents, _ = common.get_rendered_docs(revision_id, **filters)
return rendered_documents

View File

@ -13,6 +13,7 @@
# limitations under the License.
from threading import Thread
import time
import testtools
@ -39,12 +40,16 @@ class RenderedDocumentsCacheTest(test_base.DeckhandTestCase):
documents = document_factory.gen_test({})
# Validate that caching the ref returns expected payload.
rendered_documents = cache.lookup_by_revision_id(1, documents)
rendered_documents, cache_hit = cache.lookup_by_revision_id(
1, documents)
self.assertIsInstance(rendered_documents, list)
self.assertFalse(cache_hit)
# Validate that the cache actually works.
next_rendered_documents = cache.lookup_by_revision_id(1, None)
next_rendered_documents, cache_hit = cache.lookup_by_revision_id(
1, None)
self.assertEqual(rendered_documents, next_rendered_documents)
self.assertTrue(cache_hit)
# No documents passed in and revision ID 2 isn't cached - so expect
# this to blow up.
@ -69,18 +74,25 @@ class RenderedDocumentsCacheTest(test_base.DeckhandTestCase):
self.assertNotEqual(documents1, documents2)
rendered_documents_by_thread = []
cache_hit_by_thread = []
def threaded_function(documents):
# Validate that caching the ref returns expected payload.
rendered_documents = cache.lookup_by_revision_id(1, documents)
rendered_documents, cache_hit = cache.lookup_by_revision_id(
1, documents)
rendered_documents_by_thread.append(rendered_documents)
return rendered_documents
cache_hit_by_thread.append(cache_hit)
thread1 = Thread(target=threaded_function,
kwargs={'documents': documents1})
thread2 = Thread(target=threaded_function,
kwargs={'documents': documents2})
thread1.start()
# NOTE(felipemonteiro): Add a sleep here to avoid a data race where the
# cache might not be populated fast enough before the second thread
# checks the cache -- and finds nothing thereby proceeding with another
# render request. In real scenarios, though, this is highly unlikely.
time.sleep(1)
thread2.start()
thread1.join()
thread2.join()
@ -90,3 +102,5 @@ class RenderedDocumentsCacheTest(test_base.DeckhandTestCase):
self.assertEqual(2, len(rendered_documents_by_thread))
self.assertEqual(rendered_documents_by_thread[0],
rendered_documents_by_thread[1])
self.assertFalse(cache_hit_by_thread[0]) # 1st time missing in cache.
self.assertTrue(cache_hit_by_thread[1]) # 2nd time should hit cache.