Merge "Parallelize unsequenced chart group deployments"

This commit is contained in:
Zuul 2018-10-03 19:34:52 +00:00 committed by Gerrit Code Review
commit 924ecc1a8f
8 changed files with 501 additions and 421 deletions

View File

@ -12,7 +12,9 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
import os
import threading
from oslo_config import cfg
from oslo_log import log
@ -42,10 +44,27 @@ def set_app_default_configs():
config_files = _get_config_files()
if all([os.path.exists(x) for x in config_files]):
CONF([], project='armada', default_config_files=config_files)
setup_chart_deploy_aware_logging()
set_default_for_default_log_levels()
default.register_opts(CONF)
# Thread-local slot recording which chart (if any) the current thread is
# deploying; consumed by ChartDeployAwareLogger to prefix log messages.
current_chart_thread_local = threading.local()


def get_current_chart():
    """Return the chart the current thread is deploying, or None."""
    chart = getattr(current_chart_thread_local, 'chart', None)
    return chart


def set_current_chart(chart):
    """Record ``chart`` as the one being deployed by the current thread."""
    current_chart_thread_local.chart = chart
def setup_chart_deploy_aware_logging():
    """Install ChartDeployAwareLogger as the global logger class.

    ``logging.setLoggerClass`` only affects loggers created after this
    call, so this is invoked during config setup, before handlers and
    module loggers are instantiated.
    """
    logging.setLoggerClass(ChartDeployAwareLogger)
def set_default_for_default_log_levels():
"""Set the default for the default_log_levels option for Armada.
Armada uses some packages that other OpenStack services don't use that do
@ -56,5 +75,23 @@ def set_default_for_default_log_levels():
extra_log_level_defaults = ['kubernetes.client.rest=INFO']
log.set_defaults(default_log_levels=log.get_default_log_levels() +
extra_log_level_defaults)
log.set_defaults(
default_log_levels=log.get_default_log_levels() +
extra_log_level_defaults, )
class ChartDeployAwareLogger(logging.Logger):
    """Logger that prefixes each message with the chart currently being
    deployed by this thread (if any), as tracked via thread-local state.
    """

    def _log(self, level, msg, *args, **kwargs):
        # Look up the chart (if any) bound to the calling thread.
        current = get_current_chart()
        if current:
            annotated = '[chart={}]: {}'.format(current['chart_name'], msg)
        else:
            annotated = msg
        return super(ChartDeployAwareLogger, self)._log(
            level, annotated, *args, **kwargs)

View File

@ -77,3 +77,13 @@ class InvalidWaitTypeException(ArmadaException):
self._message = (
'Armada encountered invalid wait type: %s' % wait_type)
super(InvalidWaitTypeException, self).__init__(self._message)
class ChartDeployException(ArmadaException):
    """Exception that occurs while deploying charts."""

    def __init__(self, chart_names):
        # Single message listing every chart whose deployment failed.
        message = 'Exception deploying charts: %s' % chart_names
        self._message = message
        super(ChartDeployException, self).__init__(message)

View File

@ -12,26 +12,21 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import functools
import time
import yaml
from concurrent.futures import ThreadPoolExecutor, as_completed
from oslo_config import cfg
from oslo_log import log as logging
from armada import const
from armada.conf import set_current_chart
from armada.exceptions import armada_exceptions
from armada.exceptions import override_exceptions
from armada.exceptions import source_exceptions
from armada.exceptions import tiller_exceptions
from armada.exceptions import validate_exceptions
from armada.handlers.chartbuilder import ChartBuilder
from armada.handlers.chart_deploy import ChartDeploy
from armada.handlers.manifest import Manifest
from armada.handlers.override import Override
from armada.handlers.release_diff import ReleaseDiff
from armada.handlers.test import test_release_for_success
from armada.handlers.tiller import Tiller
from armada.handlers.wait import get_wait_for
from armada.utils.release import release_prefixer
from armada.utils import source
@ -89,12 +84,9 @@ class Armada(object):
tiller_port = tiller_port or CONF.tiller_port
tiller_namespace = tiller_namespace or CONF.tiller_namespace
self.disable_update_pre = disable_update_pre
self.disable_update_post = disable_update_post
self.enable_chart_cleanup = enable_chart_cleanup
self.dry_run = dry_run
self.force_wait = force_wait
self.timeout = timeout
# TODO: Use dependency injection i.e. pass in a Tiller instead of
# creating it here.
self.tiller = Tiller(
@ -109,19 +101,12 @@ class Armada(object):
except (validate_exceptions.InvalidManifestException,
override_exceptions.InvalidOverrideValueException):
raise
self.k8s_wait_attempts = k8s_wait_attempts
self.k8s_wait_attempt_sleep = k8s_wait_attempt_sleep
self.manifest = Manifest(
self.documents, target_manifest=target_manifest).get_manifest()
self.cloned_dirs = set()
def find_release_chart(self, known_releases, release_name):
    """Return ``(chart, values)`` for ``release_name`` from ``known_releases``.

    Each entry of ``known_releases`` is a 5-tuple whose first, third and
    fourth elements are the release name, chart and values. Falls through
    to an implicit ``None`` when no entry matches.
    """
    matches = (
        (chart, values)
        for release, _, chart, values, _ in known_releases
        if release == release_name)
    return next(matches, None)
self.chart_deploy = ChartDeploy(
disable_update_pre, disable_update_post, self.dry_run,
k8s_wait_attempts, k8s_wait_attempt_sleep, timeout, self.tiller)
def pre_flight_ops(self):
"""Perform a series of checks and operations to ensure proper
@ -240,9 +225,12 @@ class Armada(object):
for chartgroup in manifest_data.get(const.KEYWORD_GROUPS, []):
cg_name = chartgroup.get('name', '<missing name>')
cg_desc = chartgroup.get('description', '<missing description>')
cg_sequenced = chartgroup.get('sequenced', False)
LOG.info('Processing ChartGroup: %s (%s), sequenced=%s', cg_name,
cg_desc, cg_sequenced)
cg_sequenced = chartgroup.get('sequenced',
False) or self.force_wait
LOG.info('Processing ChartGroup: %s (%s), sequenced=%s%s', cg_name,
cg_desc, cg_sequenced,
' (forced)' if self.force_wait else '')
# TODO(MarshM): Deprecate the `test_charts` key
cg_test_all_charts = chartgroup.get('test_charts')
@ -255,284 +243,61 @@ class Armada(object):
# explicitly disable helm tests if they choose
cg_test_all_charts = True
ns_label_set = set()
tests_to_run = []
cg_charts = chartgroup.get(const.KEYWORD_CHARTS, [])
charts = map(lambda x: x.get('chart', {}), cg_charts)
# Track largest Chart timeout to stop the ChartGroup at the end
cg_max_timeout = 0
def deploy_chart(chart):
set_current_chart(chart)
try:
return self.chart_deploy.execute(chart, cg_test_all_charts,
prefix, deployed_releases,
failed_releases)
finally:
set_current_chart(None)
for chart_entry in cg_charts:
chart = chart_entry.get('chart', {})
namespace = chart.get('namespace')
release = chart.get('release')
release_name = release_prefixer(prefix, release)
LOG.info('Processing Chart, release=%s', release_name)
results = []
failures = []
values = chart.get('values', {})
pre_actions = {}
post_actions = {}
protected = chart.get('protected', {})
p_continue = protected.get('continue_processing', False)
# Check for existing FAILED release, and purge
if release_name in [rel[0] for rel in failed_releases]:
LOG.info('Purging FAILED release %s before deployment.',
release_name)
if protected:
if p_continue:
LOG.warn(
'Release %s is `protected`, '
'continue_processing=True. Operator must '
'handle FAILED release manually.',
release_name)
msg['protected'].append(release_name)
continue
else:
LOG.error(
'Release %s is `protected`, '
'continue_processing=False.', release_name)
raise armada_exceptions.ProtectedReleaseException(
release_name)
else:
# Purge the release
self.tiller.uninstall_release(release_name)
msg['purge'].append(release_name)
# NOTE(MarshM): Calculating `wait_timeout` is unfortunately
# overly complex. The order of precedence is currently:
# 1) User provided override via API/CLI (default 0 if not
# provided by client/user).
# 2) Chart's `data.wait.timeout`, or...
# 3) Chart's `data.timeout` (deprecated).
# 4) const.DEFAULT_CHART_TIMEOUT, if nothing is ever
# specified, for use in waiting for final ChartGroup
# health and helm tests, but ignored for the actual
# install/upgrade of the Chart.
# NOTE(MarshM): Not defining a timeout has a side effect of
# allowing Armada to install charts with a circular
# dependency defined between components.
# TODO(MarshM): Deprecated, remove the following block
deprecated_timeout = chart.get('timeout', None)
if isinstance(deprecated_timeout, int):
LOG.warn('The `timeout` key is deprecated and support '
'for this will be removed soon. Use '
'`wait.timeout` instead.')
wait_values = chart.get('wait', {})
wait_labels = wait_values.get('labels', {})
wait_timeout = self.timeout
if wait_timeout <= 0:
wait_timeout = wait_values.get('timeout', wait_timeout)
# TODO(MarshM): Deprecated, remove the following check
if wait_timeout <= 0:
wait_timeout = deprecated_timeout or wait_timeout
# Determine wait logic
# NOTE(Dan Kim): Conditions to wait are below :
# 1) set sequenced=True in chart group
# 2) set force_wait param
# 3) add Chart's `data.wait.timeout`
# --timeout param will do not set wait=True, it just change
# max timeout of chart's deployment. (default: 900)
this_chart_should_wait = (cg_sequenced or self.force_wait or
(bool(wait_values) and
(wait_timeout > 0)))
# If there is still no timeout, we need to use a default
# (item 4 in note above)
if wait_timeout <= 0:
LOG.warn('No Chart timeout specified, using default: %ss',
const.DEFAULT_CHART_TIMEOUT)
wait_timeout = const.DEFAULT_CHART_TIMEOUT
# Naively take largest timeout to apply at end
# TODO(MarshM) better handling of timeout/timer
cg_max_timeout = max(wait_timeout, cg_max_timeout)
test_chart_override = chart.get('test')
# Use old default value when not using newer `test` key
test_cleanup = True
if test_chart_override is None:
test_this_chart = cg_test_all_charts
elif isinstance(test_chart_override, bool):
LOG.warn('Boolean value for chart `test` key is'
' deprecated and support for this will'
' be removed. Use `test.enabled` '
'instead.')
test_this_chart = test_chart_override
# Returns whether or not there was a failure
def handle_result(chart, get_result):
name = chart['chart_name']
try:
result = get_result()
except Exception as e:
LOG.error('Chart deploy [%s] failed: %s', name, e)
failures.append(name)
return True
else:
# NOTE: helm tests are enabled by default
test_this_chart = test_chart_override.get('enabled', True)
test_cleanup = test_chart_override.get('options', {}).get(
'cleanup', False)
results.append(result)
return False
chartbuilder = ChartBuilder(chart)
new_chart = chartbuilder.get_helm_chart()
if cg_sequenced:
for chart in charts:
if (handle_result(chart, lambda: deploy_chart(chart))):
break
else:
with ThreadPoolExecutor(
max_workers=len(cg_charts)) as executor:
future_to_chart = {
executor.submit(deploy_chart, chart): chart
for chart in charts
}
# Begin Chart timeout deadline
deadline = time.time() + wait_timeout
for future in as_completed(future_to_chart):
chart = future_to_chart[future]
handle_result(chart, future.result)
# TODO(mark-burnett): It may be more robust to directly call
# tiller status to decide whether to install/upgrade rather
# than checking for list membership.
if release_name in [rel[0] for rel in deployed_releases]:
if failures:
LOG.error('Chart deploy(s) failed: %s', failures)
raise armada_exceptions.ChartDeployException(failures)
# indicate to the end user what path we are taking
LOG.info("Upgrading release %s in namespace %s",
release_name, namespace)
# extract the installed chart and installed values from the
# latest release so we can compare to the intended state
old_chart, old_values_string = self.find_release_chart(
deployed_releases, release_name)
upgrade = chart.get('upgrade', {})
disable_hooks = upgrade.get('no_hooks', False)
force = upgrade.get('force', False)
recreate_pods = upgrade.get('recreate_pods', False)
LOG.info("Checking Pre/Post Actions")
if upgrade:
upgrade_pre = upgrade.get('pre', {})
upgrade_post = upgrade.get('post', {})
if not self.disable_update_pre and upgrade_pre:
pre_actions = upgrade_pre
if not self.disable_update_post and upgrade_post:
post_actions = upgrade_post
try:
old_values = yaml.safe_load(old_values_string)
except yaml.YAMLError:
chart_desc = '{} (previously deployed)'.format(
old_chart.metadata.name)
raise armada_exceptions.\
InvalidOverrideValuesYamlException(chart_desc)
LOG.info('Checking for updates to chart release inputs.')
diff = self.get_diff(old_chart, old_values, new_chart,
values)
if not diff:
LOG.info("Found no updates to chart release inputs")
continue
LOG.info("Found updates to chart release inputs")
LOG.debug("%s", diff)
msg['diff'].append({chart['release']: str(diff)})
# TODO(MarshM): Add tiller dry-run before upgrade and
# consider deadline impacts
# do actual update
timer = int(round(deadline - time.time()))
LOG.info('Beginning Upgrade, wait=%s, timeout=%ss',
this_chart_should_wait, timer)
tiller_result = self.tiller.update_release(
new_chart,
release_name,
namespace,
pre_actions=pre_actions,
post_actions=post_actions,
disable_hooks=disable_hooks,
values=yaml.safe_dump(values),
wait=this_chart_should_wait,
timeout=timer,
force=force,
recreate_pods=recreate_pods)
if this_chart_should_wait:
self._wait_until_ready(release_name, wait_labels,
namespace, timer)
# Track namespace+labels touched by upgrade
ns_label_set.add((namespace, tuple(wait_labels.items())))
LOG.info('Upgrade completed with results from Tiller: %s',
tiller_result.__dict__)
msg['upgrade'].append(release_name)
# process install
else:
LOG.info("Installing release %s in namespace %s",
release_name, namespace)
timer = int(round(deadline - time.time()))
LOG.info('Beginning Install, wait=%s, timeout=%ss',
this_chart_should_wait, timer)
tiller_result = self.tiller.install_release(
new_chart,
release_name,
namespace,
values=yaml.safe_dump(values),
wait=this_chart_should_wait,
timeout=timer)
if this_chart_should_wait:
self._wait_until_ready(release_name, wait_labels,
namespace, timer)
# Track namespace+labels touched by install
ns_label_set.add((namespace, tuple(wait_labels.items())))
LOG.info('Install completed with results from Tiller: %s',
tiller_result.__dict__)
msg['install'].append(release_name)
# Keeping track of time remaining
timer = int(round(deadline - time.time()))
test_chart_args = (release_name, timer, test_cleanup)
if test_this_chart:
# Sequenced ChartGroup should run tests after each Chart
if cg_sequenced:
LOG.info(
'Running sequenced test, timeout remaining: '
'%ss.', timer)
self._test_chart(*test_chart_args)
# Un-sequenced ChartGroup should run tests at the end
else:
tests_to_run.append(
functools.partial(self._test_chart,
*test_chart_args))
for result in results:
for k, v in result.items():
msg[k].append(v)
# End of Charts in ChartGroup
LOG.info('All Charts applied in ChartGroup %s.', cg_name)
# After all Charts are applied, we should wait for the entire
# ChartGroup to become healthy by looking at the namespaces seen
# TODO(MarshM): Need to determine a better timeout
# (not cg_max_timeout)
if cg_max_timeout <= 0:
cg_max_timeout = const.DEFAULT_CHART_TIMEOUT
deadline = time.time() + cg_max_timeout
for (ns, labels) in ns_label_set:
labels_dict = dict(labels)
timer = int(round(deadline - time.time()))
LOG.info(
'Final ChartGroup wait for healthy namespace=%s, '
'labels=(%s), timeout remaining: %ss.', ns, labels_dict,
timer)
if timer <= 0:
reason = ('Timeout expired waiting on namespace: %s, '
'labels: (%s)' % (ns, labels_dict))
LOG.error(reason)
raise armada_exceptions.ArmadaTimeoutException(reason)
self._wait_until_ready(
release_name=None,
wait_labels=labels_dict,
namespace=ns,
timeout=timer)
# After entire ChartGroup is healthy, run any pending tests
for callback in tests_to_run:
callback()
self.post_flight_ops()
if self.enable_chart_cleanup:
@ -554,58 +319,6 @@ class Armada(object):
LOG.debug('Removing cloned temp directory: %s', cloned_dir)
source.source_cleanup(cloned_dir)
def _wait_until_ready(self, release_name, wait_labels, namespace, timeout):
    """Wait for the release's job and pod resources to become ready.

    Waits on resources in ``namespace`` matching ``wait_labels`` for up
    to ``timeout`` seconds; raises ArmadaTimeoutException when the
    deadline expires between waits.
    """
    if self.dry_run:
        # Dry runs never touch the cluster, so there is nothing to wait on.
        LOG.info(
            'Skipping wait during `dry-run`, would have waited on '
            'namespace=%s, labels=(%s) for %ss.', namespace, wait_labels,
            timeout)
        return

    LOG.info('Waiting for release=%s', release_name)

    # Jobs may legitimately be absent for a chart; pods must exist.
    waits = [
        get_wait_for('job', self.tiller.k8s, skip_if_none_found=True),
        get_wait_for('pod', self.tiller.k8s)
    ]
    deadline = time.time() + timeout
    deadline_remaining = timeout
    for wait in waits:
        if deadline_remaining <= 0:
            reason = (
                'Timeout expired waiting for release=%s' % release_name)
            LOG.error(reason)
            raise armada_exceptions.ArmadaTimeoutException(reason)
        # NOTE(review): each wait is given the full original `timeout`
        # rather than `deadline_remaining`, so total wall time can exceed
        # `timeout` even though expiry is checked between waits — confirm
        # whether that is intentional.
        wait.wait(
            labels=wait_labels,
            namespace=namespace,
            k8s_wait_attempts=self.k8s_wait_attempts,
            k8s_wait_attempt_sleep=self.k8s_wait_attempt_sleep,
            timeout=timeout)
        deadline_remaining = int(round(deadline - time.time()))
def _test_chart(self, release_name, timeout, cleanup):
    """Run helm tests for ``release_name``.

    Raises ArmadaTimeoutException if no time remains, and
    TestFailedException when the tests themselves fail.
    """
    if self.dry_run:
        LOG.info(
            'Skipping test during `dry-run`, would have tested '
            'release=%s with timeout %ss.', release_name, timeout)
        return True

    # Guard: refuse to start tests once the deadline has passed.
    if timeout <= 0:
        reason = 'Timeout expired before testing release %s' % release_name
        LOG.error(reason)
        raise armada_exceptions.ArmadaTimeoutException(reason)

    passed = test_release_for_success(
        self.tiller, release_name, timeout=timeout, cleanup=cleanup)
    if not passed:
        LOG.info("Test failed for release: %s", release_name)
        raise tiller_exceptions.TestFailedException(release_name)
    LOG.info("Test passed for release: %s", release_name)
def _chart_cleanup(self, prefix, charts, msg):
LOG.info('Processing chart cleanup to remove unspecified releases.')
@ -625,6 +338,3 @@ class Armada(object):
release)
self.tiller.uninstall_release(release)
msg['purge'].append(release)
def get_diff(self, old_chart, old_values, new_chart, values):
    """Return the diff between the deployed and the intended release."""
    differ = ReleaseDiff(old_chart, old_values, new_chart, values)
    return differ.get_diff()

View File

@ -0,0 +1,290 @@
# Copyright 2018 The Armada Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from oslo_log import log as logging
import time
import yaml
from armada import const
from armada.exceptions import armada_exceptions
from armada.handlers.chartbuilder import ChartBuilder
from armada.handlers.test import test_release_for_success
from armada.handlers.release_diff import ReleaseDiff
from armada.handlers.wait import get_wait_for
from armada.exceptions import tiller_exceptions
from armada.utils.release import release_prefixer
LOG = logging.getLogger(__name__)
class ChartDeploy(object):
    """Deploys a single chart: purge of FAILED releases, install or
    upgrade via Tiller, readiness wait, and optional helm tests.

    Extracted from Armada.sync so a chart group can run one instance of
    this logic per chart, sequentially or in parallel.
    """

    def __init__(self, disable_update_pre, disable_update_post, dry_run,
                 k8s_wait_attempts, k8s_wait_attempt_sleep, timeout, tiller):
        # Flags suppressing pre/post upgrade actions.
        self.disable_update_pre = disable_update_pre
        self.disable_update_post = disable_update_post
        # When True, cluster-mutating and waiting steps are skipped.
        self.dry_run = dry_run
        self.k8s_wait_attempts = k8s_wait_attempts
        self.k8s_wait_attempt_sleep = k8s_wait_attempt_sleep
        # User-provided override timeout (0 or less means "unset").
        self.timeout = timeout
        self.tiller = tiller

    def execute(self, chart, cg_test_all_charts, prefix, deployed_releases,
                failed_releases):
        """Deploy ``chart`` and return a dict describing the outcome.

        The returned dict may contain the keys 'protected', 'purge',
        'diff', 'upgrade' and 'install'. Raises
        ProtectedReleaseException for a FAILED protected release with
        continue_processing=False, and whatever the wait/test helpers
        raise on timeout or test failure.
        """
        namespace = chart.get('namespace')
        release = chart.get('release')
        release_name = release_prefixer(prefix, release)
        LOG.info('Processing Chart, release=%s', release_name)

        values = chart.get('values', {})
        pre_actions = {}
        post_actions = {}

        result = {}

        protected = chart.get('protected', {})
        p_continue = protected.get('continue_processing', False)

        # Check for existing FAILED release, and purge
        if release_name in [rel[0] for rel in failed_releases]:
            LOG.info('Purging FAILED release %s before deployment.',
                     release_name)
            if protected:
                if p_continue:
                    # Leave the FAILED release in place for the operator.
                    LOG.warn(
                        'Release %s is `protected`, '
                        'continue_processing=True. Operator must '
                        'handle FAILED release manually.', release_name)
                    result['protected'] = release_name
                    return result
                else:
                    LOG.error(
                        'Release %s is `protected`, '
                        'continue_processing=False.', release_name)
                    raise armada_exceptions.ProtectedReleaseException(
                        release_name)
            else:
                # Purge the release
                self.tiller.uninstall_release(release_name)
                result['purge'] = release_name

        # Timeout precedence: user override (self.timeout), then
        # `wait.timeout`, then deprecated `timeout`, then the default.
        wait_values = chart.get('wait', {})
        wait_labels = wait_values.get('labels', {})
        wait_timeout = self.timeout
        if wait_timeout <= 0:
            wait_timeout = wait_values.get('timeout', wait_timeout)

        # TODO(MarshM): Deprecated, remove `timeout` key.
        deprecated_timeout = chart.get('timeout', None)
        if isinstance(deprecated_timeout, int):
            LOG.warn('The `timeout` key is deprecated and support '
                     'for this will be removed soon. Use '
                     '`wait.timeout` instead.')
            if wait_timeout <= 0:
                wait_timeout = deprecated_timeout or wait_timeout

        if wait_timeout <= 0:
            LOG.info('No Chart timeout specified, using default: %ss',
                     const.DEFAULT_CHART_TIMEOUT)
            wait_timeout = const.DEFAULT_CHART_TIMEOUT

        # Controls helm's native `--wait` behavior (enabled by default).
        native_wait = wait_values.get('native', {})
        native_wait_enabled = native_wait.get('enabled', True)

        chartbuilder = ChartBuilder(chart)
        new_chart = chartbuilder.get_helm_chart()

        # Begin Chart timeout deadline
        deadline = time.time() + wait_timeout

        # TODO(mark-burnett): It may be more robust to directly call
        # tiller status to decide whether to install/upgrade rather
        # than checking for list membership.
        if release_name in [rel[0] for rel in deployed_releases]:

            # indicate to the end user what path we are taking
            LOG.info("Upgrading release %s in namespace %s", release_name,
                     namespace)
            # extract the installed chart and installed values from the
            # latest release so we can compare to the intended state
            old_chart, old_values_string = self.find_release_chart(
                deployed_releases, release_name)

            upgrade = chart.get('upgrade', {})
            disable_hooks = upgrade.get('no_hooks', False)
            force = upgrade.get('force', False)
            recreate_pods = upgrade.get('recreate_pods', False)

            LOG.info("Checking Pre/Post Actions")
            if upgrade:
                upgrade_pre = upgrade.get('pre', {})
                upgrade_post = upgrade.get('post', {})

                if not self.disable_update_pre and upgrade_pre:
                    pre_actions = upgrade_pre

                if not self.disable_update_post and upgrade_post:
                    post_actions = upgrade_post

            try:
                old_values = yaml.safe_load(old_values_string)
            except yaml.YAMLError:
                chart_desc = '{} (previously deployed)'.format(
                    old_chart.metadata.name)
                raise armada_exceptions.\
                    InvalidOverrideValuesYamlException(chart_desc)

            LOG.info('Checking for updates to chart release inputs.')
            diff = self.get_diff(old_chart, old_values, new_chart, values)

            if not diff:
                # Nothing changed; skip the upgrade entirely.
                LOG.info("Found no updates to chart release inputs")
                return result

            LOG.info("Found updates to chart release inputs")
            LOG.debug("%s", diff)
            result['diff'] = {chart['release']: str(diff)}

            # TODO(MarshM): Add tiller dry-run before upgrade and
            # consider deadline impacts

            # do actual update
            timer = int(round(deadline - time.time()))
            LOG.info('Beginning Upgrade, wait=%s, timeout=%ss',
                     native_wait_enabled, timer)
            tiller_result = self.tiller.update_release(
                new_chart,
                release_name,
                namespace,
                pre_actions=pre_actions,
                post_actions=post_actions,
                disable_hooks=disable_hooks,
                values=yaml.safe_dump(values),
                wait=native_wait_enabled,
                timeout=timer,
                force=force,
                recreate_pods=recreate_pods)

            LOG.info('Upgrade completed with results from Tiller: %s',
                     tiller_result.__dict__)
            result['upgrade'] = release_name

        # process install
        else:
            LOG.info("Installing release %s in namespace %s", release_name,
                     namespace)
            timer = int(round(deadline - time.time()))
            LOG.info('Beginning Install, wait=%s, timeout=%ss',
                     native_wait_enabled, timer)
            tiller_result = self.tiller.install_release(
                new_chart,
                release_name,
                namespace,
                values=yaml.safe_dump(values),
                wait=native_wait_enabled,
                timeout=timer)

            LOG.info('Install completed with results from Tiller: %s',
                     tiller_result.__dict__)
            result['install'] = release_name

        # Armada's own readiness wait, with whatever time remains.
        timer = int(round(deadline - time.time()))
        self._wait_until_ready(release_name, wait_labels, namespace, timer)

        test_chart_override = chart.get('test')
        # Use old default value when not using newer `test` key
        test_cleanup = True
        if test_chart_override is None:
            # Fall back to the chart group's test setting.
            test_this_chart = cg_test_all_charts
        elif isinstance(test_chart_override, bool):
            LOG.warn('Boolean value for chart `test` key is'
                     ' deprecated and support for this will'
                     ' be removed. Use `test.enabled` '
                     'instead.')
            test_this_chart = test_chart_override
        else:
            # NOTE: helm tests are enabled by default
            test_this_chart = test_chart_override.get('enabled', True)
            test_cleanup = test_chart_override.get('options', {}).get(
                'cleanup', False)
        if test_this_chart:
            timer = int(round(deadline - time.time()))
            self._test_chart(release_name, timer, test_cleanup)

        return result

    def _wait_until_ready(self, release_name, wait_labels, namespace, timeout):
        """Wait for the release's job/pod resources to become ready.

        Raises ArmadaTimeoutException when the deadline expires between
        waits.
        """
        if self.dry_run:
            LOG.info(
                'Skipping wait during `dry-run`, would have waited on '
                'namespace=%s, labels=(%s) for %ss.', namespace, wait_labels,
                timeout)
            return

        LOG.info('Waiting for release=%s', release_name)

        # Jobs may legitimately be absent for a chart; pods must exist.
        waits = [
            get_wait_for('job', self.tiller.k8s, skip_if_none_found=True),
            get_wait_for('pod', self.tiller.k8s)
        ]
        deadline = time.time() + timeout
        deadline_remaining = timeout
        for wait in waits:
            if deadline_remaining <= 0:
                reason = (
                    'Timeout expired waiting for release=%s' % release_name)
                LOG.error(reason)
                raise armada_exceptions.ArmadaTimeoutException(reason)
            # NOTE(review): each wait receives the full original `timeout`
            # rather than `deadline_remaining` — confirm intent.
            wait.wait(
                labels=wait_labels,
                namespace=namespace,
                k8s_wait_attempts=self.k8s_wait_attempts,
                k8s_wait_attempt_sleep=self.k8s_wait_attempt_sleep,
                timeout=timeout)
            deadline_remaining = int(round(deadline - time.time()))

    def _test_chart(self, release_name, timeout, cleanup):
        """Run helm tests; raise on timeout expiry or test failure."""
        if self.dry_run:
            LOG.info(
                'Skipping test during `dry-run`, would have tested '
                'release=%s with timeout %ss.', release_name, timeout)
            return True

        if timeout <= 0:
            reason = ('Timeout expired before testing '
                      'release %s' % release_name)
            LOG.error(reason)
            raise armada_exceptions.ArmadaTimeoutException(reason)

        success = test_release_for_success(
            self.tiller, release_name, timeout=timeout, cleanup=cleanup)
        if success:
            LOG.info("Test passed for release: %s", release_name)
        else:
            LOG.info("Test failed for release: %s", release_name)
            raise tiller_exceptions.TestFailedException(release_name)

    def get_diff(self, old_chart, old_values, new_chart, values):
        """Return the diff between deployed and intended release state."""
        return ReleaseDiff(old_chart, old_values, new_chart, values).get_diff()

    def find_release_chart(self, known_releases, release_name):
        '''
        Find a release given a list of known_releases and a release name
        '''
        # Entries are 5-tuples: (release, _, chart, values, _).
        # Implicitly returns None when no entry matches.
        for release, _, chart, values, _ in known_releases:
            if release == release_name:
                return chart, values

View File

@ -83,6 +83,14 @@ data:
type: integer
labels:
$ref: "#/definitions/labels"
# Config for helm's native `--wait` param.
native:
type: object
properties:
# TODO: Add separate timeout for native wait?
enabled:
type: boolean
additionalProperties: false
additionalProperties: false
source:
type: object

View File

@ -16,11 +16,34 @@
# License for the specific language governing permissions and limitations
# under the License.
import mock
import random
import string
import testtools
import threading
import uuid
import testtools
# Tracks whether the mock patch below has already been applied.
_mock_thread_safe = False
_mock_call_lock = threading.RLock()


# TODO(seaneagan): Get this working.
def makeMockThreadSafe():
    '''
    Serialize mock call recording behind a process-wide lock so that
    call records stay accurate when mocks are invoked from many threads.
    Idempotent: the patch is applied at most once.
    '''
    global _mock_thread_safe
    if _mock_thread_safe:
        return
    original_call = mock.CallableMixin._mock_call

    def locked_call(*args, **kwargs):
        with _mock_call_lock:
            return original_call(*args, **kwargs)

    mock.CallableMixin._mock_call = locked_call
    _mock_thread_safe = True
def rand_uuid_hex():

View File

@ -17,14 +17,17 @@ import yaml
from armada import const
from armada.handlers import armada
from armada.handlers import chart_deploy
from armada.tests.unit import base
from armada.tests.test_utils import AttrDict
from armada.tests.test_utils import AttrDict, makeMockThreadSafe
from armada.utils.release import release_prefixer
from armada.exceptions import ManifestException
from armada.exceptions.override_exceptions import InvalidOverrideValueException
from armada.exceptions.validate_exceptions import InvalidManifestException
from armada.exceptions import tiller_exceptions
from armada.exceptions.armada_exceptions import ProtectedReleaseException
from armada.exceptions.armada_exceptions import ChartDeployException
makeMockThreadSafe()
TEST_YAML = """
---
@ -43,7 +46,7 @@ metadata:
name: example-group
data:
description: this is a test
sequenced: False
sequenced: True
chart_group:
- example-chart-1
- example-chart-2
@ -136,6 +139,8 @@ data:
dependencies: []
wait:
timeout: 10
native:
enabled: false
test:
enabled: true
"""
@ -171,7 +176,10 @@ class ArmadaHandlerTestCase(base.ArmadaTestCase):
'source_dir': CHART_SOURCES[0],
'values': {},
'wait': {
'timeout': 10
'timeout': 10,
'native': {
'enabled': False
}
},
'test': {
'enabled': True
@ -260,7 +268,7 @@ class ArmadaHandlerTestCase(base.ArmadaTestCase):
'name':
'example-group',
'sequenced':
False
True
}]
}
} # yapf: disable
@ -317,6 +325,11 @@ class ArmadaHandlerTestCase(base.ArmadaTestCase):
mock_source.source_cleanup.assert_called_with(
CHART_SOURCES[counter][0])
# TODO(seaneagan): Separate ChartDeploy tests into separate module.
# TODO(seaneagan): Once able to make mock library sufficiently thread safe,
# run sync tests for unsequenced as well by moving them to separate test
# class with two separate subclasses which set chart group `sequenced`
# field, one to true, one to false.
def _test_sync(self,
known_releases,
test_success=True,
@ -326,15 +339,15 @@ class ArmadaHandlerTestCase(base.ArmadaTestCase):
@mock.patch.object(armada.Armada, 'post_flight_ops')
@mock.patch.object(armada.Armada, 'pre_flight_ops')
@mock.patch('armada.handlers.armada.ChartBuilder')
@mock.patch('armada.handlers.chart_deploy.ChartBuilder')
@mock.patch('armada.handlers.armada.Tiller')
@mock.patch.object(armada, 'test_release_for_success')
@mock.patch.object(chart_deploy, 'test_release_for_success')
def _do_test(mock_test_release_for_success, mock_tiller,
mock_chartbuilder, mock_pre_flight, mock_post_flight):
# Instantiate Armada object.
yaml_documents = list(yaml.safe_load_all(TEST_YAML))
armada_obj = armada.Armada(yaml_documents)
armada_obj.get_diff = mock.Mock()
armada_obj.chart_deploy.get_diff = mock.Mock()
chart_group = armada_obj.manifest['armada']['chart_groups'][0]
charts = chart_group['chart_group']
@ -360,7 +373,7 @@ class ArmadaHandlerTestCase(base.ArmadaTestCase):
mock_chartbuilder.get_helm_chart.return_value = None
# Simulate chart diff, upgrade should only happen if non-empty.
armada_obj.get_diff.return_value = diff
armada_obj.chart_deploy.get_diff.return_value = diff
armada_obj.sync()
@ -376,7 +389,8 @@ class ArmadaHandlerTestCase(base.ArmadaTestCase):
release_name = release_prefixer(prefix, release)
# Simplified check because the actual code uses logical-or's
# multiple conditions, so this is enough.
this_chart_should_wait = chart['wait']['timeout'] > 0
native_wait_enabled = (chart['wait'].get('native', {}).get(
'enabled', True))
expected_apply = True
if release_name not in [x[0] for x in known_releases]:
@ -388,8 +402,8 @@ class ArmadaHandlerTestCase(base.ArmadaTestCase):
['release_prefix'], chart['release']),
chart['namespace'],
values=yaml.safe_dump(chart['values']),
wait=this_chart_should_wait,
timeout=chart['wait']['timeout']))
wait=native_wait_enabled,
timeout=mock.ANY))
else:
target_release = None
for known_release in known_releases:
@ -412,15 +426,16 @@ class ArmadaHandlerTestCase(base.ArmadaTestCase):
chart['release']),
chart['namespace'],
values=yaml.safe_dump(chart['values']),
wait=this_chart_should_wait,
timeout=chart['wait']['timeout']))
wait=native_wait_enabled,
timeout=mock.ANY))
else:
p_continue = protected.get(
'continue_processing', False)
if p_continue:
continue
else:
break
if chart_group['sequenced']:
break
if status == const.STATUS_DEPLOYED:
if not diff:
@ -446,8 +461,8 @@ class ArmadaHandlerTestCase(base.ArmadaTestCase):
force=force,
recreate_pods=recreate_pods,
values=yaml.safe_dump(chart['values']),
wait=this_chart_should_wait,
timeout=chart['wait']['timeout']))
wait=native_wait_enabled,
timeout=mock.ANY))
test_chart_override = chart.get('test')
# Use old default value when not using newer `test` key
@ -469,6 +484,7 @@ class ArmadaHandlerTestCase(base.ArmadaTestCase):
timeout=mock.ANY,
cleanup=test_cleanup))
any_order = not chart_group['sequenced']
# Verify that at least 1 release is either installed or updated.
self.assertTrue(
len(expected_install_release_calls) >= 1 or
@ -479,28 +495,28 @@ class ArmadaHandlerTestCase(base.ArmadaTestCase):
len(expected_install_release_calls),
m_tiller.install_release.call_count)
m_tiller.install_release.assert_has_calls(
expected_install_release_calls)
expected_install_release_calls, any_order=any_order)
# Verify that the expected number of deployed releases is
# updated with the expected arguments.
self.assertEqual(
len(expected_update_release_calls),
m_tiller.update_release.call_count)
m_tiller.update_release.assert_has_calls(
expected_update_release_calls)
expected_update_release_calls, any_order=any_order)
# Verify that the expected number of deployed releases are
# uninstalled with expected arguments.
self.assertEqual(
len(expected_uninstall_release_calls),
m_tiller.uninstall_release.call_count)
m_tiller.uninstall_release.assert_has_calls(
expected_uninstall_release_calls)
expected_uninstall_release_calls, any_order=any_order)
# Verify that the expected number of deployed releases are
# tested with expected arguments.
self.assertEqual(
len(expected_test_release_for_success_calls),
mock_test_release_for_success.call_count)
mock_test_release_for_success.assert_has_calls(
expected_test_release_for_success_calls)
expected_test_release_for_success_calls, any_order=any_order)
_do_test()
@ -566,66 +582,21 @@ class ArmadaHandlerTestCase(base.ArmadaTestCase):
def _test_method():
self._test_sync(known_releases)
self.assertRaises(ProtectedReleaseException, _test_method)
self.assertRaises(ChartDeployException, _test_method)
def test_armada_sync_test_failure(self):
def _test_method():
self._test_sync([], test_success=False)
self.assertRaises(tiller_exceptions.TestFailedException, _test_method)
self.assertRaises(ChartDeployException, _test_method)
def test_armada_sync_test_failure_to_run(self):
def _test_method():
self._test_sync([], test_failure_to_run=True)
self.assertRaises(tiller_exceptions.ReleaseException, _test_method)
@mock.patch.object(armada.Armada, 'post_flight_ops')
@mock.patch.object(armada.Armada, 'pre_flight_ops')
@mock.patch('armada.handlers.armada.ChartBuilder')
@mock.patch('armada.handlers.armada.Tiller')
def test_install(self, mock_tiller, mock_chartbuilder, mock_pre_flight,
                 mock_post_flight):
    '''Test install functionality from the sync() method.

    Runs a full ``armada.sync()`` against the two-chart TEST_YAML
    manifest with Tiller, the chart builder, and the pre/post flight
    hooks all mocked out, then verifies that ``install_release`` was
    invoked once per chart with the expected release name, namespace,
    values, timeout, and wait flag.
    '''
    # Instantiate Armada object.
    yaml_documents = list(yaml.safe_load_all(TEST_YAML))
    armada_obj = armada.Armada(yaml_documents)
    # Both charts live in the first (and only) chart group of TEST_YAML.
    charts = armada_obj.manifest['armada']['chart_groups'][0][
        'chart_group']
    chart_1 = charts[0]['chart']
    chart_2 = charts[1]['chart']
    # Mock irrelevant methods called by `armada.sync()`.
    # An empty release list forces the "install" (not "upgrade") path.
    mock_tiller.list_charts.return_value = []
    mock_chartbuilder.get_source_path.return_value = None
    mock_chartbuilder.get_helm_chart.return_value = None
    armada_obj.sync()
    # Check params that should be passed to `tiller.install_release()`.
    # Release names are "<release_prefix>-<release>" per chart.
    method_calls = [
        mock.call(
            mock_chartbuilder().get_helm_chart(),
            "{}-{}".format(armada_obj.manifest['armada']['release_prefix'],
                           chart_1['release']),
            chart_1['namespace'],
            values=yaml.safe_dump(chart_1['values']),
            timeout=10,
            wait=True),
        mock.call(
            mock_chartbuilder().get_helm_chart(),
            "{}-{}".format(armada_obj.manifest['armada']['release_prefix'],
                           chart_2['release']),
            chart_2['namespace'],
            values=yaml.safe_dump(chart_2['values']),
            timeout=10,
            wait=True)
    ]
    # NOTE(review): assert_has_calls checks ordered containment, not an
    # exact call list — extra calls would not fail this assertion.
    mock_tiller.return_value.install_release.assert_has_calls(method_calls)
self.assertRaises(ChartDeployException, _test_method)
class ArmadaNegativeHandlerTestCase(base.ArmadaTestCase):

View File

@ -99,12 +99,12 @@ Chart
+-----------------+----------+---------------------------------------------------------------------------------------+
| namespace | string | namespace of your chart |
+-----------------+----------+---------------------------------------------------------------------------------------+
| wait | object | contains wait information such as (timeout, labels) |
| wait | object | See Wait_. |
+-----------------+----------+---------------------------------------------------------------------------------------+
| protected | object | do not delete FAILED releases when encountered from previous run (provide the |
| | | 'continue_processing' bool to continue or halt execution (default: halt)) |
+-----------------+----------+---------------------------------------------------------------------------------------+
| test | object | Run helm tests on the chart after install/upgrade (default enabled) |
| test | object | See Test_. |
+-----------------+----------+---------------------------------------------------------------------------------------+
| install | object | install the chart into your Kubernetes cluster |
+-----------------+----------+---------------------------------------------------------------------------------------+
@ -119,15 +119,41 @@ Chart
| timeout | int | time (in seconds) allotted for chart to deploy when 'wait' flag is set (DEPRECATED) |
+-----------------+----------+---------------------------------------------------------------------------------------+
Wait
^^^^
+-------------+----------+--------------------------------------------------------------------+
| keyword | type | action |
+=============+==========+====================================================================+
| native | object | See `Wait Native`_. |
+-------------+----------+--------------------------------------------------------------------+
| timeout | int | time (in seconds) to wait for chart to deploy |
+-------------+----------+--------------------------------------------------------------------+
| labels | object | k:v mapping of labels to select Kubernetes resources |
+-------------+----------+--------------------------------------------------------------------+
Wait Native
^^^^^^^^^^^
Config for the native ``helm (install|upgrade) --wait`` flag.
+-------------+----------+--------------------------------------------------------------------+
| keyword | type | action |
+=============+==========+====================================================================+
| enabled | boolean | defaults to true |
+-------------+----------+--------------------------------------------------------------------+
Test
^^^^
Run helm tests on the chart after install/upgrade.
+-------------+----------+--------------------------------------------------------------------+
| keyword | type | action |
+=============+==========+====================================================================+
| enabled | bool | whether to enable/disable helm tests for this chart (default True) |
+-------------+----------+--------------------------------------------------------------------+
| options | object | options to pass through to helm |
| options | object | See `Test Options`_. |
+-------------+----------+--------------------------------------------------------------------+
.. note::
@ -142,8 +168,13 @@ Test
deprecated and will be removed. The ``cleanup`` option below is set to true
in this case for backward compatibility.
Test - Options
^^^^^^^^^^^^^^
.. _test_options:
Test Options
^^^^^^^^^^^^
Test options to pass through directly to helm.
+-------------+----------+---------------------------------------------------------------+
| keyword | type | action |