Add template param for ambari pkg install timeout

Ambari often fails during the cluster installation/service
starting stage. This is quite prominent when there is
a large number of nodes in the cluster. Review of the
logs from the cluster indicates that the Ambari installation
scripts have a timeout parameter set to 1800 sec; this
requires adjustment depending on the environment and
speed of package installation.

This fix provides one parameter named
"agent.package.install.task.timeout" inside the Ambari tab
of the HDP cluster template UI. Users may change the
value, and the Ambari server will be set up accordingly
with that package installation timeout.

Change-Id: I826dbebb446d49e01e3cd6d7e525b43aa4523434
Story: #2003176
Task: #23320
This commit is contained in:
bhujay 2018-08-20 16:13:49 +05:30
parent 8250e5cc14
commit e7a4b58c5a
5 changed files with 57 additions and 2 deletions

View File

@ -92,3 +92,19 @@ complete a few steps below.
like ``oozie``, ``hdfs`` and ``spark`` so that you will not have to
perform additional auth operations to execute your jobs on top of the
cluster.
Adjusting Ambari Agent Package Installation timeout Parameter
-------------------------------------------------------------
For a cluster with a large number of nodes or slow connectivity to the HDP
repo server, Sahara HDP cluster creation may fail due to the Ambari agent
reaching the timeout threshold while installing the packages on the nodes.
Such failures will occur during the "cluster start" stage, which can be
monitored from the Cluster Events tab of the Sahara Dashboard. The timeout
error will be visible from the Ambari Dashboard as well.
* To avoid the package installation timeout by the Ambari agent, change
  the default value of the ``Ambari Agent Package Install timeout`` parameter,
  which can be found in the ``General Parameters`` section of the cluster
  template configuration.

View File

@ -0,0 +1,4 @@
---
features:
  - Adding the ability to change the default timeout parameter for
    Ambari agent package installation

View File

@ -129,6 +129,9 @@ autoconfigs_strategy = provisioning.Config(
'NEVER_APPLY', 'ALWAYS_APPLY', 'ONLY_STACK_DEFAULTS_APPLY',
]],
)
# Plugin-level config entry for the Ambari agent package installation
# timeout. The positional "general" / "cluster" arguments are presumably the
# applicable target and scope -- confirm against provisioning.Config.
# The default is kept as the string "1800".
ambari_pkg_install_timeout = provisioning.Config(
    "Ambari Agent Package Install timeout",
    "general",
    "cluster",
    priority=1,
    default_value="1800",
)
def _get_service_name(service):
@ -193,7 +196,7 @@ def load_configs(version):
vanilla_cfg = jsonutils.loads(files.get_file_text(cfg_path))
CONFIGS[version] = vanilla_cfg
sahara_cfg = [hdp_repo_cfg, hdp_utils_repo_cfg, use_base_repos_cfg,
autoconfigs_strategy]
autoconfigs_strategy, ambari_pkg_install_timeout]
for service, confs in vanilla_cfg.items():
for k, v in confs.items():
sahara_cfg.append(provisioning.Config(
@ -226,6 +229,10 @@ def get_auto_configuration_strategy(cluster):
return _get_config_value(cluster, autoconfigs_strategy)
def get_ambari_pkg_install_timeout(cluster):
    """Look up the Ambari agent package install timeout for ``cluster``.

    :param cluster: cluster object handed to ``_get_config_value``
    :returns: whatever ``_get_config_value`` yields for the
        ``ambari_pkg_install_timeout`` config entry
    """
    timeout = _get_config_value(cluster, ambari_pkg_install_timeout)
    return timeout
def _serialize_ambari_configs(configs):
    """Turn a config mapping into a list of single-entry dicts.

    Each key of ``configs`` yields one ``{key: value}`` dict, in the
    mapping's own iteration order.

    :param configs: mapping of config name to value
    :returns: list of one-item dicts
    """
    return [{name: configs[name]} for name in configs]

View File

@ -71,9 +71,14 @@ os_type_map = {
def setup_ambari(cluster):
LOG.debug("Set up Ambari management console")
ambari = plugin_utils.get_instance(cluster, p_common.AMBARI_SERVER)
ambari_settings = ("agent.package.install.task.timeout=%s"
% configs.get_ambari_pkg_install_timeout(cluster))
with ambari.remote() as r:
sudo = functools.partial(r.execute_command, run_as_root=True)
sudo("rngd -r /dev/urandom -W 4096")
r.replace_remote_line("/etc/ambari-server/conf/ambari.properties",
"agent.package.install.task.timeout=",
ambari_settings)
sudo("ambari-server setup -s -j"
" `cut -f2 -d \"=\" /etc/profile.d/99-java.sh`", timeout=1800)
redirect_file = "/tmp/%s" % uuidutils.generate_uuid()

View File

@ -429,6 +429,13 @@ def _replace_remote_string(remote_file, old_str, new_str):
_execute_command(cmd)
def _replace_remote_line(remote_file, old_line_with_start_string, new_line):
    """Replace each line starting with a given string in a remote file.

    Builds ``sudo sed -i 's/^<start>.*/<new_line>/' <file>`` and runs it
    through ``_execute_command``. Both the start string and the new line
    are treated as literal text: characters that sed or the shell would
    interpret are escaped, so values such as paths (``/``), ``&`` or
    quotes cannot break or extend the command.

    :param remote_file: path of the file to edit in place
    :param old_line_with_start_string: literal text the target line(s)
        start with
    :param new_line: literal text that replaces each matching line
    """
    def escape(text, specials):
        # Backslash-escape every character sed would otherwise interpret.
        return "".join("\\" + ch if ch in specials else ch for ch in text)

    # Pattern side: basic-regex metacharacters plus the '/' delimiter.
    pattern = escape(old_line_with_start_string, "\\.*[^$/")
    # Replacement side: backslash, '&' (whole match), '/' and newline.
    replacement = escape(new_line, "\\&/\n")
    script = "s/^%s.*/%s/" % (pattern, replacement)

    # POSIX single-quote quoting: a quote inside the script is written as
    # '\'' (close quote, escaped quote, reopen quote).
    quoted_script = "'" + script.replace("'", "'\\''") + "'"

    # remote_file is interpolated unquoted, exactly as before, so any shell
    # expansion a caller relies on in the path keeps working.
    cmd = "sudo sed -i %s %s" % (quoted_script, remote_file)
    _execute_command(cmd)
def _execute_on_vm_interactive(cmd, matcher):
global _ssh
@ -901,12 +908,28 @@ class InstanceInteropHelper(remote.Remote):
timeout=None):
description = _('In file "%(file)s" replacing string '
'"%(old_string)s" with "%(new_string)s"') % {
"file": remote_file, "old_string": old_str, "new_string": new_str}
"file": remote_file,
"old_string": old_str, "new_string": new_str}
self._log_command(description)
self._run_s(_replace_remote_string, timeout, description,
remote_file, old_str, new_str)
def replace_remote_line(self, remote_file,
                        old_line_with_start_string,
                        new_line, timeout=None):
    """Replace the line(s) starting with a given string in a remote file.

    Logs a description of the operation, then runs
    ``_replace_remote_line`` through ``self._run_s``.

    :param remote_file: path of the file on the instance
    :param old_line_with_start_string: text the line to replace starts with
    :param new_line: text written in place of the matching line
    :param timeout: passed through to ``self._run_s``
    """
    # Message fix: "begining" -> "beginning", and a space before the quoted
    # start string so the two are not run together in the log.
    description = _('In file "%(file)s" replacing line'
                    ' beginning with string'
                    ' "%(old_line_with_start_string)s"'
                    ' with "%(new_line)s"') % {
        "file": remote_file,
        "old_line_with_start_string": old_line_with_start_string,
        "new_line": new_line}
    self._log_command(description)
    self._run_s(_replace_remote_line, timeout, description,
                remote_file, old_line_with_start_string, new_line)
def execute_on_vm_interactive(self, cmd, matcher, timeout=None):
"""Runs given command and responds to prompts.